Merge tag 'for-linus-5.16-2' of git://github.com/cminyard/linux-ipmi

Pull IPMI fixes from Corey Minyard:
 "Some changes that went in 5.16 had issues. When working on the design
  a piece was redesigned and things got missed. And the message type was
  not being initialized when it was allocated, resulting in crashes.

  In addition, the IPMI driver has had a shutdown issue where it could
  still have an item in a system workqueue after it had been shut down.
  Move to a private workqueue to avoid that problem"
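
As background, the last two fixes in the list below move the driver's
deferred removal work off the system workqueue, and the oob-access fix
initializes the message type at allocation time. A minimal sketch of both
patterns, with hypothetical names (this is not the driver's actual code):

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    /* Hypothetical message object; the point is that the type
     * (discriminator) field is set as soon as it is allocated. */
    struct example_msg {
            int type;                       /* must be set before first use */
            struct work_struct remove_work;
    };

    #define EXAMPLE_MSG_NORMAL 0

    static struct workqueue_struct *remove_wq;

    static void example_remove_fn(struct work_struct *work)
    {
            kfree(container_of(work, struct example_msg, remove_work));
    }

    static struct example_msg *example_alloc_msg(void)
    {
            struct example_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL);

            if (msg) {
                    msg->type = EXAMPLE_MSG_NORMAL; /* initialize at alloc */
                    INIT_WORK(&msg->remove_work, example_remove_fn);
            }
            return msg;
    }

    static int example_init(void)
    {
            /* Private workqueue: work queued here cannot linger on a
             * system workqueue after the driver shuts down. */
            remove_wq = alloc_workqueue("example-remove", 0, 0);
            if (!remove_wq)
                    return -ENOMEM;
            return 0;
    }

    static void example_shutdown(void)
    {
            /* Drains all queued work, then frees the workqueue. */
            destroy_workqueue(remove_wq);
    }

Queueing the removal work with queue_work(remove_wq, &msg->remove_work)
instead of schedule_work() is what makes the shutdown-time drain possible.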

* tag 'for-linus-5.16-2' of git://github.com/cminyard/linux-ipmi:
  ipmi:ipmb: Fix unknown command response
  ipmi: fix IPMI_SMI_MSG_TYPE_IPMB_DIRECT response length checking
  ipmi: fix oob access due to uninit smi_msg type
  ipmi: msghandler: Make symbol 'remove_work_wq' static
  ipmi: Move remove_work to dedicated workqueue
diff --git a/.mailmap b/.mailmap
index 14314e3..6277bb2 100644
--- a/.mailmap
+++ b/.mailmap
@@ -71,6 +71,9 @@
 Chao Yu <chao@kernel.org> <yuchao0@huawei.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
+Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
+Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
+Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
 Christophe Ricard <christophe.ricard@gmail.com>
 Christoph Hellwig <hch@lst.de>
 Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index f627e70..b268e3e 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -512,3 +512,19 @@
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	You can control the multiplier value of bdi device readahead window size
 		between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
+
+What:		/sys/fs/f2fs/<disk>/max_fragment_chunk
+Date:		August 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
+		f2fs will alternate between allocating 1..<max_fragment_chunk> blocks in a
+		chunk and leaving a hole of length 1..<max_fragment_hole>. This value can be
+		set between 1..512 and the default value is 4.
+
+What:		/sys/fs/f2fs/<disk>/max_fragment_hole
+Date:		August 2021
+Contact:	"Daeho Jeong" <daehojeong@google.com>
+Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
+		f2fs will alternate between allocating 1..<max_fragment_chunk> blocks in a
+		chunk and leaving a hole of length 1..<max_fragment_hole>. This value can be
+		set between 1..512 and the default value is 4.
diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
index 6721a80..475eb0e 100644
--- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -1520,15 +1520,15 @@
 Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
 and set.
 
-- 1 = Home mode
-- 2 = Web-browser mode
-- 3 = Web-conference mode
-- 4 = Function mode
-- 5 = Layflat mode
+- 0 = Home mode
+- 1 = Web-browser mode
+- 2 = Web-conference mode
+- 3 = Function mode
+- 4 = Layflat mode
 
 For more details about which buttons will appear depending on the mode, please
 review the laptop's user guide:
-http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
+https://download.lenovo.com/ibmdl/pub/pc/pccbbs/mobiles_pdf/x1carbon_2_ug_en.pdf
 
 Battery charge control
 ----------------------
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 4261620..0e486f4 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1099,7 +1099,7 @@
 ===============
 
 Enables/disables task delay accounting (see
-:doc:`accounting/delay-accounting.rst`). Enabling this feature incurs
+Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
 a small amount of overhead in the scheduler but is useful for debugging
 and performance tuning. It is required by some tools such as iotop.
 
diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index 8323c79..9485a5a 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -104,6 +104,8 @@
 
                 Not supported by the Linux kernel.
 
+  Homepage:
+        https://web.archive.org/web/20110924171043/http://www.marvell.com/embedded-processors/discovery-innovation/
   Core:
 	Feroceon 88fr571-vd ARMv5 compatible
 
@@ -120,6 +122,7 @@
         - 88F6707
         - 88F6W11
 
+    - Product infos:   https://web.archive.org/web/20141002083258/http://www.marvell.com/embedded-processors/armada-370/
     - Product Brief:   https://web.archive.org/web/20121115063038/http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
     - Hardware Spec:   https://web.archive.org/web/20140617183747/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
     - Functional Spec: https://web.archive.org/web/20140617183701/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
@@ -127,9 +130,29 @@
   Core:
 	Sheeva ARMv7 compatible PJ4B
 
+  Armada XP Flavors:
+        - MV78230
+        - MV78260
+        - MV78460
+
+    NOTE:
+	not to be confused with the non-SMP 78xx0 SoCs
+
+    - Product infos:   https://web.archive.org/web/20150101215721/http://www.marvell.com/embedded-processors/armada-xp/
+    - Product Brief:   https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+    - Functional Spec: https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+    - Hardware Specs:
+        - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+        - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+        - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+  Core:
+	Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
   Armada 375 Flavors:
 	- 88F6720
 
+    - Product infos: https://web.archive.org/web/20140108032402/http://www.marvell.com/embedded-processors/armada-375/
     - Product Brief: https://web.archive.org/web/20131216023516/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
 
   Core:
@@ -162,29 +185,6 @@
   Core:
 	ARM Cortex-A9
 
-  Armada XP Flavors:
-        - MV78230
-        - MV78260
-        - MV78460
-
-    NOTE:
-	not to be confused with the non-SMP 78xx0 SoCs
-
-    Product Brief:
-	https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
-
-    Functional Spec:
-	https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
-
-    - Hardware Specs:
-
-        - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
-        - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
-        - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
-
-  Core:
-	Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
   Linux kernel mach directory:
 	arch/arm/mach-mvebu
   Linux kernel plat directory:
@@ -436,7 +436,7 @@
   - Flavors:
 	- 88DE3010, Armada 1000 (no Linux support)
 		- Core:		Marvell PJ1 (ARMv5TE), Dual-core
-		- Product Brief:	http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
+		- Product Brief:	https://web.archive.org/web/20131103162620/http://www.marvell.com/digital-entertainment/assets/armada_1000_pb.pdf
 	- 88DE3005, Armada 1500 Mini
 		- Design name:	BG2CD
 		- Core:		ARM Cortex-A9, PL310 L2CC
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 37f273a..610450f 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -15,7 +15,7 @@
 libbpf
 ======
 
-Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs.
+Documentation/bpf/libbpf/index.rst is a userspace library for loading and interacting with bpf programs.
 
 BPF Type Format (BTF)
 =====================
diff --git a/Documentation/devicetree/bindings/arm/sti.yaml b/Documentation/devicetree/bindings/arm/sti.yaml
index b1f28d1..a41cd87 100644
--- a/Documentation/devicetree/bindings/arm/sti.yaml
+++ b/Documentation/devicetree/bindings/arm/sti.yaml
@@ -7,7 +7,7 @@
 title: ST STi Platforms Device Tree Bindings
 
 maintainers:
-  - Patrice Chotard <patrice.chotard@st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
index 8e711bd..ecb28e9 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 ML-AHB interconnect bindings
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 description: |
   These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
index 149afb5..6f846d6 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 Platforms System Controller bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
index 9ac7da0..bcaf7be 100644
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 Platforms Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
 
 properties:
   $nodename:
diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
index 6e80dbc..aa1df03 100644
--- a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
+++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
@@ -104,7 +104,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     cgu: clock-controller@10000000 {
       compatible = "ingenic,jz4770-cgu", "simple-mfd";
       reg = <0x10000000 0x100>;
diff --git a/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
index e17143c..252085a 100644
--- a/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
+++ b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
@@ -42,6 +42,9 @@
   "#clock-cells":
     const: 1
 
+  "#reset-cells":
+    const: 1
+
 required:
   - compatible
   - reg
@@ -57,4 +60,5 @@
       reg = <0x10000000 0x1000>;
       clocks = <&hfclk>, <&rtcclk>;
       #clock-cells = <1>;
+      #reset-cells = <1>;
     };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
index 8fe6f80..bfda6af 100644
--- a/Documentation/devicetree/bindings/clock/silabs,si5351.txt
+++ b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
@@ -2,7 +2,7 @@
 
 Reference
 [1] Si5351A/B/C Data Sheet
-    https://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
+    https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/data-sheets/Si5351-B.pdf
 
 The Si5351a/b/c are programmable i2c clock generators with up to 8 output
 clocks. Si5351a also has a reduced pin-count package (MSOP10) where only
diff --git a/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
index c3930ed..9a0cc73 100644
--- a/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
@@ -23,6 +23,7 @@
           - socionext,uniphier-ld11-clock
           - socionext,uniphier-ld20-clock
           - socionext,uniphier-pxs3-clock
+          - socionext,uniphier-nx1-clock
       - description: Media I/O (MIO) clock, SD clock
         enum:
           - socionext,uniphier-ld4-mio-clock
@@ -33,6 +34,7 @@
           - socionext,uniphier-ld11-mio-clock
           - socionext,uniphier-ld20-sd-clock
           - socionext,uniphier-pxs3-sd-clock
+          - socionext,uniphier-nx1-sd-clock
       - description: Peripheral clock
         enum:
           - socionext,uniphier-ld4-peri-clock
@@ -43,6 +45,10 @@
           - socionext,uniphier-ld11-peri-clock
           - socionext,uniphier-ld20-peri-clock
           - socionext,uniphier-pxs3-peri-clock
+          - socionext,uniphier-nx1-peri-clock
+      - description: SoC-glue clock
+        enum:
+          - socionext,uniphier-pro4-sg-clock
 
   "#clock-cells":
     const: 1
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
index 8b1ecb2..a0ae486 100644
--- a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
@@ -7,7 +7,7 @@
 title: Reset Clock Controller Binding
 
 maintainers:
-  - Gabriel Fernandez <gabriel.fernandez@st.com>
+  - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
 
 description: |
   The RCC IP is both a reset and a clock controller.
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
index cee624c..b72e485 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 CRC bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
index a4574552..ed23bf9 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 CRYP bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
index 6dd658f..10ba947 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 HASH bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
index 304a136..1faae3e 100644
--- a/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/lvds-codec.yaml
@@ -49,11 +49,26 @@
 
     properties:
       port@0:
-        $ref: /schemas/graph.yaml#/properties/port
+        $ref: /schemas/graph.yaml#/$defs/port-base
         description: |
           For LVDS encoders, port 0 is the parallel input
           For LVDS decoders, port 0 is the LVDS input
 
+        properties:
+          endpoint:
+            $ref: /schemas/media/video-interfaces.yaml#
+            unevaluatedProperties: false
+
+            properties:
+              data-mapping:
+                enum:
+                  - jeida-18
+                  - jeida-24
+                  - vesa-24
+                description: |
+                  The color signals mapping order. See details in
+                  Documentation/devicetree/bindings/display/panel/lvds.yaml
+
       port@1:
         $ref: /schemas/graph.yaml#/properties/port
         description: |
@@ -71,6 +86,22 @@
 
   power-supply: true
 
+if:
+  not:
+    properties:
+      compatible:
+        contains:
+          const: lvds-decoder
+then:
+  properties:
+    ports:
+      properties:
+        port@0:
+          properties:
+            endpoint:
+              properties:
+                data-mapping: false
+
 required:
   - compatible
   - ports
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index fce82b6..cdaf7a7 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -40,6 +40,9 @@
   vdd33-supply:
     description: Regulator for 3.3V digital core power.
 
+  aux-bus:
+    $ref: /schemas/display/dp-aux-bus.yaml#
+
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
 
@@ -98,7 +101,21 @@
                     reg = <1>;
                     ps8640_out: endpoint {
                         remote-endpoint = <&panel_in>;
-                   };
+                    };
+                };
+            };
+
+            aux-bus {
+                panel {
+                    compatible = "boe,nv133fhm-n62";
+                    power-supply = <&pp3300_dx_edp>;
+                    backlight = <&backlight>;
+
+                    port {
+                        panel_in: endpoint {
+                            remote-endpoint = <&ps8640_out>;
+                        };
+                    };
                 };
             };
         };
diff --git a/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
index 3c3e51a..11fd68a 100644
--- a/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
@@ -7,7 +7,7 @@
 title: Synopsys DesignWare MIPI DSI host controller
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
   This document defines device tree properties for the Synopsys DesignWare MIPI
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
deleted file mode 100644
index 583c5e9..0000000
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Toshiba TC358767 eDP bridge bindings
-
-Required properties:
- - compatible: "toshiba,tc358767"
- - reg: i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
- - clock-names: should be "ref"
- - clocks: OF device-tree clock specification for refclk input. The reference
-   clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
-
-Optional properties:
- - shutdown-gpios: OF device-tree gpio specification for SD pin
-                   (active high shutdown input)
- - reset-gpios: OF device-tree gpio specification for RSTX pin
-                (active low system reset)
- - toshiba,hpd-pin: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
- - ports: the ports node can contain video interface port nodes to connect
-   to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
-    - port@0: DSI input port
-    - port@1: DPI input port
-    - port@2: eDP/DP output port
-
-[1]: Documentation/devicetree/bindings/graph.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-	edp-bridge@68 {
-		compatible = "toshiba,tc358767";
-		reg = <0x68>;
-		shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-		reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
-		clock-names = "ref";
-		clocks = <&edp_refclk>;
-
-		ports {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			port@1 {
-				reg = <1>;
-
-				bridge_in: endpoint {
-					remote-endpoint = <&dpi_out>;
-				};
-			};
-
-			port@2 {
-				reg = <2>;
-
-				bridge_out: endpoint {
-					remote-endpoint = <&panel_in>;
-				};
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
new file mode 100644
index 0000000..f1541cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358767.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358767 eDP bridge bindings
+
+maintainers:
+  - Andrey Gusakov <andrey.gusakov@cogentembedded.com>
+
+description: The TC358767 is a bridge device which converts DSI/DPI to eDP/DP
+
+properties:
+  compatible:
+    const: toshiba,tc358767
+
+  reg:
+    enum:
+      - 0x68
+      - 0x0f
+    description: |
+        i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
+
+  clock-names:
+    const: "ref"
+
+  clocks:
+    maxItems: 1
+    description: |
+        OF device-tree clock specification for refclk input. The reference
+        clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
+
+  shutdown-gpios:
+    maxItems: 1
+    description: |
+        OF device-tree gpio specification for SD pin (active high shutdown input)
+
+  reset-gpios:
+    maxItems: 1
+    description: |
+        OF device-tree gpio specification for RSTX pin (active low system reset)
+
+  toshiba,hpd-pin:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum:
+      - 0
+      - 1
+    description: TC358767 GPIO pin number to which HPD is connected (0 or 1)
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/ports
+
+    properties:
+      port@0:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            DSI input port. The remote endpoint phandle should be a
+            reference to a valid DSI output endpoint node
+
+      port@1:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            DPI input port. The remote endpoint phandle should be a
+            reference to a valid DPI output endpoint node
+
+      port@2:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            eDP/DP output port. The remote endpoint phandle should be a
+            reference to a valid eDP panel input endpoint node. This port is
+            optional, and is treated as a DP panel if not defined
+
+    oneOf:
+      - required:
+          - port@0
+      - required:
+          - port@1
+
+
+required:
+  - compatible
+  - reg
+  - clock-names
+  - clocks
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    /* DPI input and eDP output */
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        edp-bridge@68 {
+            compatible = "toshiba,tc358767";
+            reg = <0x68>;
+            shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+            clock-names = "ref";
+            clocks = <&edp_refclk>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@1 {
+                    reg = <1>;
+
+                    bridge_in_0: endpoint {
+                        remote-endpoint = <&dpi_out>;
+                    };
+                };
+
+                port@2 {
+                    reg = <2>;
+
+                    bridge_out: endpoint {
+                        remote-endpoint = <&panel_in>;
+                    };
+                };
+            };
+        };
+    };
+  - |
+    /* DPI input and DP output */
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        edp-bridge@68 {
+            compatible = "toshiba,tc358767";
+            reg = <0x68>;
+            shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+            clock-names = "ref";
+            clocks = <&edp_refclk>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@1 {
+                    reg = <1>;
+
+                    bridge_in_1: endpoint {
+                        remote-endpoint = <&dpi_out>;
+                    };
+                };
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
index e679f48..3f93def 100644
--- a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
+++ b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
@@ -45,7 +45,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     ipu@13080000 {
       compatible = "ingenic,jz4770-ipu", "ingenic,jz4760-ipu";
       reg = <0x13080000 0x800>;
diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
index 50d2b0a..0049010 100644
--- a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
+++ b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
@@ -88,7 +88,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     lcd-controller@13050000 {
       compatible = "ingenic,jz4740-lcd";
       reg = <0x13050000 0x1000>;
@@ -107,7 +107,7 @@
     };
 
   - |
-    #include <dt-bindings/clock/jz4725b-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
     lcd-controller@13050000 {
       compatible = "ingenic,jz4725b-lcd";
       reg = <0x13050000 0x1000>;
diff --git a/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml b/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml
index 4b6dda6..17cbd0a 100644
--- a/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml
+++ b/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml
@@ -7,7 +7,7 @@
 title: Orise Tech OTM8009A 3.97" 480x800 TFT LCD panel (MIPI-DSI video mode)
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
              The Orise Tech OTM8009A is a 3.97" 480x800 TFT LCD panel connected using
diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index 335776c..f3c9395d2 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -166,6 +166,8 @@
       - innolux,at070tn92
         # Innolux G070Y2-L01 7" WVGA (800x480) TFT LCD panel
       - innolux,g070y2-l01
+        # Innolux G070Y2-T02 7" WVGA (800x480) TFT LCD TTL panel
+      - innolux,g070y2-t02
         # Innolux Corporation 10.1" G101ICE-L01 WXGA (1280x800) LVDS panel
       - innolux,g101ice-l01
         # Innolux Corporation 12.1" WXGA (1280x800) TFT LCD panel
@@ -309,6 +311,8 @@
       - urt,umsh-8596md-11t
       - urt,umsh-8596md-19t
       - urt,umsh-8596md-20t
+        # Vivax TPC-9150 tablet 9.0" WSVGA TFT LCD panel
+      - vivax,tpc9150-panel
         # VXT 800x480 color TFT LCD panel
       - vxt,vl050-8048nt-c01
         # Winstar Display Corporation 3.5" QVGA (320x240) TFT LCD panel
@@ -317,6 +321,7 @@
       - yes-optoelectronics,ytc700tlag-05-201c
 
   backlight: true
+  ddc-i2c-bus: true
   enable-gpios: true
   port: true
   power-supply: true
diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml
index 3947779..e8ce231 100644
--- a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml
+++ b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml
@@ -7,7 +7,7 @@
 title: Raydium Semiconductor Corporation RM68200 5.5" 720p MIPI-DSI TFT LCD panel
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
   The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml b/Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml
new file mode 100644
index 0000000..271c097
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/sharp,ls060t1sx01.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sharp Microelectronics 6.0" FullHD TFT LCD panel
+
+maintainers:
+  - Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+    const: sharp,ls060t1sx01
+
+  reg: true
+  backlight: true
+  reset-gpios: true
+  port: true
+
+  avdd-supply:
+    description: handle of the regulator that provides the positive supply voltage
+  avee-supply:
+    description: handle of the regulator that provides the negative supply voltage
+  vddi-supply:
+    description: handle of the regulator that provides the I/O supply voltage
+  vddh-supply:
+    description: handle of the regulator that provides the analog supply voltage
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    dsi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        panel@0 {
+            compatible = "sharp,ls060t1sx01";
+            reg = <0>;
+            avdd-supply = <&pm8941_l22>;
+            backlight = <&backlight>;
+            reset-gpios = <&pm8916_gpios 25 GPIO_ACTIVE_LOW>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
index ed310bb..ce1ef93 100644
--- a/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 DSI host controller
 
 maintainers:
-  - Philippe Cornu <philippe.cornu@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Philippe Cornu <philippe.cornu@foss.st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 description:
   The STMicroelectronics STM32 DSI controller uses the Synopsys DesignWare MIPI-DSI host controller.
diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml b/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
index 4ae3d75..01e2da2 100644
--- a/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
+++ b/Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 lcd-tft display controller
 
 maintainers:
-  - Philippe Cornu <philippe.cornu@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Philippe Cornu <philippe.cornu@foss.st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
index ac4d594..dc059d6 100644
--- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
+++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
@@ -68,7 +68,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     dma: dma-controller@13420000 {
       compatible = "ingenic,jz4780-dma";
       reg = <0x13420000 0x400>, <0x13421000 0x40>;
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
index 4bf676fd..55faab6 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
@@ -50,7 +50,7 @@
 
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
index c8d2b51..f751796 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 DMA MUX (DMA request router) bindings
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-router.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
index c30be84..87b4afd 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
@@ -50,7 +50,7 @@
        if no HW ack signal is used by the MDMA client
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xlp.txt b/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
deleted file mode 100644
index 47fc649..0000000
--- a/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Netlogic XLP Family GPIO
-========================
-
-This GPIO driver is used for following Netlogic XLP SoCs:
-	XLP832, XLP316, XLP208, XLP980, XLP532
-This GPIO driver is also compatible with GPIO controller found on
-Broadcom Vulcan ARM64.
-
-Required properties:
--------------------
-
-- compatible: Should be one of the following:
-  - "netlogic,xlp832-gpio": For Netlogic XLP832
-  - "netlogic,xlp316-gpio": For Netlogic XLP316
-  - "netlogic,xlp208-gpio": For Netlogic XLP208
-  - "netlogic,xlp980-gpio": For Netlogic XLP980
-  - "netlogic,xlp532-gpio": For Netlogic XLP532
-  - "brcm,vulcan-gpio": For Broadcom Vulcan ARM64
-- reg: Physical base address and length of the controller's registers.
-- #gpio-cells: Should be two. The first cell is the pin number and the second
-  cell is used to specify optional parameters (currently unused).
-- gpio-controller: Marks the device node as a GPIO controller.
-- nr-gpios: Number of GPIO pins supported by the controller.
-- interrupt-cells: Should be two. The first cell is the GPIO Number. The
-  second cell is used to specify flags. The following subset of flags is
-  supported:
-  - trigger type:
-	1 = low to high edge triggered.
-	2 = high to low edge triggered.
-	4 = active high level-sensitive.
-	8 = active low level-sensitive.
-- interrupts: Interrupt number for this device.
-- interrupt-controller: Identifies the node as an interrupt controller.
-
-Example:
-
-	gpio: xlp_gpio@34000 {
-		compatible = "netlogic,xlp316-gpio";
-		reg = <0 0x34100 0x1000
-		       0 0x35100 0x1000>;
-		#gpio-cells = <2>;
-		gpio-controller;
-		nr-gpios = <57>;
-
-		#interrupt-cells = <2>;
-		interrupt-parent = <&pic>;
-		interrupts = <39>;
-		interrupt-controller;
-	};
diff --git a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
index 47cf9c8..b18c616 100644
--- a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
+++ b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
@@ -7,8 +7,7 @@
 title: STMicroelectronics STM32 Hardware Spinlock bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Fabien Dessenne <fabien.dessenne@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
 
 properties:
   "#hwlock-cells":
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
index 29b9447..fe0c89e 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
@@ -17,9 +17,10 @@
     oneOf:
       - enum:
           - fsl,imx7ulp-lpi2c
-          - fsl,imx8qm-lpi2c
       - items:
-          - const: fsl,imx8qxp-lpi2c
+          - enum:
+              - fsl,imx8qxp-lpi2c
+              - fsl,imx8qm-lpi2c
           - const: fsl,imx7ulp-lpi2c
 
   reg:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml
index 3592d49..c167958 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml
@@ -57,7 +57,9 @@
     const: ipg
 
   clock-frequency:
-    enum: [ 100000, 400000 ]
+    minimum: 1
+    default: 100000
+    maximum: 400000
 
   dmas:
     items:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt b/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
deleted file mode 100644
index f818ef5..0000000
--- a/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-Device tree configuration for the I2C controller on the XLP9xx/5xx SoC
-
-Required properties:
-- compatible      : should be "netlogic,xlp980-i2c"
-- reg             : bus address start and address range size of device
-- interrupts      : interrupt number
-
-Optional properties:
-- clock-frequency : frequency of bus clock in Hz
-                    Defaults to 100 KHz when the property is not specified
-
-Example:
-
-i2c0: i2c@113100 {
-	compatible = "netlogic,xlp980-i2c";
-	#address-cells = <1>;
-	#size-cells = <0>;
-	reg = <0 0x113100 0x100>;
-	clock-frequency = <400000>;
-	interrupts = <30>;
-	interrupt-parent = <&pic>;
-};
diff --git a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
index e1e65eb..febde6cc 100644
--- a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
@@ -60,7 +60,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     #include <dt-bindings/interrupt-controller/irq.h>
     i2c@10054000 {
diff --git a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
index d747f49..c07289a 100644
--- a/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
@@ -7,7 +7,7 @@
 title: I2C controller embedded in STMicroelectronics STM32 I2C platform
 
 maintainers:
-  - Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+  - Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
 
 allOf:
   - $ref: /schemas/i2c/i2c-controller.yaml#
diff --git a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
index 3eb7aa8..698beb8 100644
--- a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
@@ -74,7 +74,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     #include <dt-bindings/iio/adc/ingenic,adc.h>
 
     adc@10070000 {
diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
index a390343d..2287697 100644
--- a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
@@ -7,7 +7,7 @@
 title: Device-Tree bindings for sigma delta modulator
 
 maintainers:
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
index ec0450d..4d60745 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
@@ -19,7 +19,7 @@
   Each STM32 ADC block can have up to 3 ADC instances.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
index 733351d..7c260f2 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 DFSDM ADC device driver
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description: |
   STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to
diff --git a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
index 393f700..6adeda4 100644
--- a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
@@ -15,7 +15,7 @@
   current.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/input/cypress-sf.yaml b/Documentation/devicetree/bindings/input/cypress-sf.yaml
new file mode 100644
index 0000000..c0b0514
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/cypress-sf.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/cypress-sf.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cypress StreetFighter touchkey controller
+
+maintainers:
+  - Yassine Oudjana <y.oudjana@protonmail.com>
+
+allOf:
+  - $ref: input.yaml#
+
+properties:
+  compatible:
+    const: cypress,sf3155
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  avdd-supply:
+    description: Regulator for AVDD analog voltage
+
+  vdd-supply:
+    description: Regulator for VDD digital voltage
+
+  linux,keycodes:
+    minItems: 1
+    maxItems: 8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - avdd-supply
+  - vdd-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/input/input.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        touchkey@28 {
+                compatible = "cypress,sf3155";
+                reg = <0x28>;
+                interrupt-parent = <&msmgpio>;
+                interrupts = <77 IRQ_TYPE_EDGE_FALLING>;
+                avdd-supply = <&vreg_l6a_1p8>;
+                vdd-supply = <&vdd_3v2_tp>;
+                linux,keycodes = <KEY_BACK KEY_MENU>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
index fa0f37a..d5d6bce 100644
--- a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
+++ b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
@@ -19,6 +19,7 @@
       - microchip,cap1106
       - microchip,cap1126
       - microchip,cap1188
+      - microchip,cap1206
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
index 6d3e68e..d19c881 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
@@ -7,8 +7,8 @@
 title: STM32 External Interrupt Controller Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Ludovic Barre <ludovic.barre@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Ludovic Barre <ludovic.barre@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
index b15da9b..8eb4bf5 100644
--- a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
+++ b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
@@ -13,8 +13,8 @@
   channels (N) can be read from a dedicated register.
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml b/Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml
index fa54c56..e287468 100644
--- a/Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml
+++ b/Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml
@@ -30,7 +30,6 @@
 
   power-domain-names:
     minItems: 2
-    maxItems: 3
     items:
       - const: venus
       - const: vcodec0
diff --git a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
index d75019c..77144cc 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
+++ b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
@@ -7,8 +7,7 @@
 title: STMicroelectronics STM32 CEC bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
index 41e1d0c..9c1262a 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
+++ b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) binding
 
 maintainers:
-  - Hugues Fruchet <hugues.fruchet@st.com>
+  - Hugues Fruchet <hugues.fruchet@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
index fe0ce19..24f9e19 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
@@ -84,7 +84,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/gpio/gpio.h>
     nemc: memory-controller@13410000 {
       compatible = "ingenic,jz4780-nemc";
diff --git a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
index cba7420..6b516d3 100644
--- a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
@@ -19,7 +19,7 @@
   Select. The FMC2 performs only one access at a time to an external device.
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
index 8bcea8d..ec7f019 100644
--- a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
@@ -17,7 +17,7 @@
      - simple counter from IN1 input signal.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
index dace353..10b330d 100644
--- a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
@@ -17,8 +17,7 @@
       programmable prescaler.
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
index 19e9afb..b2a4e4a 100644
--- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
@@ -12,7 +12,7 @@
                through VDD) and resistive touchscreen controller.
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
index 305123e7..426658a 100644
--- a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
@@ -9,7 +9,7 @@
 description: STMicroelectronics STPMIC1 Power Management IC
 
 maintainers:
-  - pascal Paillet <p.paillet@st.com>
+  - Pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/mips/ingenic/ingenic,cpu.yaml b/Documentation/devicetree/bindings/mips/ingenic/ingenic,cpu.yaml
index 6df1a94..b7e7fa71 100644
--- a/Documentation/devicetree/bindings/mips/ingenic/ingenic,cpu.yaml
+++ b/Documentation/devicetree/bindings/mips/ingenic/ingenic,cpu.yaml
@@ -44,7 +44,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
     cpus {
         #address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
index 546480f..01d5c6d 100644
--- a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
@@ -61,7 +61,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     mmc0: mmc@13450000 {
       compatible = "ingenic,jz4780-mmc";
diff --git a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
index 89aa3ce..9de8ef6 100644
--- a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
@@ -55,7 +55,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     memory-controller@13410000 {
       compatible = "ingenic,jz4780-nemc";
       reg = <0x13410000 0x10000>;
diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
index 29c5ef2..eab8ea3 100644
--- a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
index d08a881..8e52b2e 100644
--- a/Documentation/devicetree/bindings/net/ingenic,mac.yaml
+++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
@@ -58,7 +58,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/x1000-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
     mac: ethernet@134b0000 {
         compatible = "ingenic,x1000-mac";
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 282d774..7ae70dc 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -7,7 +7,7 @@
 title: Synopsys DesignWare MAC Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
   - Giuseppe Cavallaro <peppe.cavallaro@st.com>
   - Jose Abreu <joabreu@synopsys.com>
 
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
index d3f05d5..577f4e2 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
@@ -8,8 +8,8 @@
 title: STMicroelectronics STM32 / MCU DWMAC glue layer controller
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 description:
   This file documents platform glue layer for stmmac.
diff --git a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
index 1485d3f..bf84768 100644
--- a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
+++ b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
@@ -33,7 +33,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
     efuse@134100d0 {
         compatible = "ingenic,jz4780-efuse";
diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
index 0b80ce2..a48c8fa 100644
--- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
@@ -13,7 +13,7 @@
   internal vref (VREFIN_CAL), unique device ID...
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "nvmem.yaml#"
diff --git a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
index 0fd93d7..5cab216 100644
--- a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
+++ b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
@@ -46,7 +46,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     otg_phy: usb-phy@3c {
       compatible = "ingenic,jz4770-phy";
       reg = <0x3c 0x10>;
diff --git a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
index 2251283..267b695 100644
--- a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
@@ -24,7 +24,7 @@
   |_ UTMI switch_______|          OTG controller
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index dfee6d3..ac88e01 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -8,7 +8,7 @@
 title: STM32 GPIO and Pin Mux/Config controller
 
 maintainers:
-  - Alexandre TORGUE <alexandre.torgue@st.com>
+  - Alexandre TORGUE <alexandre.torgue@foss.st.com>
 
 description: |
   STMicroelectronics's STM32 MCUs intregrate a GPIO and Pin mux/config hardware
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
index 81ccb21..1f5c638 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
@@ -35,9 +35,11 @@
           - renesas,tpu-r8a7794   # R-Car E2
           - renesas,tpu-r8a7795   # R-Car H3
           - renesas,tpu-r8a7796   # R-Car M3-W
+          - renesas,tpu-r8a77961  # R-Car M3-W+
           - renesas,tpu-r8a77965  # R-Car M3-N
           - renesas,tpu-r8a77970  # R-Car V3M
           - renesas,tpu-r8a77980  # R-Car V3H
+          - renesas,tpu-r8a779a0  # R-Car V3U
       - const: renesas,tpu
 
   reg:
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
index 9f1c703..df0191b 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 booster for ADC analog input switches bindings
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 description: |
   Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
index 3cd4a254..836d415 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
@@ -12,7 +12,7 @@
   components through the dedicated VREF+ pin.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "regulator.yaml#"
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
index e6322bc..bd07b9c 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
@@ -7,7 +7,7 @@
 title: STM32MP1 PWR voltage regulators
 
 maintainers:
-  - Pascal Paillet <p.paillet@st.com>
+  - Pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
index d0aa91b..aaaaaba 100644
--- a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
@@ -58,7 +58,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 
     vpu: video-decoder@132a0000 {
       compatible = "ingenic,jz4770-vpu-rproc";
diff --git a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
index 1e62256..b587c97 100644
--- a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
@@ -11,8 +11,8 @@
   boots firmwares on the ST32MP family chipset.
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
index 808f247..044d9a0 100644
--- a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
+++ b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
@@ -32,7 +32,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/x1830-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1830-cgu.h>
 
     dtrng: trng@10072000 {
         compatible = "ingenic,x1830-dtrng";
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
index 82bb2e9..9a6e4ea 100644
--- a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
@@ -11,7 +11,7 @@
   IP and is fully separated from other crypto functions.
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
index 60e93e8..b235b24 100644
--- a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
@@ -72,7 +72,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     rtc_dev: rtc@10003000 {
       compatible = "ingenic,jz4740-rtc";
       reg = <0x10003000 0x40>;
diff --git a/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml b/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml
new file mode 100644
index 0000000..114199c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/mstar,msc313-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mstar MSC313e RTC Device Tree Bindings
+
+allOf:
+  - $ref: "rtc.yaml#"
+
+maintainers:
+  - Daniel Palmer <daniel@0x0f.com>
+  - Romain Perier <romain.perier@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - mstar,msc313-rtc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  start-year: true
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    rtc@2400 {
+        compatible = "mstar,msc313-rtc";
+        reg = <0x2400 0x40>;
+        clocks = <&xtal_div2>;
+        interrupts-extended = <&intc_irq GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt
index 627bb53..6439682 100644
--- a/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt
+++ b/Documentation/devicetree/bindings/rtc/nxp,pcf85063.txt
@@ -13,10 +13,19 @@
   expressed in femto Farad (fF). Valid values are 7000 and 12500.
   Default value (if no value is specified) is 7000fF.
 
+Optional child node:
+- clock: Provide this if the square wave pin is used as a boot-enabled fixed clock.
+
 Example:
 
 pcf85063: rtc@51 {
 	compatible = "nxp,pcf85063";
 	reg = <0x51>;
 	quartz-load-femtofarads = <12500>;
+
+	clock {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <32768>;
+	};
 };
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
index 5456604..2359f54 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 Real Time Clock Bindings
 
 maintainers:
-  - Gabriel Fernandez <gabriel.fernandez@st.com>
+  - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
index b432d4d..9ca7a18 100644
--- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
+++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
@@ -71,7 +71,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     #include <dt-bindings/gpio/gpio.h>
     serial@10032000 {
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
index f50f4ca..333dc42 100644
--- a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
+++ b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
@@ -5,7 +5,7 @@
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 maintainers:
-  - Erwan Le Ray <erwan.leray@st.com>
+  - Erwan Le Ray <erwan.leray@foss.st.com>
 
 title: STMicroelectronics STM32 USART bindings
 
diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
index 0d87e2c8..963a871 100644
--- a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
+++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
@@ -7,7 +7,7 @@
 title: CS42L51 audio codec DT bindings
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
index cdc0fda..d607325 100644
--- a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
+++ b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
@@ -71,7 +71,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     aic: audio-controller@10020000 {
       compatible = "ingenic,jz4740-i2s";
       reg = <0x10020000 0x38>;
diff --git a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
index 97d5f38..48aae54 100644
--- a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
+++ b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
@@ -48,7 +48,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     codec: audio-codec@10020080 {
       compatible = "ingenic,jz4740-codec";
       reg = <0x10020080 0x8>;
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml b/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml
index 6feb5a0..d3966ae 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 SPI/I2S Controller
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description:
   The SPI/I2S block supports I2S/PCM protocols when configured on I2S mode.
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
index f971324..1538d11 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
+++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 Serial Audio Interface (SAI)
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description:
   The SAI interface (Serial Audio Interface) offers a wide set of audio
diff --git a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml
index b7f7dc4..837e830 100644
--- a/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml
+++ b/Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 S/PDIF receiver (SPDIFRX)
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description: |
   The SPDIFRX peripheral, is designed to receive an S/PDIF flow compliant with
diff --git a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
index cf56cc4..5b1c7a2 100644
--- a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
@@ -55,7 +55,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     spi@10043000 {
       compatible = "ingenic,jz4770-spi", "ingenic,jz4750-spi";
       reg = <0x10043000 0x1c>;
diff --git a/Documentation/devicetree/bindings/spi/spi-xlp.txt b/Documentation/devicetree/bindings/spi/spi-xlp.txt
deleted file mode 100644
index f4925ec..0000000
--- a/Documentation/devicetree/bindings/spi/spi-xlp.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-SPI Master controller for Netlogic XLP MIPS64 SOCs
-==================================================
-
-Currently this SPI controller driver is supported for the following
-Netlogic XLP SoCs:
-	XLP832, XLP316, XLP208, XLP980, XLP532
-
-Required properties:
-- compatible		: Should be "netlogic,xlp832-spi".
-- #address-cells	: Number of cells required to define a chip select address
-			  on the SPI bus.
-- #size-cells		: Should be zero.
-- reg			: Should contain register location and length.
-- clocks		: Phandle of the spi clock
-- interrupts		: Interrupt number used by this controller.
-
-SPI slave nodes must be children of the SPI master node and can contain
-properties described in Documentation/devicetree/bindings/spi/spi-bus.txt.
-
-Example:
-
-	spi: xlp_spi@3a100 {
-		compatible = "netlogic,xlp832-spi";
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reg = <0 0x3a100 0x100>;
-		clocks = <&spi_clk>;
-		interrupts = <34>;
-		interrupt-parent = <&pic>;
-
-		spi_nor@1 {
-			compatible = "spansion,s25sl12801";
-			#address-cells = <1>;
-			#size-cells = <1>;
-			reg = <1>;	/* Chip Select */
-			spi-max-frequency = <40000000>;
-		};
-};
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
index 983c4e5..6ec6f55 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI) bindings
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
-  - Patrice Chotard <patrice.chotard@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 allOf:
   - $ref: "spi-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
index 2d9af4c..3d64bed 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
@@ -13,8 +13,8 @@
   from 4 to 32-bit data size.
 
 maintainers:
-  - Erwan Leray <erwan.leray@st.com>
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Erwan Leray <erwan.leray@foss.st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "spi-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
index c0f59c5..bee41cf 100644
--- a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STM32 digital thermal sensor (DTS) binding
 
 maintainers:
-  - David Hernandez Sanchez <david.hernandezsanchez@st.com>
+  - Pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
index df3eb76..98648bf 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
+++ b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
@@ -46,7 +46,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/x1000-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
     ost: timer@12000000 {
         compatible = "ingenic,x1000-ost";
diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
index 8165df4..7fb37ea 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
+++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
@@ -237,7 +237,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     #include <dt-bindings/clock/ingenic,tcu.h>
     tcu: timer@10002000 {
       compatible = "ingenic,jz4770-tcu", "ingenic,jz4760-tcu", "simple-mfd";
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
index 176aa3c..937aa8a 100644
--- a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
@@ -7,7 +7,8 @@
 title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
index f506225a4..5921235 100644
--- a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
+++ b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
@@ -58,7 +58,7 @@
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     usb_phy: usb-phy {
       compatible = "usb-nop-xceiv";
       #phy-cells = <0>;
diff --git a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
index 9a51efa..ead1571 100644
--- a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
+++ b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
@@ -7,7 +7,7 @@
 title: STMicroelectronics STUSB160x Type-C controller bindings
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 983f5e4..66d6432 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1286,6 +1286,8 @@
     description: Vitesse Semiconductor Corporation
   "^vivante,.*":
     description: Vivante Corporation
+  "^vivax,.*":
+    description: Vivax brand by M SAN Grupa d.o.o.
   "^vocore,.*":
     description: VoCore Studio
   "^voipac,.*":
diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
index 44cad94..43afa24 100644
--- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
@@ -40,14 +40,12 @@
 
   clocks:
     minItems: 1
-    maxItems: 2
     items:
       - description: High-frequency oscillator input, divided internally
       - description: Low-frequency oscillator input, only found on some variants
 
   clock-names:
     minItems: 1
-    maxItems: 2
     items:
       - const: hosc
       - const: losc
diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
index 481bf91f..3973644 100644
--- a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
@@ -7,8 +7,8 @@
 title: STMicroelectronics STM32 Independent WatchDoG (IWDG) bindings
 
 maintainers:
-  - Yannick Fertre <yannick.fertre@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 allOf:
   - $ref: "watchdog.yaml#"
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index ec3e71f..e445cb1 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -27,7 +27,7 @@
 ==============
 
 The ReST markups currently used by the Documentation/ files are meant to be
-built with ``Sphinx`` version 1.3 or higher.
+built with ``Sphinx`` version 1.7 or higher.
 
 There's a script that checks for the Sphinx requirements. Please see
 :ref:`sphinx-pre-install` for further details.
@@ -43,10 +43,6 @@
 
 .. note::
 
-   #) Sphinx versions below 1.5 don't work properly with Python's
-      docutils version 0.13.1 or higher. So, if you're willing to use
-      those versions, you should run ``pip install 'docutils==0.12'``.
-
    #) It is recommended to use the RTD theme for html output. Depending
       on the Sphinx version, it should be installed separately,
       with ``pip install sphinx_rtd_theme``.
@@ -55,13 +51,13 @@
       those expressions are written using LaTeX notation. It needs texlive
       installed with amsfonts and amsmath in order to evaluate them.
 
-In summary, if you want to install Sphinx version 1.7.9, you should do::
+In summary, if you want to install Sphinx version 2.4.4, you should do::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-After running ``. sphinx_1.7.9/bin/activate``, the prompt will change,
+After running ``. sphinx_2.4.4/bin/activate``, the prompt will change,
 in order to indicate that you're using the new environment. If you
 open a new shell, you need to rerun this command to enter again at
 the virtual environment before building the documentation.
@@ -81,7 +77,7 @@
 PDF and LaTeX builds
 --------------------
 
-Such builds are currently supported only with Sphinx versions 1.4 and higher.
+Such builds are currently supported only with Sphinx versions 2.4 and higher.
 
 For PDF and LaTeX output, you'll also need ``XeLaTeX`` version 3.14159265.
 
@@ -104,8 +100,8 @@
 	You should run:
 
 		sudo dnf install -y texlive-luatex85
-		/usr/bin/virtualenv sphinx_1.7.9
-		. sphinx_1.7.9/bin/activate
+		/usr/bin/virtualenv sphinx_2.4.4
+		. sphinx_2.4.4/bin/activate
 		pip install -r Documentation/sphinx/requirements.txt
 
 	Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst
index 681c6a4..4f490278 100644
--- a/Documentation/filesystems/autofs.rst
+++ b/Documentation/filesystems/autofs.rst
@@ -35,7 +35,7 @@
 required with any user-space program.  Subsequent text refers to this
 as the "automount daemon" or simply "the daemon".
 
-"autofs" is a Linux kernel module with provides the "autofs"
+"autofs" is a Linux kernel module which provides the "autofs"
 filesystem type.  Several "autofs" filesystems can be mounted and they
 can each be managed separately, or all managed by the same daemon.
 
diff --git a/Documentation/filesystems/cifs/ksmbd.rst b/Documentation/filesystems/cifs/ksmbd.rst
index a132615..b0d354f 100644
--- a/Documentation/filesystems/cifs/ksmbd.rst
+++ b/Documentation/filesystems/cifs/ksmbd.rst
@@ -50,11 +50,11 @@
 --------------------------------
 
 ksmbd.mountd is a userspace process to transfer the user account and password that
-are registered using ksmbd.adduser(part of utils for user space). Further it
+are registered using ksmbd.adduser (part of the userspace utils). Further it
 allows sharing information parameters that are parsed from smb.conf to ksmbd in
 kernel. For the execution part it has a daemon which is continuously running
 and connected to the kernel interface using a netlink socket, where it waits for
-requests(dcerpc and share/user info). It handles RPC calls (at a minimum few
+requests (dcerpc and share/user info). It handles RPC calls (at a minimum a few
 dozen) that are most important for a file server, such as NetShareEnum and
 NetServerGetInfo. The complete DCE/RPC response is prepared in user space
 and passed over to the associated kernel thread for the client.
@@ -154,11 +154,11 @@
 1. Enable all component prints
 	# sudo ksmbd.control -d "all"
 
-2. Enable one of components(smb, auth, vfs, oplock, ipc, conn, rdma)
+2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma)
 	# sudo ksmbd.control -d "smb"
 
-3. Show what prints are enable.
-	# cat/sys/class/ksmbd-control/debug
+3. Show what prints are enabled.
+	# cat /sys/class/ksmbd-control/debug
 	  [smb] auth vfs oplock ipc conn [rdma]
 
 4. Disable prints:
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 6f3c6e9..d7b8469 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -197,10 +197,29 @@
 			 FAULT_DISCARD		  0x000002000
 			 FAULT_WRITE_IO		  0x000004000
 			 FAULT_SLAB_ALLOC	  0x000008000
+			 FAULT_DQUOT_INIT	  0x000010000
 			 ===================	  ===========
 mode=%s			 Control block allocation mode which supports "adaptive"
 			 and "lfs". In "lfs" mode, there should be no random
 			 writes towards main area.
+			 "fragment:segment" and "fragment:block" are newly added here.
+			 These are developer options for experiments that simulate filesystem
+			 fragmentation and the after-GC situation. Developers use these
+			 modes to understand fragmentation and after-GC conditions well,
+			 and eventually gain insights to handle them better.
+			 In "fragment:segment", f2fs allocates a new segment at a random
+			 position. With this, we can simulate the after-GC condition.
+			 In "fragment:block", we can scatter block allocation with the
+			 "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
+			 Some randomness is added to both the chunk and hole size to make
+			 the result close to a realistic IO pattern. So, in this mode, f2fs
+			 allocates 1..<max_fragment_chunk> blocks in a chunk and makes a hole
+			 in the length of 1..<max_fragment_hole> by turns. With this, the
+			 newly allocated blocks will be scattered throughout the partition.
+			 Note that "fragment:block" implicitly enables the "fragment:segment"
+			 option for more randomness.
+			 Please use these options only for experiments; we strongly
+			 recommend re-formatting the filesystem afterwards.
 io_bits=%u		 Set the bit size of write IO requests. It should be set
 			 with "mode=lfs".
 usrquota		 Enable plain user disk quota accounting.
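
The fragment modes above are driven purely by the mount option plus the two
sysfs nodes. As a minimal sketch of a test harness (the device name "sda1"
and the chunk/hole values are assumptions, and the filesystem is assumed to
be mounted with -o mode=fragment:block beforehand), the nodes could be set
like this::

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Write a value to one of the f2fs fragment-mode sysfs nodes. */
    static int set_node(const char *node, const char *val)
    {
            char path[256];
            int fd;

            snprintf(path, sizeof(path), "/sys/fs/f2fs/sda1/%s", node);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            if (write(fd, val, strlen(val)) < 0) {
                    close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }

    int main(void)
    {
            set_node("max_fragment_chunk", "4");   /* 1..512 blocks per chunk */
            set_node("max_fragment_hole", "4");    /* 1..512 blocks per hole */
            return 0;
    }
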
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index bb68d39..375baca 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0
 
 =================================
-NETWORK FILESYSTEM HELPER LIBRARY
+Network Filesystem Helper Library
 =================================
 
 .. Contents:
@@ -37,22 +37,22 @@
 
 The following services are provided:
 
- * Handles transparent huge pages (THPs).
+ * Handle folios that span multiple pages.
 
- * Insulates the netfs from VM interface changes.
+ * Insulate the netfs from VM interface changes.
 
- * Allows the netfs to arbitrarily split reads up into pieces, even ones that
-   don't match page sizes or page alignments and that may cross pages.
+ * Allow the netfs to arbitrarily split reads up into pieces, even ones that
+   don't match folio sizes or folio alignments and that may cross folios.
 
- * Allows the netfs to expand a readahead request in both directions to meet
-   its needs.
+ * Allow the netfs to expand a readahead request in both directions to meet its
+   needs.
 
- * Allows the netfs to partially fulfil a read, which will then be resubmitted.
+ * Allow the netfs to partially fulfil a read, which will then be resubmitted.
 
- * Handles local caching, allowing cached data and server-read data to be
+ * Handle local caching, allowing cached data and server-read data to be
    interleaved for a single request.
 
- * Handles clearing of bufferage that aren't on the server.
+ * Handle clearing of bufferage that isn't on the server.
 
  * Handle retrying of reads that failed, switching reads from the cache to the
    server as necessary.
@@ -70,22 +70,22 @@
 
 Three read helpers are provided::
 
- * void netfs_readahead(struct readahead_control *ractl,
-			const struct netfs_read_request_ops *ops,
-			void *netfs_priv);``
- * int netfs_readpage(struct file *file,
-		      struct page *page,
-		      const struct netfs_read_request_ops *ops,
-		      void *netfs_priv);
- * int netfs_write_begin(struct file *file,
-			 struct address_space *mapping,
-			 loff_t pos,
-			 unsigned int len,
-			 unsigned int flags,
-			 struct page **_page,
-			 void **_fsdata,
-			 const struct netfs_read_request_ops *ops,
-			 void *netfs_priv);
+	void netfs_readahead(struct readahead_control *ractl,
+			     const struct netfs_read_request_ops *ops,
+			     void *netfs_priv);
+	int netfs_readpage(struct file *file,
+			   struct folio *folio,
+			   const struct netfs_read_request_ops *ops,
+			   void *netfs_priv);
+	int netfs_write_begin(struct file *file,
+			      struct address_space *mapping,
+			      loff_t pos,
+			      unsigned int len,
+			      unsigned int flags,
+			      struct folio **_folio,
+			      void **_fsdata,
+			      const struct netfs_read_request_ops *ops,
+			      void *netfs_priv);
 
 Each corresponds to a VM operation, with the addition of a couple of parameters
 for the use of the read helpers:
@@ -103,8 +103,8 @@
 For ->readahead() and ->readpage(), the network filesystem should just jump
 into the corresponding read helper; whereas for ->write_begin(), it may be a
 little more complicated as the network filesystem might want to flush
-conflicting writes or track dirty data and needs to put the acquired page if an
-error occurs after calling the helper.
+conflicting writes or track dirty data and needs to put the acquired folio if
+an error occurs after calling the helper.
 
 The helpers manage the read request, calling back into the network filesystem
 through the supplied table of operations.  Waits will be performed as
@@ -253,7 +253,7 @@
 		void (*issue_op)(struct netfs_read_subrequest *subreq);
 		bool (*is_still_valid)(struct netfs_read_request *rreq);
 		int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-					 struct page *page, void **_fsdata);
+					 struct folio *folio, void **_fsdata);
 		void (*done)(struct netfs_read_request *rreq);
 		void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 	};
@@ -313,13 +313,14 @@
 
    There is no return value; the netfs_subreq_terminated() function should be
    called to indicate whether or not the operation succeeded and how much data
-   it transferred.  The filesystem also should not deal with setting pages
+   it transferred.  The filesystem also should not deal with setting folios
    uptodate, unlocking them or dropping their refs - the helpers need to deal
    with this as they have to coordinate with copying to the local cache.
 
-   Note that the helpers have the pages locked, but not pinned.  It is possible
-   to use the ITER_XARRAY iov iterator to refer to the range of the inode that
-   is being operated upon without the need to allocate large bvec tables.
+   Note that the helpers have the folios locked, but not pinned.  It is
+   possible to use the ITER_XARRAY iov iterator to refer to the range of the
+   inode that is being operated upon without the need to allocate large bvec
+   tables.
 
  * ``is_still_valid()``
 
@@ -330,15 +331,15 @@
  * ``check_write_begin()``
 
    [Optional] This is called from the netfs_write_begin() helper once it has
-   allocated/grabbed the page to be modified to allow the filesystem to flush
+   allocated/grabbed the folio to be modified to allow the filesystem to flush
    conflicting state before allowing it to be modified.
 
-   It should return 0 if everything is now fine, -EAGAIN if the page should be
+   It should return 0 if everything is now fine, -EAGAIN if the folio should be
    regrabbed and any other error code to abort the operation.
 
  * ``done``
 
-   [Optional] This is called after the pages in the request have all been
+   [Optional] This is called after the folios in the request have all been
    unlocked (and marked uptodate if applicable).
 
  * ``cleanup``
@@ -390,7 +391,7 @@
      * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
        end of the slice instead of reissuing.
 
- * Once the data is read, the pages that have been fully read/cleared:
+ * Once the data is read, the folios that have been fully read/cleared:
 
    * Will be marked uptodate.
 
@@ -398,11 +399,11 @@
 
    * Unlocked
 
- * Any pages that need writing to the cache will then have DIO writes issued.
+ * Any folios that need writing to the cache will then have DIO writes issued.
 
  * Synchronous operations will wait for reading to be complete.
 
- * Writes to the cache will proceed asynchronously and the pages will have the
+ * Writes to the cache will proceed asynchronously and the folios will have the
    PG_fscache mark removed when that completes.
 
  * The request structures will be cleaned up when everything has completed.
@@ -452,6 +453,9 @@
 			    netfs_io_terminated_t term_func,
 			    void *term_func_priv);
 
+		int (*prepare_write)(struct netfs_cache_resources *cres,
+				     loff_t *_start, size_t *_len, loff_t i_size);
+
 		int (*write)(struct netfs_cache_resources *cres,
 			     loff_t start_pos,
 			     struct iov_iter *iter,
@@ -509,6 +513,14 @@
    indicating whether the termination is definitely happening in the caller's
    context.
 
+ * ``prepare_write()``
+
+   [Required] Called to adjust a write to the cache and check that there is
+   sufficient space in the cache.  The start and length values indicate the
+   size of the write that netfslib is proposing, and this can be adjusted by
+   the cache to respect DIO boundaries.  The file size is passed for
+   information.
+
  * ``write()``
 
    [Required] Called to write to the cache.  The start file offset is given
@@ -525,4 +537,9 @@
 there isn't a read request structure as well, such as writing dirty data to the
 cache.
 
+
+API Function Reference
+======================
+
 .. kernel-doc:: include/linux/netfs.h
+.. kernel-doc:: fs/netfs/read_helper.c
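
For illustration, a cache back end's prepare_write() might simply round the
proposed write out to its DIO granule; this is only a sketch against the
signature shown above, with an invented 4KiB block size and no real space
accounting::

    #include <linux/netfs.h>

    #define MY_CACHE_DIO_BLOCK 4096	/* hypothetical cache granule */

    /* Expand [start, start + len) to DIO boundaries, clamped to EOF. */
    static int my_cache_prepare_write(struct netfs_cache_resources *cres,
                                      loff_t *_start, size_t *_len,
                                      loff_t i_size)
    {
            loff_t start = round_down(*_start, MY_CACHE_DIO_BLOCK);
            loff_t end = round_up(*_start + *_len, MY_CACHE_DIO_BLOCK);
            loff_t eof = round_up(i_size, MY_CACHE_DIO_BLOCK);

            if (end > eof)
                    end = eof;

            *_start = start;
            *_len = end - start;
            return 0;	/* or -ENOBUFS if the cache has no space */
    }
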
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 6613543..60d1d7e 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -314,16 +314,19 @@
 Garbage collect fbdev scrolling acceleration
 --------------------------------------------
 
-Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode =
-SCROLL_REDRAW. There's a ton of code this will allow us to remove:
+Scroll acceleration has been disabled in fbcon. It now behaves like the old
+SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the bmove hook
+was removed from fbcon_ops.
+Remaining tasks:
 
-- lots of code in fbcon.c
-
-- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called
+- a bunch of the hooks in fbcon_ops could be removed or simplified by calling
   directly instead of the function table (with a switch on p->rotate)
 
 - fb_copyarea is unused after this, and can be deleted from all drivers
 
+- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as
+  well as cfb_copyarea
+
 Note that not all acceleration code can be deleted, since clearing and cursor
 support is still accelerated, which might be good candidates for further
 deletion projects.
diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst
index 9e07e6b..00d8e17 100644
--- a/Documentation/i2c/smbus-protocol.rst
+++ b/Documentation/i2c/smbus-protocol.rst
@@ -36,6 +36,8 @@
 
 =============== =============================================================
 S               Start condition
+Sr              Repeated start condition, used to switch from write to
+                read mode.
 P               Stop condition
 Rd/Wr (1 bit)   Read/Write bit. Rd equals 1, Wr equals 0.
 A, NA (1 bit)   Acknowledge (ACK) and Not Acknowledge (NACK) bit
@@ -100,7 +102,7 @@
 This reads a single byte from a device, from a designated register.
 The register is specified through the Comm byte::
 
-  S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
 
@@ -114,7 +116,7 @@
 device, from a designated register that is specified through the Comm
 byte. But this time, the data is a complete word (16 bits)::
 
-  S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [DataLow] A [DataHigh] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
 
@@ -164,7 +166,7 @@
 16 bits of data to it, and reads 16 bits of data in return::
 
   S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
-                               S Addr Rd [A] [DataLow] A [DataHigh] NA P
+                              Sr Addr Rd [A] [DataLow] A [DataHigh] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
 
@@ -181,7 +183,7 @@
 ::
 
   S Addr Wr [A] Comm [A]
-             S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
+            Sr Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
 
@@ -212,7 +214,7 @@
 1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return::
 
   S Addr Wr [A] Comm [A] Count [A] Data [A] ...
-                               S Addr Rd [A] [Count] A [Data] ... A P
+                              Sr Addr Rd [A] [Count] A [Data] ... A P
 
 Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
 
@@ -300,7 +302,7 @@
 designated register that is specified through the Comm byte::
 
   S Addr Wr [A] Comm [A]
-             S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+            Sr Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
 
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index 95ef56d..387fda8 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -37,8 +37,7 @@
 
 	0: disable any special handling on port reuse. The new
 	connection will be delivered to the same real server that was
-	servicing the previous connection. This will effectively
-	disable expire_nodest_conn.
+	servicing the previous connection.
 
 	bit 1: enable rescheduling of new connections when it is safe.
 	That is, whenever expire_nodest_conn and for TCP sockets, when
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index a722eb3..80b1335 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -486,8 +486,8 @@
 Drivers are free to use a more permissive configuration than the requested
 configuration. It is expected that drivers should only implement directly the
 most generic mode that can be supported. For example if the hardware can
-support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale
-HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT
+support HWTSTAMP_FILTER_PTP_V2_EVENT, then it should generally always upscale
+HWTSTAMP_FILTER_PTP_V2_L2_SYNC, and so forth, as HWTSTAMP_FILTER_PTP_V2_EVENT
 is more generic (and more useful to applications).
 
 A driver which supports hardware time stamping shall update the struct
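
In driver terms, "upscaling" usually amounts to widening the requested filter
before programming the hardware and reporting the widened mode back. A hedged
sketch of such a mapping (the function name is illustrative; the filter
constants are from include/uapi/linux/net_tstamp.h)::

    #include <linux/errno.h>
    #include <linux/net_tstamp.h>

    /* Widen specific PTP v2 filters to the generic event filter that
     * the hardware actually implements.
     */
    static int foo_map_rx_filter(struct hwtstamp_config *config)
    {
            switch (config->rx_filter) {
            case HWTSTAMP_FILTER_NONE:
                    break;
            case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
            case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
            case HWTSTAMP_FILTER_PTP_V2_SYNC:
            case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
            case HWTSTAMP_FILTER_PTP_V2_EVENT:
                    /* Report back the more generic mode that was used. */
                    config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
                    break;
            default:
                    return -ERANGE;
            }
            return 0;
    }
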
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 8a2788a..5ac62a7 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -84,6 +84,16 @@
 2.2 Registration of performance domains
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Registration of 'advanced' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'advanced' EM gets its name from the fact that the driver is allowed
+to provide a more precise power model. It is not limited to a fixed math
+formula in the framework (as in the 'simple' EM case). It can better reflect
+the real power measurements performed for each performance state. Thus, this
+registration method should be preferred when accounting for static power
+(leakage) is important.
+
 Drivers are expected to register performance domains into the EM framework by
 calling the following API::
 
@@ -103,6 +113,18 @@
 See Section 3. for an example of driver implementing this
 callback, or Section 2.4 for further documentation on this API
 
+Registration of 'simple' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 'simple' EM is registered using the framework helper function
+cpufreq_register_em_with_opp(). It implements a power model which is tied to
+the math formula::
+
+	Power = C * V^2 * f
+
+The EM which is registered using this method might not correctly reflect the
+physics of a real device, e.g. when static power (leakage) is important.
+
 
 2.3 Accessing performance domains
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -138,6 +160,10 @@
 3. Example driver
 -----------------
 
+The CPUFreq framework supports a dedicated callback for registering
+the EM for a given CPU 'policy' object: cpufreq_driver::register_em().
+That callback has to be implemented properly for a given driver,
+because the framework will call it at the right time during setup.
 This section provides a simple example of a CPUFreq driver registering a
 performance domain in the Energy Model framework using the (fake) 'foo'
 protocol. The driver implements an est_power() function to be provided to the
@@ -167,25 +193,22 @@
   20		return 0;
   21	}
   22
-  23	static int foo_cpufreq_init(struct cpufreq_policy *policy)
+  23	static void foo_cpufreq_register_em(struct cpufreq_policy *policy)
   24	{
   25		struct em_data_callback em_cb = EM_DATA_CB(est_power);
   26		struct device *cpu_dev;
-  27		int nr_opp, ret;
+  27		int nr_opp;
   28
   29		cpu_dev = get_cpu_device(cpumask_first(policy->cpus));
   30
-  31     	/* Do the actual CPUFreq init work ... */
-  32     	ret = do_foo_cpufreq_init(policy);
-  33     	if (ret)
-  34     		return ret;
-  35
-  36     	/* Find the number of OPPs for this policy */
-  37     	nr_opp = foo_get_nr_opp(policy);
+  31     	/* Find the number of OPPs for this policy */
+  32     	nr_opp = foo_get_nr_opp(policy);
+  33
+  34     	/* And register the new performance domain */
+  35     	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
+  36					    true);
+  37	}
   38
-  39     	/* And register the new performance domain */
-  40     	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
-  41					    true);
-  42
-  43	        return 0;
-  44	}
+  39	static struct cpufreq_driver foo_cpufreq_driver = {
+  40		.register_em = foo_cpufreq_register_em,
+  41	};
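
To make the 'simple' model concrete, the formula can be evaluated by hand for
one operating point; a toy calculation in which the capacitance, voltage and
frequency values are all invented::

    #include <stdio.h>

    int main(void)
    {
            /* Power = C * V^2 * f, the 'simple' EM formula. */
            double C = 1.0e-10;	/* effective capacitance [F], invented */
            double V = 0.9;		/* OPP voltage [V], invented */
            double f = 1.2e9;	/* OPP frequency [Hz], invented */

            printf("P = %.1f mW\n", C * V * V * f * 1000.0);
            return 0;
    }
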
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index e35ab74..b398b85 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -54,7 +54,7 @@
 iptables               1.4.2            iptables -V
 openssl & libcrypto    1.0.0            openssl version
 bc                     1.06.95          bc --version
-Sphinx\ [#f1]_	       1.3		sphinx-build --version
+Sphinx\ [#f1]_         1.7              sphinx-build --version
 ====================== ===============  ========================================
 
 .. [#f1] Sphinx is needed only to build the Kernel documentation
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index a0cc969..da085d6 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -22,8 +22,8 @@
 easier.
 
 Some subsystems and maintainer trees have additional information about
-their workflow and expectations, see :ref:`Documentation/process/maintainer
-handbooks <maintainer_handbooks_main>`.
+their workflow and expectations, see
+:ref:`Documentation/process/maintainer-handbooks.rst <maintainer_handbooks_main>`.
 
 Obtain a current source tree
 ----------------------------
diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index 406cc68..d5fd6cc 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,7 +15,10 @@
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+
+Also the following security hook has been utilised::
+
+    security_inet_conn_established()
 
 The usage of these hooks is described below with the SELinux implementation
 described in the `SCTP SELinux Support`_ chapter.
@@ -119,12 +122,11 @@
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received, and the peer secid will be
-saved into ``@asoc->peer_secid`` for client::
+Called when a COOKIE ACK is received::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
@@ -132,7 +134,7 @@
 -------------------------------------------------
 
 The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
+``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
 establishing an association.
 ::
 
@@ -170,7 +172,7 @@
           <------------------------------------------- COOKIE ACK
           |                                               |
     sctp_sf_do_5_1E_ca                                    |
- Call security_sctp_assoc_established()                   |
+ Call security_inet_conn_established()                    |
  to set the peer label.                                   |
           |                                               |
           |                               If SCTP_SOCKET_TCP or peeled off
@@ -196,7 +198,7 @@
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+    security_inet_conn_established()
 
 
 security_sctp_assoc_request()
@@ -269,12 +271,12 @@
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
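
For reference, an LSM wires up the hook named above like any other; a minimal
sketch of registering inet_conn_established() (the "demo" module name and the
empty handler body are illustrative)::

    #include <linux/init.h>
    #include <linux/lsm_hooks.h>
    #include <net/sock.h>

    /* Runs when the COOKIE ACK (or TCP final ACK) is received. */
    static void demo_inet_conn_established(struct sock *sk,
                                           struct sk_buff *skb)
    {
            /* e.g. derive the peer secid from @skb and store it in
             * the security blob attached to @sk.
             */
    }

    static struct security_hook_list demo_hooks[] __lsm_ro_after_init = {
            LSM_HOOK_INIT(inet_conn_established, demo_inet_conn_established),
    };

    static int __init demo_lsm_init(void)
    {
            security_add_hooks(demo_hooks, ARRAY_SIZE(demo_hooks), "demo");
            return 0;
    }
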
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 4e5b26f..b3166c4 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -2442,11 +2442,10 @@
   #!/bin/bash
 
   tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts`
-  echo nop > $tracefs/tracing/current_tracer
-  echo 0 > $tracefs/tracing/tracing_on
-  echo $$ > $tracefs/tracing/set_ftrace_pid
-  echo function > $tracefs/tracing/current_tracer
-  echo 1 > $tracefs/tracing/tracing_on
+  echo 0 > $tracefs/tracing_on
+  echo $$ > $tracefs/set_ftrace_pid
+  echo function > $tracefs/current_tracer
+  echo 1 > $tracefs/tracing_on
   exec "$@"
 
 
diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst
index 0046d75..9762452 100644
--- a/Documentation/translations/it_IT/doc-guide/sphinx.rst
+++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst
@@ -35,7 +35,7 @@
 ====================
 
 I marcatori ReST utilizzati nei file in Documentation/ sono pensati per essere
-processati da ``Sphinx`` nella versione 1.3 o superiore.
+processati da ``Sphinx`` nella versione 1.7 o superiore.
 
 Esiste uno script che verifica i requisiti Sphinx. Per ulteriori dettagli
 consultate :ref:`it_sphinx-pre-install`.
@@ -53,11 +53,6 @@
 
 .. note::
 
-   #) Le versioni di Sphinx inferiori alla 1.5 non funzionano bene
-      con il pacchetto Python docutils versione 0.13.1 o superiore.
-      Se volete usare queste versioni, allora dovere eseguire
-      ``pip install 'docutils==0.12'``.
-
    #) Viene raccomandato l'uso del tema RTD per la documentazione in HTML.
       A seconda della versione di Sphinx, potrebbe essere necessaria
       l'installazione tramite il comando ``pip install sphinx_rtd_theme``.
@@ -67,13 +62,13 @@
       utilizzando LaTeX. Per una corretta interpretazione, è necessario aver
       installato texlive con i pacchetti amdfonts e amsmath.
 
-Riassumendo, se volete installare la versione 1.7.9 di Sphinx dovete eseguire::
+Riassumendo, se volete installare la versione 2.4.4 di Sphinx dovete eseguire::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-Dopo aver eseguito ``. sphinx_1.7.9/bin/activate``, il prompt cambierà per
+Dopo aver eseguito ``. sphinx_2.4.4/bin/activate``, il prompt cambierà per
 indicare che state usando il nuovo ambiente. Se aprite un nuova sessione,
 prima di generare la documentazione, dovrete rieseguire questo comando per
 rientrare nell'ambiente virtuale.
@@ -94,7 +89,7 @@
 --------------------------
 
 Al momento, la generazione di questi documenti è supportata solo dalle
-versioni di Sphinx superiori alla 1.4.
+versioni di Sphinx superiori alla 2.4.
 
 Per la generazione di PDF e LaTeX, avrete bisogno anche del pacchetto
 ``XeLaTeX`` nella versione 3.14159265
@@ -119,8 +114,8 @@
 	You should run:
 
 		sudo dnf install -y texlive-luatex85
-		/usr/bin/virtualenv sphinx_1.7.9
-		. sphinx_1.7.9/bin/activate
+		/usr/bin/virtualenv sphinx_2.4.4
+		. sphinx_2.4.4/bin/activate
 		pip install -r Documentation/sphinx/requirements.txt
 
 	Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/translations/it_IT/process/changes.rst b/Documentation/translations/it_IT/process/changes.rst
index 87d0818..dc71933 100644
--- a/Documentation/translations/it_IT/process/changes.rst
+++ b/Documentation/translations/it_IT/process/changes.rst
@@ -57,7 +57,7 @@
 iptables               1.4.2              iptables -V
 openssl & libcrypto    1.0.0              openssl version
 bc                     1.06.95            bc --version
-Sphinx\ [#f1]_         1.3                sphinx-build --version
+Sphinx\ [#f1]_         1.7                sphinx-build --version
 ====================== =================  ========================================
 
 .. [#f1] Sphinx è necessario solo per produrre la documentazione del Kernel
diff --git a/Documentation/translations/zh_CN/doc-guide/sphinx.rst b/Documentation/translations/zh_CN/doc-guide/sphinx.rst
index 951595c..23eac67 100644
--- a/Documentation/translations/zh_CN/doc-guide/sphinx.rst
+++ b/Documentation/translations/zh_CN/doc-guide/sphinx.rst
@@ -26,7 +26,7 @@
 安装Sphinx
 ==========
 
-Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。
+Documentation/ 下的ReST文件现在使用sphinx1.7或更高版本构建。
 
 这有一个脚本可以检查Sphinx的依赖项。更多详细信息见
 :ref:`sphinx-pre-install_zh` 。
@@ -40,22 +40,19 @@
 
 .. note::
 
-   #) 低于1.5版本的Sphinx无法与Python的0.13.1或更高版本docutils一起正常工作。
-      如果您想使用这些版本,那么应该运行 ``pip install 'docutils==0.12'`` 。
-
    #) html输出建议使用RTD主题。根据Sphinx版本的不同,它应该用
       ``pip install sphinx_rtd_theme`` 单独安装。
 
    #) 一些ReST页面包含数学表达式。由于Sphinx的工作方式,这些表达式是使用 LaTeX
       编写的。它需要安装amsfonts和amsmath宏包,以便显示。
 
-总之,如您要安装Sphinx 1.7.9版本,应执行::
+总之,如您要安装Sphinx 2.4.4版本,应执行::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-在运行 ``. sphinx_1.7.9/bin/activate`` 之后,提示符将变化,以指示您正在使用新
+在运行 ``. sphinx_2.4.4/bin/activate`` 之后,提示符将变化,以指示您正在使用新
 环境。如果您打开了一个新的shell,那么在构建文档之前,您需要重新运行此命令以再
 次进入虚拟环境中。
 
@@ -71,7 +68,7 @@
 PDF和LaTeX构建
 --------------
 
-目前只有Sphinx 1.4及更高版本才支持这种构建。
+目前只有Sphinx 2.4及更高版本才支持这种构建。
 
 对于PDF和LaTeX输出,还需要 ``XeLaTeX`` 3.14159265版本。(译注:此版本号真实
 存在)
@@ -93,8 +90,8 @@
 	You should run:
 
 		sudo dnf install -y texlive-luatex85
-		/usr/bin/virtualenv sphinx_1.7.9
-		. sphinx_1.7.9/bin/activate
+		/usr/bin/virtualenv sphinx_2.4.4
+		. sphinx_2.4.4/bin/activate
 		pip install -r Documentation/sphinx/requirements.txt
 
 	Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
diff --git a/Documentation/translations/zh_CN/process/management-style.rst b/Documentation/translations/zh_CN/process/management-style.rst
index c6a5bb2..8053ae4 100644
--- a/Documentation/translations/zh_CN/process/management-style.rst
+++ b/Documentation/translations/zh_CN/process/management-style.rst
@@ -36,14 +36,14 @@
 每个人都认为管理者做决定,而且决策很重要。决定越大越痛苦,管理者就必须越高级。
 这很明显,但事实并非如此。
 
-游戏的名字是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
+最重要的是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
 我们真的需要你来做决定”,你就是陷入麻烦的管理者。你管理的人比你更了解细节,
 所以如果他们来找你做技术决策,你完蛋了。你显然没有能力为他们做这个决定。
 
 (推论:如果你管理的人不比你更了解细节,你也会被搞砸,尽管原因完全不同。
 也就是说,你的工作是错的,他们应该管理你的才智)
 
-所以游戏的名字是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
+所以最重要的是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
 和非结果性的决定是很好的,并且使您看起来好像知道自己在做什么,所以内核管理者
 需要做的是将那些大的和痛苦的决定变成那些没有人真正关心的小事情。
 
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 3b093d6d..aeeb071 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6911,6 +6911,20 @@
 When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
 perform a bulk copy of tags to/from the guest.
 
+7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
+-------------------------------------
+
+Architectures: x86 SEV enabled
+Type: vm
+Parameters: args[0] is the fd of the source vm
+Returns: 0 on success
+
+This capability enables userspace to migrate the encryption context from the VM
+indicated by the fd to the VM this is called on.
+
+This is intended to support intra-host migration of VMs between userspace VMMs,
+upgrading the VMM process without interrupting the guest.
+
 8. Other capabilities.
 ======================
 
diff --git a/Documentation/x86/xstate.rst b/Documentation/x86/xstate.rst
index 65de3f0..5cec7fb 100644
--- a/Documentation/x86/xstate.rst
+++ b/Documentation/x86/xstate.rst
@@ -63,3 +63,12 @@
 the handler allocates a larger xstate buffer for the task so the large
 state can be context switched. In the unlikely cases that the allocation
 fails, the kernel sends SIGSEGV.
+
+Dynamic features in signal frames
+---------------------------------
+
+Dynamically enabled features are not written to the signal frame upon signal
+entry if the feature is in its initial configuration.  This differs from
+non-dynamic features which are always written regardless of their
+configuration.  Signal handlers can examine the XSAVE buffer's XSTATE_BV
+field to determine if a feature was written.
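
A signal handler can locate XSTATE_BV because the legacy FXSAVE region of the
XSAVE image is 512 bytes and the XSAVE header (whose first 64-bit word is
XSTATE_BV) follows it. A hedged sketch for x86-64, assuming the kernel handed
over the extended frame format (printf is used only for illustration; it is
not async-signal-safe)::

    #include <signal.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <ucontext.h>

    static void handler(int sig, siginfo_t *si, void *uc_v)
    {
            ucontext_t *uc = uc_v;
            const uint8_t *xsave = (const uint8_t *)uc->uc_mcontext.fpregs;
            uint64_t xstate_bv;

            /* The XSAVE header starts at offset 512; XSTATE_BV is its
             * first 8 bytes. Bit N set means component N was written
             * to this signal frame.
             */
            memcpy(&xstate_bv, xsave + 512, sizeof(xstate_bv));
            printf("XSTATE_BV = %#llx\n", (unsigned long long)xstate_bv);
    }
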
diff --git a/MAINTAINERS b/MAINTAINERS
index ceba033..9138565 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2263,6 +2263,15 @@
 S:	Maintained
 F:	drivers/counter/microchip-tcb-capture.c
 
+ARM/MILBEAUT ARCHITECTURE
+M:	Taichi Sugaya <sugaya.taichi@socionext.com>
+M:	Takao Orito <orito.takao@socionext.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/boot/dts/milbeaut*
+F:	arch/arm/mach-milbeaut/
+N:	milbeaut
+
 ARM/MIOA701 MACHINE SUPPORT
 M:	Robert Jarzmik <robert.jarzmik@free.fr>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2283,6 +2292,7 @@
 F:	arch/arm/mach-mstar/
 F:	drivers/clk/mstar/
 F:	drivers/gpio/gpio-msc313.c
+F:	drivers/rtc/rtc-msc313.c
 F:	drivers/watchdog/msc313e_wdt.c
 F:	include/dt-bindings/clock/mstar-*
 F:	include/dt-bindings/gpio/msc313-gpio.h
@@ -2728,10 +2738,11 @@
 F:	drivers/memory/*emif*
 
 ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
+M:	Nishanth Menon <nm@ti.com>
 M:	Santosh Shilimkar <ssantosh@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:	arch/arm/boot/dts/keystone-*
 F:	arch/arm/mach-keystone/
 
@@ -3569,13 +3580,14 @@
 S:	Supported
 F:	drivers/net/ethernet/broadcom/b44.*
 
-BROADCOM B53 ETHERNET SWITCH DRIVER
+BROADCOM B53/SF2 ETHERNET SWITCH DRIVER
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
 S:	Supported
 F:	Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
 F:	drivers/net/dsa/b53/*
+F:	drivers/net/dsa/bcm_sf2*
 F:	include/linux/dsa/brcm.h
 F:	include/linux/platform_data/b53.h
 
@@ -3732,7 +3744,7 @@
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
 M:	Ariel Elior <aelior@marvell.com>
 M:	Sudarsana Kalluru <skalluru@marvell.com>
-M:	GR-everest-linux-l2@marvell.com
+M:	Manish Chopra <manishc@marvell.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2x/
@@ -4458,7 +4470,7 @@
 M:	Hans de Goede <hdegoede@redhat.com>
 L:	linux-input@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
+F:	Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
 F:	drivers/input/touchscreen/chipone_icn8318.c
 
 CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER
@@ -4675,11 +4687,10 @@
 M:	Julia Lawall <Julia.Lawall@inria.fr>
 M:	Gilles Muller <Gilles.Muller@inria.fr>
 M:	Nicolas Palix <nicolas.palix@imag.fr>
-M:	Michal Marek <michal.lkml@markovi.net>
-L:	cocci@systeme.lip6.fr (moderated for non-subscribers)
+L:	cocci@inria.fr (moderated for non-subscribers)
 S:	Supported
-W:	http://coccinelle.lip6.fr/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
+W:	https://coccinelle.gitlabpages.inria.fr/website/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jlawall/linux.git
 F:	Documentation/dev-tools/coccinelle.rst
 F:	scripts/coccicheck
 F:	scripts/coccinelle/
@@ -5204,6 +5215,13 @@
 S:	Maintained
 F:	drivers/input/touchscreen/cy8ctma140.c
 
+CYPRESS STREETFIGHTER TOUCHKEYS DRIVER
+M:	Yassine Oudjana <y.oudjana@protonmail.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/input/cypress-sf.yaml
+F:	drivers/input/keyboard/cypress-sf.c
+
 CYTTSP TOUCHSCREEN DRIVER
 M:	Linus Walleij <linus.walleij@linaro.org>
 L:	linux-input@vger.kernel.org
@@ -8062,9 +8080,10 @@
 
 GOODIX TOUCHSCREEN
 M:	Bastien Nocera <hadess@hadess.net>
+M:	Hans de Goede <hdegoede@redhat.com>
 L:	linux-input@vger.kernel.org
 S:	Maintained
-F:	drivers/input/touchscreen/goodix.c
+F:	drivers/input/touchscreen/goodix*
 
 GOOGLE ETHERNET DRIVERS
 M:	Jeroen de Borst <jeroendb@google.com>
@@ -10437,7 +10456,7 @@
 F:	arch/riscv/kvm/
 
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
-M:	Christian Borntraeger <borntraeger@de.ibm.com>
+M:	Christian Borntraeger <borntraeger@linux.ibm.com>
 M:	Janosch Frank <frankja@linux.ibm.com>
 R:	David Hildenbrand <david@redhat.com>
 R:	Claudio Imbrenda <imbrenda@linux.ibm.com>
@@ -10799,11 +10818,6 @@
 F:	include/linux/ata.h
 F:	include/linux/libata.h
 
-LIBLOCKDEP
-M:	Sasha Levin <alexander.levin@microsoft.com>
-S:	Maintained
-F:	tools/lib/lockdep/
-
 LIBNVDIMM BLK: MMIO-APERTURE DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
@@ -14409,7 +14423,9 @@
 M:	Deep Shah <sdeep@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
+L:	x86@kernel.org
 S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
 F:	Documentation/virt/paravirt_ops.rst
 F:	arch/*/include/asm/paravirt*.h
 F:	arch/*/kernel/paravirt*
@@ -15588,7 +15604,7 @@
 
 QLOGIC QL4xxx ETHERNET DRIVER
 M:	Ariel Elior <aelior@marvell.com>
-M:	GR-everest-linux-l2@marvell.com
+M:	Manish Chopra <manishc@marvell.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/qlogic/qed/
@@ -15963,6 +15979,7 @@
 
 RANDOM NUMBER DRIVER
 M:	"Theodore Ts'o" <tytso@mit.edu>
+M:	Jason A. Donenfeld <Jason@zx2c4.com>
 S:	Maintained
 F:	drivers/char/random.c
 
@@ -16568,7 +16585,7 @@
 S390
 M:	Heiko Carstens <hca@linux.ibm.com>
 M:	Vasily Gorbik <gor@linux.ibm.com>
-M:	Christian Borntraeger <borntraeger@de.ibm.com>
+M:	Christian Borntraeger <borntraeger@linux.ibm.com>
 R:	Alexander Gordeev <agordeev@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
@@ -18478,6 +18495,7 @@
 F:	include/uapi/linux/tc_act/
 F:	include/uapi/linux/tc_ematch/
 F:	net/sched/
+F:	tools/testing/selftests/tc-testing
 
 TC90522 MEDIA DRIVER
 M:	Akihiro Tsukada <tskd08@gmail.com>
@@ -19026,11 +19044,12 @@
 F:	include/linux/tifm.h
 
 TI KEYSTONE MULTICORE NAVIGATOR DRIVERS
+M:	Nishanth Menon <nm@ti.com>
 M:	Santosh Shilimkar <ssantosh@kernel.org>
 L:	linux-kernel@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:	drivers/soc/ti/*
 
 TI LM49xxx FAMILY ASoC CODEC DRIVERS
@@ -20312,7 +20331,8 @@
 F:	arch/x86/kernel/cpu/vmware.c
 
 VMWARE PVRDMA DRIVER
-M:	Adit Ranadive <aditr@vmware.com>
+M:	Bryan Tan <bryantan@vmware.com>
+M:	Vishnu Dasa <vdasa@vmware.com>
 M:	VMware PV-Drivers <pv-drivers@vmware.com>
 L:	linux-rdma@vger.kernel.org
 S:	Maintained
@@ -21075,6 +21095,18 @@
 F:	include/linux/zsmalloc.h
 F:	mm/zsmalloc.c
 
+ZSTD
+M:	Nick Terrell <terrelln@fb.com>
+S:	Maintained
+B:	https://github.com/facebook/zstd/issues
+T:	git git://github.com/terrelln/linux.git
+F:	include/linux/zstd*
+F:	lib/zstd/
+F:	lib/decompress_unzstd.c
+F:	crypto/zstd.c
+N:	zstd
+K:	zstd
+
 ZSWAP COMPRESSED SWAP CACHING
 M:	Seth Jennings <sjenning@redhat.com>
 M:	Dan Streetman <ddstreet@ieee.org>
diff --git a/Makefile b/Makefile
index d83d72c..0a6ecc8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 15
+PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION =
-NAME = Trick or Treat
+EXTRAVERSION = -rc3
+NAME = Gobble Gobble
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -789,7 +789,7 @@
 KBUILD_CFLAGS += $(stackp-flags-y)
 
 KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y)
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
 
 ifdef CONFIG_CC_IS_CLANG
 KBUILD_CPPFLAGS += -Qunused-arguments
@@ -801,10 +801,6 @@
 KBUILD_CFLAGS += -mno-global-merge
 else
 
-# Warn about unmarked fall-throughs in switch statement.
-# Disabled for clang while comment to attribute conversion happens and
-# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
-KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,)
 # gcc inanely warns about local variables called 'main'
 KBUILD_CFLAGS += -Wno-main
 endif
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b8ed1..d3c4ab2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -991,6 +991,16 @@
 	  and vice-versa 32-bit applications to call 64-bit mmap().
 	  Required for applications doing different bitness syscalls.
 
+config PAGE_SIZE_LESS_THAN_64KB
+	def_bool y
+	depends on !ARM64_64K_PAGES
+	depends on !IA64_PAGE_SIZE_64KB
+	depends on !PAGE_SIZE_64KB
+	depends on !PARISC_PAGE_SIZE_64KB
+	depends on !PPC_64K_PAGES
+	depends on !PPC_256K_PAGES
+	depends on !PAGE_SIZE_256KB
+
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
 # is not in legacy mode (compat task, unlimited stack size or
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index e4a041c..ca5a322 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -488,3 +488,4 @@
 556	common	landlock_restrict_self		sys_landlock_restrict_self
 # 557 reserved for memfd_secret
 558	common	process_mrelease		sys_process_mrelease
+559	common	futex_waitv			sys_futex_waitv
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e8c2c74..e201b4b 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -36,7 +36,6 @@
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
 void dma_cache_inv(phys_addr_t start, unsigned long sz);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f0f9e8b..c2724d9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1463,6 +1463,7 @@
 	bool "High Memory Support"
 	depends on MMU
 	select KMAP_LOCAL
+	select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
 	help
 	  The address space of ARM processors is only 4 Gigabytes large
 	  and it has to accommodate user address space, kernel address
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index a522716..77172d5 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -60,15 +60,15 @@
 # Note that GCC does not numerically define an architecture version
 # macro, but instead defines a whole series of macros which makes
 # testing for a specific architecture or later rather impossible.
-arch-$(CONFIG_CPU_32v7M)	=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
-arch-$(CONFIG_CPU_32v7)		=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
-arch-$(CONFIG_CPU_32v6)		=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
+arch-$(CONFIG_CPU_32v7M)	=-D__LINUX_ARM_ARCH__=7 -march=armv7-m
+arch-$(CONFIG_CPU_32v7)		=-D__LINUX_ARM_ARCH__=7 -march=armv7-a
+arch-$(CONFIG_CPU_32v6)		=-D__LINUX_ARM_ARCH__=6 -march=armv6
 # Only override the compiler option if ARMv6. The ARMv6K extensions are
 # always available in ARMv7
 ifeq ($(CONFIG_CPU_32v6),y)
-arch-$(CONFIG_CPU_32v6K)	=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
+arch-$(CONFIG_CPU_32v6K)	=-D__LINUX_ARM_ARCH__=6 -march=armv6k
 endif
-arch-$(CONFIG_CPU_32v5)		=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
+arch-$(CONFIG_CPU_32v5)		=-D__LINUX_ARM_ARCH__=5 -march=armv5te
 arch-$(CONFIG_CPU_32v4T)	=-D__LINUX_ARM_ARCH__=4 -march=armv4t
 arch-$(CONFIG_CPU_32v4)		=-D__LINUX_ARM_ARCH__=4 -march=armv4
 arch-$(CONFIG_CPU_32v3)		=-D__LINUX_ARM_ARCH__=3 -march=armv3m
@@ -82,7 +82,7 @@
 tune-$(CONFIG_CPU_ARM740T)	=-mtune=arm7tdmi
 tune-$(CONFIG_CPU_ARM9TDMI)	=-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM940T)	=-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM946E)	=$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
+tune-$(CONFIG_CPU_ARM946E)	=-mtune=arm9e
 tune-$(CONFIG_CPU_ARM920T)	=-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM922T)	=-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM925T)	=-mtune=arm9tdmi
@@ -90,11 +90,11 @@
 tune-$(CONFIG_CPU_FA526)	=-mtune=arm9tdmi
 tune-$(CONFIG_CPU_SA110)	=-mtune=strongarm110
 tune-$(CONFIG_CPU_SA1100)	=-mtune=strongarm1100
-tune-$(CONFIG_CPU_XSCALE)	=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_XSC3)		=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_FEROCEON)	=$(call cc-option,-mtune=marvell-f,-mtune=xscale)
-tune-$(CONFIG_CPU_V6)		=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
-tune-$(CONFIG_CPU_V6K)		=$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+tune-$(CONFIG_CPU_XSCALE)	=-mtune=xscale
+tune-$(CONFIG_CPU_XSC3)		=-mtune=xscale
+tune-$(CONFIG_CPU_FEROCEON)	=-mtune=xscale
+tune-$(CONFIG_CPU_V6)		=-mtune=arm1136j-s
+tune-$(CONFIG_CPU_V6K)		=-mtune=arm1136j-s
 
 # Evaluate tune cc-option calls now
 tune-y := $(tune-y)
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index 3b60297..9e01dbc 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -506,11 +506,17 @@
 			#address-cells = <3>;
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
-			interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+			interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
 				     <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
 			interrupt-names = "pcie", "msi";
 			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 			interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 2 &gicv2 GIC_SPI 144
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 3 &gicv2 GIC_SPI 145
+							IRQ_TYPE_LEVEL_HIGH>,
+					<0 0 0 4 &gicv2 GIC_SPI 146
 							IRQ_TYPE_LEVEL_HIGH>;
 			msi-controller;
 			msi-parent = <&pcie0>;
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index d4f3550..f69d2af 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -242,6 +242,8 @@
 
 			gpio-controller;
 			#gpio-cells = <2>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
 		};
 
 		pcie0: pcie@12000 {
@@ -408,7 +410,7 @@
 	i2c0: i2c@18009000 {
 		compatible = "brcm,iproc-i2c";
 		reg = <0x18009000 0x50>;
-		interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+		interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 		clock-frequency = <100000>;
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e68fb87..5e56288 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -290,7 +290,6 @@
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *folio);
 
 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index fc2608b..18f0119 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -33,7 +33,7 @@
 u32 socfpga_sdram_self_refresh(u32 sdr_base);
 extern unsigned int socfpga_sdram_self_refresh_sz;
 
-extern char secondary_trampoline, secondary_trampoline_end;
+extern char secondary_trampoline[], secondary_trampoline_end[];
 
 extern unsigned long socfpga_cpu1start_addr;
 
diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
index fbb80b88..201191c 100644
--- a/arch/arm/mach-socfpga/platsmp.c
+++ b/arch/arm/mach-socfpga/platsmp.c
@@ -20,14 +20,14 @@
 
 static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+	int trampoline_size = secondary_trampoline_end - secondary_trampoline;
 
 	if (socfpga_cpu1start_addr) {
 		/* This will put CPU #1 into reset. */
 		writel(RSTMGR_MPUMODRST_CPU1,
 		       rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST);
 
-		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+		memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
 
 		writel(__pa_symbol(secondary_startup),
 		       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));
@@ -45,12 +45,12 @@
 
 static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+	int trampoline_size = secondary_trampoline_end - secondary_trampoline;
 
 	if (socfpga_cpu1start_addr) {
 		writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr +
 		       SOCFPGA_A10_RSTMGR_MODMPURST);
-		memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+		memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
 
 		writel(__pa_symbol(secondary_startup),
 		       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a4e0060..274e4f7 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -390,9 +390,9 @@
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
 	BUG_ON(idx >= __end_of_fixed_addresses);
 
-	/* we only support device mappings until pgprot_kernel has been set */
+	/* We support only device mappings before pgprot_kernel is set. */
 	if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
-		    pgprot_val(pgprot_kernel) == 0))
+		    pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0))
 		return;
 
 	if (pgprot_val(prot))
diff --git a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
index 3e47273..a960c0b 100644
--- a/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynosautov9.dtsi
@@ -296,8 +296,7 @@
 			pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>;
 			phys = <&ufs_0_phy>;
 			phy-names = "ufs-phy";
-			samsung,sysreg = <&syscon_fsys2>;
-			samsung,ufs-shareability-reg-offset = <0x710>;
+			samsung,sysreg = <&syscon_fsys2 0x710>;
 			status = "disabled";
 		};
 	};
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index a305ce2..d52a0b2 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -68,6 +68,7 @@
 #define ESR_ELx_EC_MAX		(0x3F)
 
 #define ESR_ELx_EC_SHIFT	(26)
+#define ESR_ELx_EC_WIDTH	(6)
 #define ESR_ELx_EC_MASK		(UL(0x3F) << ESR_ELx_EC_SHIFT)
 #define ESR_ELx_EC(esr)		(((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
 
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 347b0cc68f..1494cfa 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -12,6 +12,17 @@
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 
+/*
+ * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
+ * "return address pointer" which can be used to uniquely identify a return
+ * address which has been overwritten.
+ *
+ * On arm64 we use the address of the caller's frame record, which remains the
+ * same for the lifetime of the instrumented function, unlike the return
+ * address in the LR.
+ */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #else
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a39fcf3..01d47c5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -91,7 +91,7 @@
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1		((1 << 31) | (1 << 23))
+#define TCR_EL2_RES1		((1U << 31) | (1 << 23))
 #define TCR_EL2_TBI		(1 << 20)
 #define TCR_EL2_PS_SHIFT	16
 #define TCR_EL2_PS_MASK		(7 << TCR_EL2_PS_SHIFT)
@@ -276,7 +276,7 @@
 #define CPTR_EL2_TFP_SHIFT 10
 
 /* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC	(1 << 31)
+#define CPTR_EL2_TCPAC	(1U << 31)
 #define CPTR_EL2_TAM	(1 << 30)
 #define CPTR_EL2_TTA	(1 << 20)
 #define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
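The 1U annotations above matter because shifting 1 into bit 31 of a signed
int is undefined behaviour in C; a standalone illustration:

	#include <stdio.h>

	int main(void)
	{
		/* 1 << 31 is undefined: the result does not fit in int. */
		unsigned int ok = 1U << 31;	/* well defined: 0x80000000 */

		printf("%#x\n", ok);
		return 0;
	}
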
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4be8486..2a5f7f3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -584,7 +584,7 @@
 	u64 exits;
 };
 
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 8433a20..2372244 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -76,7 +76,7 @@
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
-	VM_BUG_ON(mm != &init_mm);
+	VM_BUG_ON(mm && mm != &init_mm);
 	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
 }
 
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index a4e046e..6564a01 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -47,9 +47,6 @@
  * @prev_type:   The type of stack this frame record was on, or a synthetic
  *               value of STACK_TYPE_UNKNOWN. This is used to detect a
  *               transition from one stack to another.
- *
- * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
- *               replacement lr value in the ftrace graph stack.
  */
 struct stackframe {
 	unsigned long fp;
@@ -57,9 +54,6 @@
 	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
 	unsigned long prev_fp;
 	enum stack_type prev_type;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	int graph;
-#endif
 #ifdef CONFIG_KRETPROBES
 	struct llist_node *kr_cur;
 #endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 6e2e0b7..3a5ff5e 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -281,12 +281,22 @@
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 } while (0)
 
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could expand to expressions that
+ * call blocking functions, we must evaluate them outside of the critical
+ * section.
+ */
 #define __raw_get_user(x, ptr, err)					\
 do {									\
+	__typeof__(*(ptr)) __user *__rgu_ptr = (ptr);			\
+	__typeof__(x) __rgu_val;					\
 	__chk_user_ptr(ptr);						\
+									\
 	uaccess_ttbr0_enable();						\
-	__raw_get_mem("ldtr", x, ptr, err);				\
+	__raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err);		\
 	uaccess_ttbr0_disable();					\
+									\
+	(x) = __rgu_val;						\
 } while (0)
 
 #define __get_user_error(x, ptr, err)					\
@@ -310,14 +320,22 @@
 
 #define get_user	__get_user
 
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may expand to expressions
+ * that call blocking functions, we must evaluate them outside of the critical
+ * section.
+ */
 #define __get_kernel_nofault(dst, src, type, err_label)			\
 do {									\
+	__typeof__(dst) __gkn_dst = (dst);				\
+	__typeof__(src) __gkn_src = (src);				\
 	int __gkn_err = 0;						\
 									\
 	__uaccess_enable_tco_async();					\
-	__raw_get_mem("ldr", *((type *)(dst)),				\
-		      (__force type *)(src), __gkn_err);		\
+	__raw_get_mem("ldr", *((type *)(__gkn_dst)),			\
+		      (__force type *)(__gkn_src), __gkn_err);		\
 	__uaccess_disable_tco_async();					\
+									\
 	if (unlikely(__gkn_err))					\
 		goto err_label;						\
 } while (0)
@@ -351,11 +369,19 @@
 	}								\
 } while (0)
 
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could expand to expressions that
+ * call blocking functions, we must evaluate them outside of the critical
+ * section.
+ */
 #define __raw_put_user(x, ptr, err)					\
 do {									\
-	__chk_user_ptr(ptr);						\
+	__typeof__(*(ptr)) __user *__rpu_ptr = (ptr);			\
+	__typeof__(*(ptr)) __rpu_val = (x);				\
+	__chk_user_ptr(__rpu_ptr);					\
+									\
 	uaccess_ttbr0_enable();						\
-	__raw_put_mem("sttr", x, ptr, err);				\
+	__raw_put_mem("sttr", __rpu_val, __rpu_ptr, err);		\
 	uaccess_ttbr0_disable();					\
 } while (0)
 
@@ -380,14 +406,22 @@
 
 #define put_user	__put_user
 
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may expand to expressions
+ * that call blocking functions, we must evaluate them outside of the critical
+ * section.
+ */
 #define __put_kernel_nofault(dst, src, type, err_label)			\
 do {									\
+	__typeof__(dst) __pkn_dst = (dst);				\
+	__typeof__(src) __pkn_src = (src);				\
 	int __pkn_err = 0;						\
 									\
 	__uaccess_enable_tco_async();					\
-	__raw_put_mem("str", *((type *)(src)),				\
-		      (__force type *)(dst), __pkn_err);		\
+	__raw_put_mem("str", *((type *)(__pkn_src)),			\
+		      (__force type *)(__pkn_dst), __pkn_err);		\
 	__uaccess_disable_tco_async();					\
+									\
 	if (unlikely(__pkn_err))					\
 		goto err_label;						\
 } while(0)
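The pattern applied throughout the uaccess.h hunks, reduced to a generic,
runnable sketch: capture the macro arguments exactly once, before the
critical section begins. Here critical_enter()/critical_exit() are
placeholders standing in for the real uaccess gates.

	#include <stdio.h>

	static void critical_enter(void) { /* placeholder gate */ }
	static void critical_exit(void)  { /* placeholder gate */ }

	/* Arguments are evaluated here, outside the critical section, so
	 * side effects (or calls that might sleep) cannot happen inside. */
	#define PUT_ONCE(x, ptr)					\
	do {								\
		__typeof__(*(ptr)) *__po_ptr = (ptr);			\
		__typeof__(*(ptr)) __po_val = (x);			\
									\
		critical_enter();					\
		*__po_ptr = __po_val;					\
		critical_exit();					\
	} while (0)

	int main(void)
	{
		int v = 0;

		PUT_ONCE(42, &v);
		printf("%d\n", v);	/* prints 42 */
		return 0;
	}
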
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index fc62dfe..4506c4a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -244,8 +244,6 @@
  * on the way back to parent. For this purpose, this function is called
  * in _mcount() or ftrace_caller() to replace return address (*parent) on
  * the call stack to return_to_handler.
- *
- * Note that @frame_pointer is used only for sanity check later.
  */
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 			   unsigned long frame_pointer)
@@ -263,8 +261,10 @@
 	 */
 	old = *parent;
 
-	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+	if (!function_graph_enter(old, self_addr, frame_pointer,
+	    (void *)frame_pointer)) {
 		*parent = return_hooker;
+	}
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c30624f..94f83cd4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -38,9 +38,6 @@
 {
 	frame->fp = fp;
 	frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame->graph = 0;
-#endif
 #ifdef CONFIG_KRETPROBES
 	frame->kr_cur = NULL;
 #endif
@@ -116,20 +113,23 @@
 	frame->prev_fp = fp;
 	frame->prev_type = info.type;
 
+	frame->pc = ptrauth_strip_insn_pac(frame->pc);
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk->ret_stack &&
-		(ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
-		struct ftrace_ret_stack *ret_stack;
+		(frame->pc == (unsigned long)return_to_handler)) {
+		unsigned long orig_pc;
 		/*
 		 * This is a case where function graph tracer has
 		 * modified a return address (LR) in a stack frame
 		 * to hook a function return.
 		 * So replace it to an original value.
 		 */
-		ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
-		if (WARN_ON_ONCE(!ret_stack))
+		orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
+						(void *)frame->fp);
+		if (WARN_ON_ONCE(frame->pc == orig_pc))
 			return -EINVAL;
-		frame->pc = ret_stack->ret;
+		frame->pc = orig_pc;
 	}
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 #ifdef CONFIG_KRETPROBES
@@ -137,8 +137,6 @@
 		frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
 #endif
 
-	frame->pc = ptrauth_strip_insn_pac(frame->pc);
-
 	return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f5490af..e4727dc 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -223,7 +223,14 @@
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
-		r = num_online_cpus();
+		/*
+		 * ARM64 treats KVM_CAP_NR_VCPUS differently from all other
+		 * architectures, as it does not always limit it to
+		 * KVM_CAP_MAX_VCPUS. It should not matter much because
+		 * this is just an advisory value.
+		 */
+		r = min_t(unsigned int, num_online_cpus(),
+			  kvm_arm_default_max_vcpus());
 		break;
 	case KVM_CAP_MAX_VCPUS:
 	case KVM_CAP_MAX_VCPU_ID:
@@ -1389,12 +1396,9 @@
 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 	}
 	case KVM_ARM_PREFERRED_TARGET: {
-		int err;
 		struct kvm_vcpu_init init;
 
-		err = kvm_vcpu_preferred_target(&init);
-		if (err)
-			return err;
+		kvm_vcpu_preferred_target(&init);
 
 		if (copy_to_user(argp, &init, sizeof(init)))
 			return -EFAULT;
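As the comment in the KVM_CAP_NR_VCPUS hunk above notes, the value is
advisory; an illustrative sketch (not part of the patch) of how userspace
might read and sanity-check it, assuming kvm_fd is an open /dev/kvm
descriptor:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int recommended_nr_vcpus(int kvm_fd)
	{
		int nr = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
		int max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

		/* Advisory only: never trust it past the hard limit. */
		return (max > 0 && nr > max) ? max : nr;
	}
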
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5ce26be..e116c77 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -869,13 +869,10 @@
 	return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	u32 target = kvm_target_cpu();
 
-	if (target < 0)
-		return -ENODEV;
-
 	memset(init, 0, sizeof(*init));
 
 	/*
@@ -885,8 +882,6 @@
 	 * target type.
 	 */
 	init->target = (__u32)target;
-
-	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 9aa9b734..b6b6801 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -44,7 +44,7 @@
 el1_sync:				// Guest trapped into EL2
 
 	mrs	x0, esr_el2
-	lsr	x0, x0, #ESR_ELx_EC_SHIFT
+	ubfx	x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
 	cmp	x0, #ESR_ELx_EC_HVC64
 	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
 	b.ne	el1_trap
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 7a0af1d..96c5f3f 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -403,6 +403,8 @@
 
 static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
 
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
 /*
  * Allow the hypervisor to handle the exit with an exit handler if it has one.
  *
@@ -429,6 +431,18 @@
  */
 static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
+	/*
+	 * Save PSTATE early so that the vcpu mode can be evaluated
+	 * up front.
+	 */
+	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+	/*
+	 * Check whether we want to repaint the state one way or
+	 * another.
+	 */
+	early_exit_filter(vcpu, exit_code);
+
 	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index de7e14c..7ecca8b 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -70,7 +70,12 @@
 static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt->regs.pc			= read_sysreg_el2(SYS_ELR);
-	ctxt->regs.pstate		= read_sysreg_el2(SYS_SPSR);
+	/*
+	 * Guest PSTATE gets saved at guest fixup time in all
+	 * cases. We still need to handle the nVHE host side here.
+	 */
+	if (!has_vhe() && ctxt->__hyp_running_vcpu)
+		ctxt->regs.pstate	= read_sysreg_el2(SYS_SPSR);
 
 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
 		ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 0c6116d..3d613e7 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -141,7 +141,7 @@
 .L__vect_start\@:
 	stp	x0, x1, [sp, #-16]!
 	mrs	x0, esr_el2
-	lsr	x0, x0, #ESR_ELx_EC_SHIFT
+	ubfx	x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
 	cmp	x0, #ESR_ELx_EC_HVC64
 	b.eq	__host_hvc
 	b	__host_exit
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 862c7b5..578f717 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -178,7 +178,7 @@
 
 	phys = kvm_pte_to_phys(pte);
 	if (!addr_is_memory(phys))
-		return 0;
+		return -EINVAL;
 
 	/*
 	 * Adjust the host stage-2 mappings to match the ownership attributes
@@ -207,8 +207,18 @@
 		.cb	= finalize_host_mappings_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF,
 	};
+	int i, ret;
 
-	return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+	for (i = 0; i < hyp_memblock_nr; i++) {
+		struct memblock_region *reg = &hyp_memory[i];
+		u64 start = (u64)hyp_phys_to_virt(reg->base);
+
+		ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 void __noreturn __pkvm_init_finalise(void)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c0e3fed..d13115a 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -233,7 +233,7 @@
  * Returns false if the guest ran in AArch32 when it shouldn't have, and
  * thus should exit to the host, or true if the guest run loop can continue.
  */
-static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 
@@ -248,10 +248,7 @@
 		vcpu->arch.target = -1;
 		*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
 		*exit_code |= ARM_EXCEPTION_IL;
-		return false;
 	}
-
-	return true;
 }
 
 /* Switch to the guest for legacy non-VHE systems */
@@ -316,9 +313,6 @@
 		/* Jump in the fire! */
 		exit_code = __guest_enter(vcpu);
 
-		if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
-			break;
-
 		/* And we're baaack! */
 	} while (fixup_guest_exit(vcpu, &exit_code));
 
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 3787ee6..792cf6e 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -474,7 +474,7 @@
 	return true;
 }
 
-/**
+/*
  * Handler for protected VM restricted exceptions.
  *
  * Inject an undefined exception into the guest and return true to indicate that
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 5a2cb5d..fbb26b9 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -112,6 +112,10 @@
 	return hyp_exit_handlers;
 }
 
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
 /* Switch to the guest for VHE systems running in EL2 */
 static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h
deleted file mode 100644
index ee6c614..0000000
--- a/arch/hexagon/include/asm/timer-regs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Timer support for Hexagon
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#ifndef _ASM_TIMER_REGS_H
-#define _ASM_TIMER_REGS_H
-
-/*  This stuff should go into a platform specific file  */
-#define TCX0_CLK_RATE		19200
-#define TIMER_ENABLE		0
-#define TIMER_CLR_ON_MATCH	1
-
-/*
- * 8x50 HDD Specs 5-8.  Simulator co-sim not fixed until
- * release 1.1, and then it's "adjustable" and probably not defaulted.
- */
-#define RTOS_TIMER_INT		3
-#ifdef CONFIG_HEXAGON_COMET
-#define RTOS_TIMER_REGS_ADDR	0xAB000000UL
-#endif
-#define SLEEP_CLK_RATE		32000
-
-#endif
diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h
index 8d4ec76..dfe69e1 100644
--- a/arch/hexagon/include/asm/timex.h
+++ b/arch/hexagon/include/asm/timex.h
@@ -7,11 +7,10 @@
 #define _ASM_TIMEX_H
 
 #include <asm-generic/timex.h>
-#include <asm/timer-regs.h>
 #include <asm/hexagon_vm.h>
 
 /* Using TCX0 as our clock.  CLOCK_TICK_RATE scheduled to be removed. */
-#define CLOCK_TICK_RATE              TCX0_CLK_RATE
+#define CLOCK_TICK_RATE              19200
 
 #define ARCH_HAS_READ_CURRENT_TIMER
 
diff --git a/arch/hexagon/kernel/.gitignore b/arch/hexagon/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/hexagon/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
index feffe52..febc957 100644
--- a/arch/hexagon/kernel/time.c
+++ b/arch/hexagon/kernel/time.c
@@ -17,9 +17,10 @@
 #include <linux/of_irq.h>
 #include <linux/module.h>
 
-#include <asm/timer-regs.h>
 #include <asm/hexagon_vm.h>
 
+#define TIMER_ENABLE		BIT(0)
+
 /*
  * For the clocksource we need:
  *	pcycle frequency (600MHz)
@@ -33,6 +34,13 @@
 cycles_t	thread_freq_mhz;
 cycles_t	sleep_clk_freq;
 
+/*
+ * 8x50 HDD Specs 5-8.  Simulator co-sim not fixed until
+ * release 1.1, and then it's "adjustable" and probably not defaulted.
+ */
+#define RTOS_TIMER_INT		3
+#define RTOS_TIMER_REGS_ADDR	0xAB000000UL
+
 static struct resource rtos_timer_resources[] = {
 	{
 		.start	= RTOS_TIMER_REGS_ADDR,
@@ -80,7 +88,7 @@
 	iowrite32(0, &rtos_timer->clear);
 
 	iowrite32(delta, &rtos_timer->match);
-	iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
+	iowrite32(TIMER_ENABLE, &rtos_timer->enable);
 	return 0;
 }
 
diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c
index d35d69d..55f7539 100644
--- a/arch/hexagon/lib/io.c
+++ b/arch/hexagon/lib/io.c
@@ -27,6 +27,7 @@
 		*dst++ = *src;
 
 }
+EXPORT_SYMBOL(__raw_readsw);
 
 /*
  * __raw_writesw - write words a short at a time
@@ -47,6 +48,7 @@
 
 
 }
+EXPORT_SYMBOL(__raw_writesw);
 
 /*  Pretty sure len is pre-adjusted for the length of the access already */
 void __raw_readsl(const void __iomem *addr, void *data, int len)
@@ -62,6 +64,7 @@
 
 
 }
+EXPORT_SYMBOL(__raw_readsl);
 
 void __raw_writesl(void __iomem *addr, const void *data, int len)
 {
@@ -76,3 +79,4 @@
 
 
 }
+EXPORT_SYMBOL(__raw_writesl);
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 6fea184..707ae12 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -369,3 +369,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 8ab4662..1ac55e7 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -250,7 +250,6 @@
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
-void flush_dcache_folio(struct folio *folio);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 #define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7976dff..45bc32a 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -448,3 +448,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 99058a6..34d6458 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1145,7 +1145,7 @@
  */
 asmlinkage void fpsp040_die(void)
 {
-	force_fatal_sig(SIGSEGV);
+	force_exit_sig(SIGSEGV);
 }
 
 #ifdef CONFIG_M68KFPU_EMU
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 6b0e113..2204bde 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 2c57994..30193bc 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -37,4 +37,4 @@
 platform-$(CONFIG_MACH_VR41XX)		+= vr41xx/
 
 # include the platform specific files
-include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y))
+include $(patsubst %/, $(srctree)/arch/mips/%/Platform, $(platform-y))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8651074..0215dc1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -292,6 +292,8 @@
 	select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
 	select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
 	select HARDIRQS_SW_RESEND
+	select HAVE_PCI
+	select PCI_DRIVERS_GENERIC
 	help
 	  Build a generic DT-based kernel image that boots on select
 	  BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -333,6 +335,9 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
+	select SYS_HAS_CPU_BMIPS32_3300
+	select SYS_HAS_CPU_BMIPS4350
+	select SYS_HAS_CPU_BMIPS4380
 	select SWAP_IO_SPACE
 	select GPIOLIB
 	select MIPS_L1_CACHE_SHIFT_4
@@ -3092,7 +3097,7 @@
 config PGTABLE_LEVELS
 	int
 	default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
-	default 3 if 64BIT && !PAGE_SIZE_64KB
+	default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48)
 	default 2
 
 config MIPS_AUTO_PFN_OFFSET
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e036fc0..ace7f03 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -253,7 +253,9 @@
 #
 # Board-dependent options and extra files
 #
+ifdef need-compiler
 include $(srctree)/arch/mips/Kbuild.platforms
+endif
 
 ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
index 5a3e325..1c91064 100644
--- a/arch/mips/bcm63xx/clk.c
+++ b/arch/mips/bcm63xx/clk.c
@@ -381,6 +381,12 @@
 
 EXPORT_SYMBOL(clk_disable);
 
+struct clk *clk_get_parent(struct clk *clk)
+{
+	return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
 unsigned long clk_get_rate(struct clk *clk)
 {
 	if (!clk)
diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore
deleted file mode 100644
index d358395..0000000
--- a/arch/mips/boot/compressed/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ashldi3.c
-bswapsi.c
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 3548b3b..f27cf31 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -50,19 +50,9 @@
 vmlinuzobjs-$(CONFIG_ATH79)			   += $(obj)/uart-ath79.o
 endif
 
-extra-y += uart-ath79.c
-$(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
-	$(call cmd,shipped)
-
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
 
-extra-y += ashldi3.c
-$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
-	$(call if_changed,shipped)
-
-extra-y += bswapsi.c
-$(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
-	$(call if_changed,shipped)
+vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o
 
 targets := $(notdir $(vmlinuzobjs-y))
 
diff --git a/arch/mips/boot/compressed/ashldi3.c b/arch/mips/boot/compressed/ashldi3.c
new file mode 100644
index 0000000..f7bf6a7
--- /dev/null
+++ b/arch/mips/boot/compressed/ashldi3.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/ashldi3.c"
diff --git a/arch/mips/boot/compressed/bswapdi.c b/arch/mips/boot/compressed/bswapdi.c
new file mode 100644
index 0000000..acb28ae
--- /dev/null
+++ b/arch/mips/boot/compressed/bswapdi.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapdi.c"
diff --git a/arch/mips/boot/compressed/bswapsi.c b/arch/mips/boot/compressed/bswapsi.c
new file mode 100644
index 0000000..fdb9c64
--- /dev/null
+++ b/arch/mips/boot/compressed/bswapsi.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapsi.c"
diff --git a/arch/mips/boot/compressed/uart-ath79.c b/arch/mips/boot/compressed/uart-ath79.c
new file mode 100644
index 0000000..d686820
--- /dev/null
+++ b/arch/mips/boot/compressed/uart-ath79.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../ath79/early_printk.c"
diff --git a/arch/mips/boot/dts/ingenic/jz4725b.dtsi b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
index a1f0b71..0c6a5a4 100644
--- a/arch/mips/boot/dts/ingenic/jz4725b.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4725b.dtsi
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4725b-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index c1afdfd..772542e 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4740-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
index 05c00b9..dfe7432 100644
--- a/arch/mips/boot/dts/ingenic/jz4770.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index 28adc3d9..b0a4e2e 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 #include <dt-bindings/dma/jz4780-dma.h>
 
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index dec7909..8bd27ede 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/clock/ingenic,x1000-cgu.h>
 #include <dt-bindings/dma/x1000-dma.h>
 
 / {
diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi
index 215257f..2595df8 100644
--- a/arch/mips/boot/dts/ingenic/x1830.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1830.dtsi
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1830-cgu.h>
+#include <dt-bindings/clock/ingenic,x1830-cgu.h>
 #include <dt-bindings/dma/x1830-dma.h>
 
 / {
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
index 625bd2d..5956fb9 100644
--- a/arch/mips/configs/bmips_stb_defconfig
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -1,6 +1,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_NO_HZ=y
+CONFIG_HZ=1000
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_VM_EVENT_COUNTERS is not set
@@ -8,17 +9,34 @@
 CONFIG_BMIPS_GENERIC=y
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_HIGHMEM=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
+CONFIG_CC_STACKPROTECTOR_STRONG=y
 # CONFIG_SECCOMP is not set
 CONFIG_MIPS_O32_FP64_SUPPORT=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_RD_XZ=y
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEASPM_POWERSAVE=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCIE_BRCMSTB=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_BMIPS_CPUFREQ=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_NET=y
@@ -32,32 +50,99 @@
 # CONFIG_INET_DIAG is not set
 CONFIG_CFG80211=y
 CONFIG_NL80211_TESTMODE=y
+CONFIG_WIRELESS=y
 CONFIG_MAC80211=y
+CONFIG_NL80211=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BRCMSTB_GISB_ARB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_NET_DSA=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_ALIGNMENT=12
+CONFIG_SPI=y
+CONFIG_SPI_BRCMSTB=y
 CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_ABSENT=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_BRCMNAND=y
+CONFIG_MTD_SPI_NOR=y
+# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
 # CONFIG_BLK_DEV is not set
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
+CONFIG_VLAN_8021Q=y
+CONFIG_MACVLAN=y
 CONFIG_BCMGENET=y
 CONFIG_USB_USBNET=y
-# CONFIG_INPUT is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
 # CONFIG_SERIO is not set
-# CONFIG_VT is not set
+CONFIG_VT=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_BRCMSTB=y
 CONFIG_POWER_RESET_SYSCON=y
 CONFIG_POWER_SUPPLY=y
 # CONFIG_HWMON is not set
@@ -69,22 +154,76 @@
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_SOC_BRCMSTB=y
+CONFIG_MMC=y
+CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 # CONFIG_DNOTIFY is not set
-CONFIG_FUSE_FS=y
-CONFIG_VFAT_FS=y
 CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
 CONFIG_CIFS=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_FUSE_FS=y
+CONFIG_FHANDLE=y
+CONFIG_CGROUPS=y
+CONFIG_CUSE=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
-# CONFIG_CRYPTO_HW is not set
 CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_INFO_REDUCED is not set
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEBUG_USER=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon"
+# CONFIG_MIPS_CMDLINE_FROM_DTB is not set
+CONFIG_MIPS_CMDLINE_DTB_EXTEND=y
+# CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER is not set
+# CONFIG_CRYPTO_HW is not set
+CONFIG_DT_BCM974XX=y
+CONFIG_FW_CFE=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_BRCMSTB=y
+CONFIG_GENERIC_PHY=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_PHY_BRCM_USB=y
+CONFIG_PHY_BRCM_SATA=y
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_SYSVIPC=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index eaad0ed..a8a30bb 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -117,21 +117,21 @@
 {
 	switch (mips_machtype) {
 	case MACH_DS23100:	/* DS2100/DS3100 Pmin/Pmax */
-		board_be_handler = dec_kn01_be_handler;
+		mips_set_be_handler(dec_kn01_be_handler);
 		busirq_handler = dec_kn01_be_interrupt;
 		busirq_flags |= IRQF_SHARED;
 		dec_kn01_be_init();
 		break;
 	case MACH_DS5000_1XX:	/* DS5000/1xx 3min */
 	case MACH_DS5000_XX:	/* DS5000/xx Maxine */
-		board_be_handler = dec_kn02xa_be_handler;
+		mips_set_be_handler(dec_kn02xa_be_handler);
 		busirq_handler = dec_kn02xa_be_interrupt;
 		dec_kn02xa_be_init();
 		break;
 	case MACH_DS5000_200:	/* DS5000/200 3max */
 	case MACH_DS5000_2X0:	/* DS5000/240 3max+ */
 	case MACH_DS5900:	/* DS5900 bigmax */
-		board_be_handler = dec_ecc_be_handler;
+		mips_set_be_handler(dec_ecc_be_handler);
 		busirq_handler = dec_ecc_be_interrupt;
 		dec_ecc_be_init();
 		break;
diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c
index a3aa22c..a07a5ed 100644
--- a/arch/mips/generic/yamon-dt.c
+++ b/arch/mips/generic/yamon-dt.c
@@ -75,7 +75,7 @@
 __init int yamon_dt_append_memory(void *fdt,
 				  const struct yamon_mem_region *regions)
 {
-	unsigned long phys_memsize, memsize;
+	unsigned long phys_memsize = 0, memsize;
 	__be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
 	unsigned int mem_entries;
 	int i, err, mem_off;
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index f207388..b3dc9c5 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -61,8 +61,6 @@
 		SetPageDcacheDirty(page);
 }
 
-void flush_dcache_folio(struct folio *folio);
-
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index b710e76..15cde63 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -15,7 +15,7 @@
 #define MIPS_BE_FATAL	2		/* treat as an unrecoverable error */
 
 extern void (*board_be_init)(void);
-extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+void mips_set_be_handler(int (*handler)(struct pt_regs *reg, int is_fixup));
 
 extern void (*board_nmi_handler_setup)(void);
 extern void (*board_ejtag_handler_setup)(void);
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index ac0e2cfc..24a529c 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1734,8 +1734,6 @@
 
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 {
-	decode_configs(c);
-
 	/* All Loongson processors covered here define ExcCode 16 as GSExc. */
 	c->options |= MIPS_CPU_GSEXCEX;
 
@@ -1796,6 +1794,8 @@
 		panic("Unknown Loongson Processor ID!");
 		break;
 	}
+
+	decode_configs(c);
 }
 #else
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { }
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 376a6e2..9f47a88 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -185,7 +185,7 @@
 		seq_puts(m, " tx39_cache");
 	if (cpu_has_octeon_cache)
 		seq_puts(m, " octeon_cache");
-	if (cpu_has_fpu)
+	if (raw_cpu_has_fpu)
 		seq_puts(m, " fpu");
 	if (cpu_has_32fpr)
 		seq_puts(m, " 32fpr");
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 70e32de..72d02d36 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -387,3 +387,4 @@
 446	n32	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	n32	process_mrelease		sys_process_mrelease
+449	n32	futex_waitv			sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 1ca7bc3..e2c481f 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -363,3 +363,4 @@
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	n64	process_mrelease		sys_process_mrelease
+449	n64	futex_waitv			sys_futex_waitv
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index a61c35e..3714c97 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -436,3 +436,4 @@
 446	o32	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	o32	process_mrelease		sys_process_mrelease
+449	o32	futex_waitv			sys_futex_waitv
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6f07362..d26b0fb 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -103,13 +103,19 @@
 extern void tlb_do_page_fault_0(void);
 
 void (*board_be_init)(void);
-int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+static int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
 void (*board_nmi_handler_setup)(void);
 void (*board_ejtag_handler_setup)(void);
 void (*board_bind_eic_interrupt)(int irq, int regset);
 void (*board_ebase_setup)(void);
 void(*board_cache_error_setup)(void);
 
+void mips_set_be_handler(int (*handler)(struct pt_regs *regs, int is_fixup))
+{
+	board_be_handler = handler;
+}
+EXPORT_SYMBOL_GPL(mips_set_be_handler);
+
 static void show_raw_backtrace(unsigned long reg29, const char *loglvl,
 			       bool user)
 {
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 562aa87..aa20d07 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1067,7 +1067,7 @@
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
-		r = num_online_cpus();
+		r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
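
The same clamp recurs below for powerpc (kvm/powerpc.c) and riscv (kvm/vm.c):
KVM_CAP_NR_VCPUS is the recommended VCPU count, so it is now bounded by the
hard KVM_MAX_VCPUS limit instead of echoing the raw CPU count.  A toy model
of the min_t() arithmetic, with made-up numbers:

	#include <linux/minmax.h>

	/* Toy model: 128 online CPUs but a KVM_MAX_VCPUS of 64. */
	static unsigned int recommended_vcpus(void)
	{
		unsigned int nr_online = 128;	/* stand-in for num_online_cpus() */
		unsigned int hard_cap = 64;	/* stand-in for KVM_MAX_VCPUS */

		/* min_t() casts both sides to the named type before comparing,
		 * so userspace never sees a recommendation above the cap. */
		return min_t(unsigned int, nr_online, hard_cap);	/* 64 */
	}
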
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index dd819e3..4916ccc 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -158,6 +158,12 @@
 }
 EXPORT_SYMBOL(clk_deactivate);
 
+struct clk *clk_get_parent(struct clk *clk)
+{
+	return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
 static inline u32 get_counter_resolution(void)
 {
 	u32 res;
diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c
index dc0110a..afe8a61 100644
--- a/arch/mips/sgi-ip22/ip22-berr.c
+++ b/arch/mips/sgi-ip22/ip22-berr.c
@@ -112,5 +112,5 @@
 
 void __init ip22_be_init(void)
 {
-	board_be_handler = ip22_be_handler;
+	mips_set_be_handler(ip22_be_handler);
 }
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index c61362d..16ca470 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -468,7 +468,7 @@
 
 void __init ip22_be_init(void)
 {
-	board_be_handler = ip28_be_handler;
+	mips_set_be_handler(ip28_be_handler);
 }
 
 int ip28_show_be_info(struct seq_file *m)
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 5a38ae6..923a63a 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -85,7 +85,7 @@
 	int cpu = LOCAL_HUB_L(PI_CPU_NUM);
 	int cpuoff = cpu << 8;
 
-	board_be_handler = ip27_be_handler;
+	mips_set_be_handler(ip27_be_handler);
 
 	LOCAL_HUB_S(PI_ERR_INT_PEND,
 		    cpu ? PI_ERR_CLEAR_ALL_B : PI_ERR_CLEAR_ALL_A);
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index c860f95a..478b63b 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -34,5 +34,5 @@
 
 void __init ip32_be_init(void)
 {
-	board_be_handler = ip32_be_handler;
+	mips_set_be_handler(ip32_be_handler);
 }
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index f07b15d..72a31ee 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -122,7 +122,7 @@
 #error invalid SiByte board configuration
 #endif
 
-	board_be_handler = swarm_be_handler;
+	mips_set_be_handler(swarm_be_handler);
 
 	if (xicor_probe())
 		swarm_rtc_type = RTC_XICOR;
diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c
index 46e9c41..63f9725 100644
--- a/arch/mips/txx9/generic/setup_tx4927.c
+++ b/arch/mips/txx9/generic/setup_tx4927.c
@@ -80,7 +80,7 @@
 }
 static void __init tx4927_be_init(void)
 {
-	board_be_handler = tx4927_be_handler;
+	mips_set_be_handler(tx4927_be_handler);
 }
 
 static struct resource tx4927_sdram_resource[4];
diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c
index 17395d5..ba64654 100644
--- a/arch/mips/txx9/generic/setup_tx4938.c
+++ b/arch/mips/txx9/generic/setup_tx4938.c
@@ -82,7 +82,7 @@
 }
 static void __init tx4938_be_init(void)
 {
-	board_be_handler = tx4938_be_handler;
+	mips_set_be_handler(tx4938_be_handler);
 }
 
 static struct resource tx4938_sdram_resource[4];
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index bf8a3cd..f5f59b7 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -86,7 +86,7 @@
 }
 static void __init tx4939_be_init(void)
 {
-	board_be_handler = tx4939_be_handler;
+	mips_set_be_handler(tx4939_be_handler);
 }
 
 static struct resource tx4939_sdram_resource[4];
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 1b2ea34..d65f55f 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -57,7 +57,7 @@
 
 # VDSO linker flags.
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-	$(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \
+	$(filter -E%,$(KBUILD_CFLAGS)) -shared \
 	-G 0 --eh-frame-hdr --hash-style=sysv --build-id=sha1 -T
 
 CFLAGS_REMOVE_vdso.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 3fc0bb7..c2a222e 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -27,7 +27,6 @@
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long vaddr, void *dst, void *src, int len);
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 1999561..d0b71dd 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -29,7 +29,6 @@
 	unsigned long pfn);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index d6fd8fa..53061cb 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -231,6 +231,7 @@
 CONFIG_CRC_CCITT=m
 CONFIG_CRC_T10DIF=y
 CONFIG_FONTS=y
+CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_MEMORY_INIT=y
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 7085df0..6d13ae2 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -3,38 +3,19 @@
  * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
  * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
  * Copyright (C) 1999 SuSE GmbH
+ * Copyright (C) 2021 Helge Deller <deller@gmx.de>
  */
 
 #ifndef _PARISC_ASSEMBLY_H
 #define _PARISC_ASSEMBLY_H
 
-#define CALLEE_FLOAT_FRAME_SIZE	80
-
 #ifdef CONFIG_64BIT
-#define LDREG	ldd
-#define STREG	std
-#define LDREGX  ldd,s
-#define LDREGM	ldd,mb
-#define STREGM	std,ma
-#define SHRREG	shrd
-#define SHLREG	shld
-#define ANDCM   andcm,*
-#define	COND(x)	* ## x
 #define RP_OFFSET	16
 #define FRAME_SIZE	128
 #define CALLEE_REG_FRAME_SIZE	144
 #define REG_SZ		8
 #define ASM_ULONG_INSN	.dword
 #else	/* CONFIG_64BIT */
-#define LDREG	ldw
-#define STREG	stw
-#define LDREGX  ldwx,s
-#define LDREGM	ldwm
-#define STREGM	stwm
-#define SHRREG	shr
-#define SHLREG	shlw
-#define ANDCM   andcm
-#define COND(x)	x
 #define RP_OFFSET	20
 #define FRAME_SIZE	64
 #define CALLEE_REG_FRAME_SIZE	128
@@ -45,6 +26,7 @@
 /* Frame alignment for 32- and 64-bit */
 #define FRAME_ALIGN     64
 
+#define CALLEE_FLOAT_FRAME_SIZE	80
 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
 
 #ifdef CONFIG_PA20
@@ -68,6 +50,28 @@
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_64BIT
+#define LDREG	ldd
+#define STREG	std
+#define LDREGX  ldd,s
+#define LDREGM	ldd,mb
+#define STREGM	std,ma
+#define SHRREG	shrd
+#define SHLREG	shld
+#define ANDCM   andcm,*
+#define	COND(x)	* ## x
+#else	/* CONFIG_64BIT */
+#define LDREG	ldw
+#define STREG	stw
+#define LDREGX  ldwx,s
+#define LDREGM	ldwm
+#define STREGM	stwm
+#define SHRREG	shr
+#define SHLREG	shlw
+#define ANDCM   andcm
+#define COND(x)	x
+#endif
+
+#ifdef CONFIG_64BIT
 /* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
  * work around that for now... */
 	.level 2.0w
@@ -143,6 +147,17 @@
 	extrd,u \r, 63-(\sa), 64-(\sa), \t
 	.endm
 
+	/* Extract unsigned for 32- and 64-bit
+	 * The extru instruction leaves the most significant 32 bits of the
+	 * target register in an undefined state on PA 2.0 systems. */
+	.macro extru_safe r, p, len, t
+#ifdef CONFIG_64BIT
+	extrd,u	\r, 32+(\p), \len, \t
+#else
+	extru	\r, \p, \len, \t
+#endif
+	.endm
+
 	/* load 32-bit 'value' into 'reg' compensating for the ldil
 	 * sign-extension when running in wide mode.
 	 * WARNING!! neither 'value' nor 'reg' can be expressions
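
For readers not fluent in PA-RISC bit numbering, a plain C model of extru_safe
may help: extru counts bits from the most significant end (bit 0 is the MSB of
a 32-bit word) and extracts a len-bit field whose last bit sits at position p.
The 64-bit extrd,u form rebases the position by 32 so it picks the same field
out of the low word, and it zero-extends, leaving the upper 32 result bits
defined.  A standalone sketch of those semantics (not kernel code; it assumes
len < 32, which holds for every use in this series):

	#include <stdint.h>

	/* extru r, p, len, t   =>   t = (r >> (31 - p)) & ((1u << len) - 1) */
	static uint32_t extru_model(uint32_t r, unsigned int p, unsigned int len)
	{
		return (r >> (31 - p)) & ((1u << len) - 1);
	}

	/* extrd,u r, 32 + p, len, t: the shift 63 - (32 + p) equals 31 - p,
	 * so the extracted field is identical, but bits 32..63 of the result
	 * are cleared instead of left undefined as with extru on PA 2.0. */
	static uint64_t extrd_u_model(uint64_t r, unsigned int p, unsigned int len)
	{
		return (r >> (63 - (32 + p))) & (((uint64_t)1 << len) - 1);
	}
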
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index da0cd4b..859b8a3 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -50,7 +50,6 @@
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
index 7efb1aa..af2a598 100644
--- a/arch/parisc/include/asm/jump_label.h
+++ b/arch/parisc/include/asm/jump_label.h
@@ -5,6 +5,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/stringify.h>
 #include <asm/assembly.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 7badd87..3e7cf88 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -76,6 +76,8 @@
 	purge_tlb_end(flags);
 }
 
+extern void __update_cache(pte_t pte);
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -83,11 +85,14 @@
 #define set_pte(pteptr, pteval)			\
 	do {					\
 		*(pteptr) = (pteval);		\
-		barrier();			\
+		mb();				\
 	} while(0)
 
 #define set_pte_at(mm, addr, pteptr, pteval)	\
 	do {					\
+		if (pte_present(pteval) &&	\
+		    pte_user(pteval))		\
+			__update_cache(pteval);	\
 		*(pteptr) = (pteval);		\
 		purge_tlb_entries(mm, addr);	\
 	} while (0)
@@ -303,6 +308,7 @@
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
+#define pte_user(x)	(pte_val(x) & _PAGE_USER)
 #define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
@@ -410,7 +416,7 @@
 
 #define PG_dcache_dirty         PG_arch_1
 
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep)
 
 /* Encode and de-code a swap entry */
 
diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h
index 4b9e3d7..2b3010a 100644
--- a/arch/parisc/include/asm/rt_sigframe.h
+++ b/arch/parisc/include/asm/rt_sigframe.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 3
+#define SIGRETURN_TRAMP 4
 #define SIGRESTARTBLOCK_TRAMP 5 
 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
 
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index c61827e..94150b9 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -83,9 +83,9 @@
 #define pfn_va(pfn)	__va(PFN_PHYS(pfn))
 
 void
-update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+__update_cache(pte_t pte)
 {
-	unsigned long pfn = pte_pfn(*ptep);
+	unsigned long pfn = pte_pfn(pte);
 	struct page *page;
 
 	/* We don't have pte special.  As a result, we can be called with
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 57944d6..6e9cdb26 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -366,17 +366,9 @@
 	 */
 	.macro		L2_ptep	pmd,pte,index,va,fault
 #if CONFIG_PGTABLE_LEVELS == 3
-	extru		\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
+	extru_safe	\va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
 #else
-# if defined(CONFIG_64BIT)
-	extrd,u		\va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  #else
-  # if PAGE_SIZE > 4096
-	extru		\va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
-  # else
-	extru		\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  # endif
-# endif
+	extru_safe	\va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 #endif
 	dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 #if CONFIG_PGTABLE_LEVELS < 3
@@ -386,7 +378,7 @@
 	bb,>=,n		\pmd,_PxD_PRESENT_BIT,\fault
 	dep		%r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
 	SHLREG		\pmd,PxD_VALUE_SHIFT,\pmd
-	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
+	extru_safe	\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
 	.endm
@@ -1805,7 +1797,7 @@
 
 	/* Are we being ptraced? */
 	LDREG	TASK_TI_FLAGS(%r1),%r19
-	ldi	_TIF_SYSCALL_TRACE_MASK,%r2
+	ldi	_TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2
 	and,COND(=)	%r19,%r2,%r0
 	b,n	syscall_restore_rfi
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index bbfe23c..46b1050 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -288,21 +288,22 @@
 	   already in userspace. The first words of tramp are used to
 	   save the previous sigrestartblock trampoline that might be
 	   on the stack. We start the sigreturn trampoline at 
-	   SIGRESTARTBLOCK_TRAMP. */
+	   SIGRESTARTBLOCK_TRAMP+X. */
 	err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
 			&frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-	err |= __put_user(INSN_BLE_SR2_R0, 
+	err |= __put_user(INSN_LDI_R20, 
 			&frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
-	err |= __put_user(INSN_LDI_R20,
+	err |= __put_user(INSN_BLE_SR2_R0, 
 			&frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
+	err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
 
-	start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
-	end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
+	start = (unsigned long) &frame->tramp[0];
+	end = (unsigned long) &frame->tramp[TRAMP_SIZE];
 	flush_user_dcache_range_asm(start, end);
 	flush_user_icache_range_asm(start, end);
 
 	/* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-	 * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
+	 * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
 	 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
 	 */
 	rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index a5bdbb56..f166250 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 3
+#define COMPAT_SIGRETURN_TRAMP 4
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
 				COMPAT_SIGRESTARTBLOCK_TRAMP)
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index 6b4ca91..023834e 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -8,6 +8,7 @@
  *
  *  TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT)
  */
+#include <linux/kernel.h>
 #include <linux/stacktrace.h>
 
 #include <asm/unwind.h>
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 4fb3b6a9..d2497b3 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -566,7 +566,7 @@
 	ldo	R%lws_lock_start(%r20), %r28
 
 	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	extru_safe  %r26, 28, 8, %r20
 
 	/* Find lock to use, the hash is either one of 0 to
 	   15, multiplied by 16 (keep it 16-byte aligned)
@@ -751,7 +751,7 @@
 	ldo	R%lws_lock_start(%r20), %r28
 
 	/* Extract eight bits from r26 and hash lock (Bits 3-11) */
-	extru  %r26, 28, 8, %r20
+	extru_safe  %r26, 28, 8, %r20
 
 	/* Find lock to use, the hash is either one of 0 to
 	   15, multiplied by 16 (keep it 16-byte aligned)
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index bf751e0..358c000 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -446,3 +446,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0e3640e..5fa68c2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -196,3 +196,6 @@
 # Force dependency (incbin is bad)
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg
+
+# for cleaning
+subdir- += vdso32 vdso64
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 6b1ec9e..349c4a8 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -202,11 +202,11 @@
 	mfspr	r1, SPRN_SPRG_THREAD
 	lwz	r1, TASK_CPU - THREAD(r1)
 	slwi	r1, r1, 3
-	addis	r1, r1, emergency_ctx@ha
+	addis	r1, r1, emergency_ctx-PAGE_OFFSET@ha
 #else
-	lis	r1, emergency_ctx@ha
+	lis	r1, emergency_ctx-PAGE_OFFSET@ha
 #endif
-	lwz	r1, emergency_ctx@l(r1)
+	lwz	r1, emergency_ctx-PAGE_OFFSET@l(r1)
 	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 	EXCEPTION_PROLOG_2 0 vmap_stack_overflow
 	prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 2d596881..0d073b9 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -733,6 +733,7 @@
 #ifdef CONFIG_PIN_TLB_DATA
 	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
 	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+	li	r8, 0
 #ifdef CONFIG_PIN_TLB_IMMR
 	li	r0, 3
 #else
@@ -741,26 +742,26 @@
 	mtctr	r0
 	cmpwi	r4, 0
 	beq	4f
-	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
 	LOAD_REG_ADDR(r9, _sinittext)
 
 2:	ori	r0, r6, MD_EVALID
+	ori	r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
 	mtspr	SPRN_MD_CTR, r5
 	mtspr	SPRN_MD_EPN, r0
 	mtspr	SPRN_MD_TWC, r7
-	mtspr	SPRN_MD_RPN, r8
+	mtspr	SPRN_MD_RPN, r12
 	addi	r5, r5, 0x100
 	addis	r6, r6, SZ_8M@h
 	addis	r8, r8, SZ_8M@h
 	cmplw	r6, r9
 	bdnzt	lt, 2b
-
-4:	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+4:
 2:	ori	r0, r6, MD_EVALID
+	ori	r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
 	mtspr	SPRN_MD_CTR, r5
 	mtspr	SPRN_MD_EPN, r0
 	mtspr	SPRN_MD_TWC, r7
-	mtspr	SPRN_MD_RPN, r8
+	mtspr	SPRN_MD_RPN, r12
 	addi	r5, r5, 0x100
 	addis	r6, r6, SZ_8M@h
 	addis	r8, r8, SZ_8M@h
@@ -781,7 +782,7 @@
 #endif
 #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
 	lis	r0, (MD_RSV4I | MD_TWAM)@h
-	mtspr	SPRN_MI_CTR, r0
+	mtspr	SPRN_MD_CTR, r0
 #endif
 	mtspr	SPRN_SRR1, r10
 	mtspr	SPRN_SRR0, r11
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 1f07317..618aecc 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -25,8 +25,14 @@
 
 	return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
 }
-#define unsafe_get_user_sigset(dst, src, label) \
-	unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label)
+#define unsafe_get_user_sigset(dst, src, label) do {			\
+	sigset_t *__dst = dst;						\
+	const sigset_t __user *__src = src;				\
+	int i;								\
+									\
+	for (i = 0; i < _NSIG_WORDS; i++)				\
+		unsafe_get_user(__dst->sig[i], &__src->sig[i], label);	\
+} while (0)
 
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
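
What the loop fixes, in miniature: the old macro did a single 64-bit user read
and assigned it to sig[0], an unsigned long.  On 32-bit kernels that
assignment truncates, and on big endian the surviving low half is the second
word, so the restored mask was wrong and sig[1] was never written at all.  A
reduced userspace model of the two behaviours (hypothetical struct, assuming
the 32-bit layout with _NSIG_WORDS == 2):

	#include <stdint.h>
	#include <string.h>

	#define _NSIG_WORDS 2		/* assumption: 32-bit powerpc layout */

	struct sigset_model { unsigned long sig[_NSIG_WORDS]; };

	/* Old: one 64-bit load truncated into a 32-bit sig[0].  On a
	 * big-endian 32-bit build the half that survives is src->sig[1]. */
	static void copy_old(struct sigset_model *dst,
			     const struct sigset_model *src)
	{
		uint64_t tmp;

		memcpy(&tmp, &src->sig[0], sizeof(tmp));
		dst->sig[0] = (unsigned long)tmp;
	}

	/* New: every word copied at its native width. */
	static void copy_new(struct sigset_model *dst,
			     const struct sigset_model *src)
	{
		int i;

		for (i = 0; i < _NSIG_WORDS; i++)
			dst->sig[i] = src->sig[i];
	}
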
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 00a9c9c..3e053e2 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1063,7 +1063,7 @@
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 	if (do_setcontext(new_ctx, regs, 0)) {
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ef51853..d1e1fc0 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -704,7 +704,7 @@
 	 */
 
 	if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	set_current_blocked(&set);
@@ -713,7 +713,7 @@
 		return -EFAULT;
 	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
 		user_read_access_end();
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	user_read_access_end();
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7bef917..15109af 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -528,3 +528,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index f9ea0e5..3fa6d24 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -187,6 +187,12 @@
 	if (sysctl_hardlockup_all_cpu_backtrace)
 		trigger_allbutself_cpu_backtrace();
 
+	/*
+	 * Force flush any remote buffers that might be stuck in IRQ context
+	 * and therefore could not run their irq_work.
+	 */
+	printk_trigger_flush();
+
 	if (hardlockup_panic)
 		nmi_panic(NULL, "Hard LOCKUP");
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fcf4760..70b7a8f 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -695,6 +695,7 @@
 				       "r" (0) : "memory");
 		}
 		asm volatile("ptesync": : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
 		asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
 	} else {
 		for (set = 0; set < kvm->arch.tlb_sets; ++set) {
@@ -705,7 +706,9 @@
 			rb += PPC_BIT(51);	/* increment set number */
 		}
 		asm volatile("ptesync": : :"memory");
-		asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
 	}
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index eb776d0..32a4b4d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2005,7 +2005,7 @@
 	.globl	hcall_real_table_end
 hcall_real_table_end:
 
-_GLOBAL(kvmppc_h_set_xdabr)
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
 EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
 	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
 	beq	6f
@@ -2015,7 +2015,7 @@
 6:	li	r3, H_PARAMETER
 	blr
 
-_GLOBAL(kvmppc_h_set_dabr)
+_GLOBAL_TOC(kvmppc_h_set_dabr)
 EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
 	li	r5, DABRX_USER | DABRX_KERNEL
 3:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 35e9ccc..a72920f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -641,9 +641,9 @@
 		 * implementations just count online CPUs.
 		 */
 		if (hv_enabled)
-			r = num_present_cpus();
+			r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
 		else
-			r = num_online_cpus();
+			r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 8fc49b1..6ec9789 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -314,7 +314,7 @@
 		pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
 
 	ram = min_t(phys_addr_t, __max_low_memory, size);
-	ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, false);
+	ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
 	linear_sz = min_t(unsigned long, ram, SZ_512M);
 
 	/* If the linear size is smaller than 64M, do not randomize */
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 89353d4..647bf45 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -645,7 +645,7 @@
 
 		if (map)
 			linear_map_top = map_mem_in_cams(linear_map_top,
-							 num_cams, true, true);
+							 num_cams, false, true);
 	}
 #endif
 
@@ -766,7 +766,7 @@
 		num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
 
 		linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
-					    false, true);
+					    true, true);
 
 		ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
 	} else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 6f14c8f..59d3cfc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -376,9 +376,9 @@
 {
 	int i, j;
 	struct device_node *root;
-	const __u8 *numa_dist_table;
+	const __u8 *form2_distances;
 	const __be32 *numa_lookup_index;
-	int numa_dist_table_length;
+	int form2_distances_length;
 	int max_numa_index, distance_index;
 
 	if (firmware_has_feature(FW_FEATURE_OPAL))
@@ -392,45 +392,41 @@
 	max_numa_index = of_read_number(&numa_lookup_index[0], 1);
 
 	/* first element of the array is the size and is encode-int */
-	numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
-	numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
+	form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL);
+	form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1);
 	/* Skip the size which is encoded int */
-	numa_dist_table += sizeof(__be32);
+	form2_distances += sizeof(__be32);
 
-	pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
-		 numa_dist_table_length, max_numa_index);
+	pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
+		 form2_distances_length, max_numa_index);
 
 	for (i = 0; i < max_numa_index; i++)
 		/* +1 skip the max_numa_index in the property */
 		numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
 
 
-	if (numa_dist_table_length != max_numa_index * max_numa_index) {
+	if (form2_distances_length != max_numa_index * max_numa_index) {
 		WARN(1, "Wrong NUMA distance information\n");
-		/* consider everybody else just remote. */
-		for (i = 0;  i < max_numa_index; i++) {
-			for (j = 0; j < max_numa_index; j++) {
-				int nodeA = numa_id_index_table[i];
-				int nodeB = numa_id_index_table[j];
-
-				if (nodeA == nodeB)
-					numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
-				else
-					numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
-			}
-		}
+		form2_distances = NULL; // don't use it
 	}
-
 	distance_index = 0;
 	for (i = 0;  i < max_numa_index; i++) {
 		for (j = 0; j < max_numa_index; j++) {
 			int nodeA = numa_id_index_table[i];
 			int nodeB = numa_id_index_table[j];
+			int dist;
 
-			numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
-			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
+			if (form2_distances)
+				dist = form2_distances[distance_index++];
+			else if (nodeA == nodeB)
+				dist = LOCAL_DISTANCE;
+			else
+				dist = REMOTE_DISTANCE;
+			numa_distance_table[nodeA][nodeB] = dist;
+			pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
 		}
 	}
+
 	of_node_put(root);
 }
 
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index bb789f3..a38372f 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -186,7 +186,6 @@
 static int mcu_remove(struct i2c_client *client)
 {
 	struct mcu *mcu = i2c_get_clientdata(client);
-	int ret;
 
 	kthread_stop(shutdown_thread);
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 49b4015..8f998e5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1094,15 +1094,6 @@
 	phys_addr_t max_addr = memory_hotplug_max();
 	struct device_node *memory;
 
-	/*
-	 * The "ibm,pmemory" can appear anywhere in the address space.
-	 * Assuming it is still backed by page structs, set the upper limit
-	 * for the huge DMA window as MAX_PHYSMEM_BITS.
-	 */
-	if (of_find_node_by_type(NULL, "ibm,pmemory"))
-		return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ?
-			(phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS);
-
 	for_each_node_by_type(memory, "memory") {
 		unsigned long start, size;
 		int n_mem_addr_cells, n_mem_size_cells, len;
@@ -1238,7 +1229,6 @@
 	u32 ddw_avail[DDW_APPLICABLE_SIZE];
 	struct dma_win *window;
 	struct property *win64;
-	bool ddw_enabled = false;
 	struct failed_ddw_pdn *fpdn;
 	bool default_win_removed = false, direct_mapping = false;
 	bool pmem_present;
@@ -1253,7 +1243,6 @@
 
 	if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
 		direct_mapping = (len >= max_ram_len);
-		ddw_enabled = true;
 		goto out_unlock;
 	}
 
@@ -1367,8 +1356,10 @@
 		len = order_base_2(query.largest_available_block << page_shift);
 		win_name = DMA64_PROPNAME;
 	} else {
-		direct_mapping = true;
-		win_name = DIRECT64_PROPNAME;
+		direct_mapping = !default_win_removed ||
+			(len == MAX_PHYSMEM_BITS) ||
+			(!pmem_present && (len == max_ram_len));
+		win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
 	}
 
 	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
@@ -1406,8 +1397,8 @@
 			dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
 				 dn, ret);
 
-		/* Make sure to clean DDW if any TCE was set*/
-		clean_dma_window(pdn, win64->value);
+			/* Make sure to clean DDW if any TCE was set*/
+			clean_dma_window(pdn, win64->value);
 			goto out_del_list;
 		}
 	} else {
@@ -1454,7 +1445,6 @@
 	spin_unlock(&dma_win_list_lock);
 
 	dev->dev.archdata.dma_offset = win_addr;
-	ddw_enabled = true;
 	goto out_unlock;
 
 out_del_list:
@@ -1490,10 +1480,10 @@
 	 * as RAM, then we failed to create a window to cover persistent
 	 * memory and need to set the DMA limit.
 	 */
-	if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len)
+	if (pmem_present && direct_mapping && len == max_ram_len)
 		dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
 
-    return ddw_enabled && direct_mapping;
+	return direct_mapping;
 }
 
 static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 97796c6..785c292 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,7 +3,6 @@
 	bool
 	select PPC_SMP_MUXED_IPI
 	select HARDIRQS_SW_RESEND
-	select IRQ_DOMAIN_NOMAP
 
 config PPC_XIVE_NATIVE
 	bool
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c5d75c0..7b69299 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1443,8 +1443,7 @@
 
 static void __init xive_init_host(struct device_node *np)
 {
-	xive_irq_domain = irq_domain_add_nomap(np, XIVE_MAX_IRQ,
-					       &xive_irq_domain_ops, NULL);
+	xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
 	if (WARN_ON(xive_irq_domain == NULL))
 		return;
 	irq_set_default_host(xive_irq_domain);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a34c531..821252b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -62,6 +62,7 @@
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 7f19b78..8a107ed 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -107,11 +107,13 @@
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
+ifeq ($(KBUILD_EXTMOD),)
 ifeq ($(CONFIG_MMU),y)
 prepare: vdso_prepare
 vdso_prepare: prepare0
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
 endif
+endif
 
 ifneq ($(CONFIG_XIP_KERNEL),y)
 ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
@@ -136,3 +138,13 @@
 install zinstall:
 	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
 	$(boot)/$(install-image) System.map "$(INSTALL_PATH)"
+
+PHONY += rv32_randconfig
+rv32_randconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \
+		-f $(srctree)/Makefile randconfig
+
+PHONY += rv64_randconfig
+rv64_randconfig:
+	$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \
+		-f $(srctree)/Makefile randconfig
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index b254c60..fc1e586 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -9,10 +9,8 @@
 #define RTCCLK_FREQ		1000000
 
 / {
-	#address-cells = <2>;
-	#size-cells = <2>;
 	model = "Microchip PolarFire-SoC Icicle Kit";
-	compatible = "microchip,mpfs-icicle-kit";
+	compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
 
 	aliases {
 		ethernet0 = &emac1;
@@ -35,9 +33,6 @@
 		reg = <0x0 0x80000000 0x0 0x40000000>;
 		clocks = <&clkcfg 26>;
 	};
-
-	soc {
-	};
 };
 
 &serial0 {
@@ -56,8 +51,17 @@
 	status = "okay";
 };
 
-&sdcard {
+&mmc {
 	status = "okay";
+
+	bus-width = <4>;
+	disable-wp;
+	cap-sd-highspeed;
+	card-detect-delay = <200>;
+	sd-uhs-sdr12;
+	sd-uhs-sdr25;
+	sd-uhs-sdr50;
+	sd-uhs-sdr104;
 };
 
 &emac0 {
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index 9d2fbbc..c9f6d20 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -6,8 +6,8 @@
 / {
 	#address-cells = <2>;
 	#size-cells = <2>;
-	model = "Microchip MPFS Icicle Kit";
-	compatible = "microchip,mpfs-icicle-kit";
+	model = "Microchip PolarFire SoC";
+	compatible = "microchip,mpfs";
 
 	chosen {
 	};
@@ -161,7 +161,7 @@
 		};
 
 		clint@2000000 {
-			compatible = "sifive,clint0";
+			compatible = "sifive,fu540-c000-clint", "sifive,clint0";
 			reg = <0x0 0x2000000 0x0 0xC000>;
 			interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
 						&cpu1_intc 3 &cpu1_intc 7
@@ -172,7 +172,7 @@
 
 		plic: interrupt-controller@c000000 {
 			#interrupt-cells = <1>;
-			compatible = "sifive,plic-1.0.0";
+			compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
 			reg = <0x0 0xc000000 0x0 0x4000000>;
 			riscv,ndev = <186>;
 			interrupt-controller;
@@ -262,39 +262,13 @@
 			status = "disabled";
 		};
 
-		emmc: mmc@20008000 {
-			compatible = "cdns,sd4hc";
+		/* Common node entry for emmc/sd */
+		mmc: mmc@20008000 {
+			compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
 			reg = <0x0 0x20008000 0x0 0x1000>;
 			interrupt-parent = <&plic>;
 			interrupts = <88 89>;
-			pinctrl-names = "default";
 			clocks = <&clkcfg 6>;
-			bus-width = <4>;
-			cap-mmc-highspeed;
-			mmc-ddr-3_3v;
-			max-frequency = <200000000>;
-			non-removable;
-			no-sd;
-			no-sdio;
-			voltage-ranges = <3300 3300>;
-			status = "disabled";
-		};
-
-		sdcard: sdhc@20008000 {
-			compatible = "cdns,sd4hc";
-			reg = <0x0 0x20008000 0x0 0x1000>;
-			interrupt-parent = <&plic>;
-			interrupts = <88>;
-			pinctrl-names = "default";
-			clocks = <&clkcfg 6>;
-			bus-width = <4>;
-			disable-wp;
-			cap-sd-highspeed;
-			card-detect-delay = <200>;
-			sd-uhs-sdr12;
-			sd-uhs-sdr25;
-			sd-uhs-sdr50;
-			sd-uhs-sdr104;
 			max-frequency = <200000000>;
 			status = "disabled";
 		};
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 7db8610..0655b5c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -141,7 +141,7 @@
 		ranges;
 		plic0: interrupt-controller@c000000 {
 			#interrupt-cells = <1>;
-			compatible = "sifive,plic-1.0.0";
+			compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
 			reg = <0x0 0xc000000 0x0 0x4000000>;
 			riscv,ndev = <53>;
 			interrupt-controller;
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 60846e8..ba304d4 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -8,10 +8,9 @@
 #define RTCCLK_FREQ		1000000
 
 / {
-	#address-cells = <2>;
-	#size-cells = <2>;
 	model = "SiFive HiFive Unleashed A00";
-	compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
+	compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000",
+		     "sifive,fu540";
 
 	chosen {
 		stdout-path = "serial0";
@@ -26,9 +25,6 @@
 		reg = <0x0 0x80000000 0x2 0x00000000>;
 	};
 
-	soc {
-	};
-
 	hfclk: hfclk {
 		#clock-cells = <0>;
 		compatible = "fixed-clock";
@@ -63,7 +59,7 @@
 &qspi0 {
 	status = "okay";
 	flash@0 {
-		compatible = "issi,is25wp256", "jedec,spi-nor";
+		compatible = "jedec,spi-nor";
 		reg = <0>;
 		spi-max-frequency = <50000000>;
 		m25p,fast-read;
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 2e4ea84..4f66919 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -8,8 +8,6 @@
 #define RTCCLK_FREQ		1000000
 
 / {
-	#address-cells = <2>;
-	#size-cells = <2>;
 	model = "SiFive HiFive Unmatched A00";
 	compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
 		     "sifive,fu740";
@@ -27,9 +25,6 @@
 		reg = <0x0 0x80000000 0x4 0x00000000>;
 	};
 
-	soc {
-	};
-
 	hfclk: hfclk {
 		#clock-cells = <0>;
 		compatible = "fixed-clock";
@@ -211,7 +206,7 @@
 &qspi0 {
 	status = "okay";
 	flash@0 {
-		compatible = "issi,is25wp256", "jedec,spi-nor";
+		compatible = "jedec,spi-nor";
 		reg = <0>;
 		spi-max-frequency = <50000000>;
 		m25p,fast-read;
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
new file mode 100644
index 0000000..43f4132
--- /dev/null
+++ b/arch/riscv/configs/32-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_32BIT=y
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
new file mode 100644
index 0000000..313edc5
--- /dev/null
+++ b/arch/riscv/configs/64-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_64BIT=y
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 4ebc803..ef473e2 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -19,6 +19,8 @@
 CONFIG_SOC_MICROCHIP_POLARFIRE=y
 CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -72,9 +74,10 @@
 CONFIG_GPIO_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_POWER_RESET=y
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
-CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_NOUVEAU=m
+CONFIG_DRM_VIRTIO_GPU=m
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 434ef5b..6e9f12f 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -19,6 +19,8 @@
 CONFIG_ARCH_RV32I=y
 CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 25ba21f9..2639b9e 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,14 +12,12 @@
 #include <linux/types.h>
 #include <linux/kvm.h>
 #include <linux/kvm_types.h>
+#include <asm/csr.h>
 #include <asm/kvm_vcpu_fp.h>
 #include <asm/kvm_vcpu_timer.h>
 
-#ifdef CONFIG_64BIT
-#define KVM_MAX_VCPUS			(1U << 16)
-#else
-#define KVM_MAX_VCPUS			(1U << 9)
-#endif
+#define KVM_MAX_VCPUS			\
+	((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
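
KVM_MAX_VCPUS is now derived from the width of the hgatp VMID field rather
than a fixed power of two.  With the field widths from the RISC-V privileged
spec (bits 28:22 on RV32, bits 57:44 on RV64), the arithmetic works out as in
this standalone check (mask and shift values restated here only for
illustration; the kernel takes them from asm/csr.h):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long rv64_mask = 0x03FFF00000000000ULL; /* 57:44 */
		unsigned long rv32_mask = 0x1FC00000UL;               /* 28:22 */

		printf("RV64: %llu VCPUs\n", (rv64_mask >> 44) + 1);  /* 16384 */
		printf("RV32: %lu VCPUs\n", (rv32_mask >> 22) + 1);   /* 128 */
		return 0;
	}
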
 
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 109c97e..b3e5ff0 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -157,6 +157,8 @@
 #define page_to_bus(page)	(page_to_phys(page))
 #define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))
 
+#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
 	(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 39b5503..bf204e7 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -75,7 +75,8 @@
 #endif
 
 #ifdef CONFIG_XIP_KERNEL
-#define XIP_OFFSET		SZ_8M
+#define XIP_OFFSET		SZ_32M
+#define XIP_OFFSET_MASK		(SZ_32M - 1)
 #else
 #define XIP_OFFSET		0
 #endif
@@ -97,7 +98,8 @@
 #ifdef CONFIG_XIP_KERNEL
 #define XIP_FIXUP(addr) ({							\
 	uintptr_t __a = (uintptr_t)(addr);					\
-	(__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ?	\
+	(__a >= CONFIG_XIP_PHYS_ADDR && \
+	 __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ?	\
 		__a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
 		__a;								\
 	})
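
The widened fixup window is easier to see restated in plain C, with
illustrative stand-ins for the two Kconfig addresses (the real macro is
exactly the expression above, operating on uintptr_t):

	#include <stdint.h>

	#define SZ_32M			0x02000000UL
	#define XIP_OFFSET		SZ_32M
	/* Hypothetical example values for the Kconfig addresses. */
	#define CONFIG_XIP_PHYS_ADDR	0x20000000UL	/* flash */
	#define CONFIG_PHYS_RAM_BASE	0x80000000UL	/* RAM */

	/* Addresses inside the 2 * XIP_OFFSET flash window (up to 32M of
	 * execute-in-place text plus up to 32M of data that gets copied to
	 * RAM) are rebased onto the RAM copy; anything else is untouched. */
	static uintptr_t xip_fixup(uintptr_t a)
	{
		if (a >= CONFIG_XIP_PHYS_ADDR &&
		    a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2)
			return a - CONFIG_XIP_PHYS_ADDR +
			       CONFIG_PHYS_RAM_BASE - XIP_OFFSET;
		return a;
	}
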
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 208e31b..bc6f75f 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -8,30 +8,19 @@
 #ifndef _ASM_RISCV_VDSO_H
 #define _ASM_RISCV_VDSO_H
 
-
 /*
  * All systems with an MMU have a VDSO, but systems without an MMU don't
  * support shared libraries and therefore don't have one.
  */
 #ifdef CONFIG_MMU
 
-#include <linux/types.h>
-/*
- * All systems with an MMU have a VDSO, but systems without an MMU don't
- * support shared libraries and therefor don't have one.
- */
-#ifdef CONFIG_MMU
-
-#define __VVAR_PAGES    1
+#define __VVAR_PAGES    2
 
 #ifndef __ASSEMBLY__
 #include <generated/vdso-offsets.h>
 
 #define VDSO_SYMBOL(base, name)							\
 	(void __user *)((unsigned long)(base) + __vdso_##name##_offset)
-
-#endif /* CONFIG_MMU */
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index f839f16..77d9c2f 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -76,6 +76,13 @@
 	return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+	return _timens_data;
+}
+#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 25ec505..f52f01e 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -20,10 +20,20 @@
 	REG_L t0, _xip_fixup
 	add \reg, \reg, t0
 .endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+	la t1, __data_loc
+	li t0, XIP_OFFSET_MASK
+	and t1, t1, t0
+	li t1, XIP_OFFSET
+	sub t0, t0, t1
+	sub \reg, \reg, t0
+.endm
 _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
 #else
 .macro XIP_FIXUP_OFFSET reg
 .endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+.endm
 #endif /* CONFIG_XIP_KERNEL */
 
 __HEAD
@@ -267,6 +277,7 @@
 	la a3, hart_lottery
 	mv a2, a3
 	XIP_FIXUP_OFFSET a2
+	XIP_FIXUP_FLASH_OFFSET a3
 	lw t1, (a3)
 	amoswap.w t0, t1, (a2)
 	/* first time here if hart_lottery in RAM is not set */
@@ -305,6 +316,7 @@
 	XIP_FIXUP_OFFSET sp
 #ifdef CONFIG_BUILTIN_DTB
 	la a0, __dtb_start
+	XIP_FIXUP_OFFSET a0
 #else
 	mv a0, s1
 #endif /* CONFIG_BUILTIN_DTB */
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index ee5878d..9c842c4 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -12,7 +12,7 @@
 		wait_for_interrupt();
 }
 
-void (*pm_power_off)(void) = default_power_off;
+void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
 void machine_restart(char *cmd)
@@ -23,10 +23,16 @@
 
 void machine_halt(void)
 {
-	pm_power_off();
+	if (pm_power_off != NULL)
+		pm_power_off();
+	else
+		default_power_off();
 }
 
 void machine_power_off(void)
 {
-	pm_power_off();
+	if (pm_power_off != NULL)
+		pm_power_off();
+	else
+		default_power_off();
 }
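
This brings riscv in line with the usual pm_power_off convention: the pointer
stays NULL until a driver registers a power-off method, and machine_halt()
and machine_power_off() fall back to the default wait-for-interrupt loop when
nothing did.  A sketch of the registration side for a hypothetical firmware
power-off driver (only the exported hook above is assumed):

	#include <linux/init.h>
	#include <linux/pm.h>	/* pm_power_off */

	static void example_fw_power_off(void)
	{
		/* ... ask the platform firmware to cut power ... */
	}

	static int __init example_fw_power_init(void)
	{
		/* machine_power_off() now calls this instead of falling
		 * back to default_power_off(). */
		pm_power_off = example_fw_power_off;
		return 0;
	}
	device_initcall(example_fw_power_init);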
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index b70956d..a9436a6 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@
 
 enum vvar_pages {
 	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
 	VVAR_NR_PAGES,
 };
 
 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
 
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static int __init vdso_init(void)
+struct __vdso_info {
+	const char *name;
+	const char *vdso_code_start;
+	const char *vdso_code_end;
+	unsigned long vdso_pages;
+	/* Data Mapping */
+	struct vm_special_mapping *dm;
+	/* Code Mapping */
+	struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+	.name = "vdso",
+	.vdso_code_start = vdso_start,
+	.vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
 {
-	unsigned int i;
-
-	vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-	vdso_pagelist =
-		kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-	if (unlikely(vdso_pagelist == NULL)) {
-		pr_err("vdso: pagelist allocation failed\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < vdso_pages; i++) {
-		struct page *pg;
-
-		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
-	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);
+	current->mm->context.vdso = (void *)new_vma->vm_start;
 
 	return 0;
 }
+
+static int __init __vdso_init(void)
+{
+	unsigned int i;
+	struct page **vdso_pagelist;
+	unsigned long pfn;
+
+	if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+		pr_err("vDSO is not a valid ELF object!\n");
+		return -EINVAL;
+	}
+
+	vdso_info.vdso_pages = (
+		vdso_info.vdso_code_end -
+		vdso_info.vdso_code_start) >>
+		PAGE_SHIFT;
+
+	vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+				sizeof(struct page *),
+				GFP_KERNEL);
+	if (vdso_pagelist == NULL)
+		return -ENOMEM;
+
+	/* Grab the vDSO code pages. */
+	pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+	for (i = 0; i < vdso_info.vdso_pages; i++)
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info.cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
+
+		if (vma_is_special_mapping(vma, vdso_info.dm))
+			zap_page_range(vma, vma->vm_start, size);
+	}
+
+	mmap_read_unlock(mm);
+	return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details see check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+	RV_VDSO_MAP_VVAR,
+	RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+	[RV_VDSO_MAP_VVAR] = {
+		.name   = "[vvar]",
+		.fault = vvar_fault,
+	},
+	[RV_VDSO_MAP_VDSO] = {
+		.name   = "[vdso]",
+		.mremap = vdso_mremap,
+	},
+};
+
+static int __init vdso_init(void)
+{
+	vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+	vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+	return __vdso_init();
+}
 arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm,
-	int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+				    struct linux_binprm *bprm,
+				    int uses_interp)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long vdso_base, vdso_len;
-	int ret;
+	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+	void *ret;
 
 	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
 
-	vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+	vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+	/* Be sure to map the data page */
+	vdso_mapping_len = vdso_text_len + VVAR_SIZE;
+
+	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		ret = ERR_PTR(vdso_base);
+		goto up_fail;
+	}
+
+	ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+		(VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+	if (IS_ERR(ret))
+		goto up_fail;
+
+	vdso_base += VVAR_SIZE;
+	mm->context.vdso = (void *)vdso_base;
+	ret =
+	   _install_special_mapping(mm, vdso_base, vdso_text_len,
+		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+		vdso_info.cm);
+
+	if (IS_ERR(ret))
+		goto up_fail;
+
+	return 0;
+
+up_fail:
+	mm->context.vdso = NULL;
+	return PTR_ERR(ret);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	int ret;
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
-	if (IS_ERR_VALUE(vdso_base)) {
-		ret = vdso_base;
-		goto end;
-	}
-
-	mm->context.vdso = NULL;
-	ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-	if (unlikely(ret))
-		goto end;
-
-	ret =
-	   install_special_mapping(mm, vdso_base + VVAR_SIZE,
-		vdso_pages << PAGE_SHIFT,
-		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-		vdso_pagelist);
-
-	if (unlikely(ret))
-		goto end;
-
-	/*
-	 * Put vDSO base into mm struct. We need to do this before calling
-	 * install_special_mapping or the perf counter mmap tracking code
-	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
-	 */
-	mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
-
-end:
+	ret = __setup_additional_pages(mm, bprm, uses_interp);
 	mmap_write_unlock(mm);
-	return ret;
-}
 
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
-	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-		return "[vdso]";
-	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-		return "[vdso_data]";
-	return NULL;
+	return ret;
 }
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e9111f7..01d94aa 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -10,6 +10,9 @@
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index 9c9f350..f5ed082 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -64,8 +64,11 @@
 /*
  * From this point, stuff is considered writable and will be copied to RAM
  */
-	__data_loc = ALIGN(16);		/* location in file */
-	. = LOAD_OFFSET + XIP_OFFSET;	/* location in memory */
+	__data_loc = ALIGN(PAGE_SIZE);		/* location in file */
+	. = KERNEL_LINK_ADDR + XIP_OFFSET;	/* location in memory */
+
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK))
 
 	_sdata = .;			/* Start of data section */
 	_data = .;
@@ -96,7 +99,6 @@
 		KEEP(*(__soc_builtin_dtb_table))
 		__soc_builtin_dtb_table_end = .;
 	}
-	PERCPU_SECTION(L1_CACHE_BYTES)
 
 	. = ALIGN(8);
 	.alternative : {
@@ -122,6 +124,8 @@
 
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
+	PERCPU_SECTION(L1_CACHE_BYTES)
+
 	.rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
 		*(.rel.dyn*)
 	}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index d81bae8..fc058ff 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -453,6 +453,12 @@
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	stage2_unmap_range(kvm, gpa, size, false);
+	spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e3d3aed..fb84619 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -740,7 +740,7 @@
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
-		 * Documentation/virtual/kvm/vcpu-requests.rst
+		 * Documentation/virt/kvm/vcpu-requests.rst
 		 */
 		vcpu->mode = IN_GUEST_MODE;
 
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index eb3c045..3b0e703 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
  *
  * Authors:
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 26399df..fb18af3 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -74,7 +74,7 @@
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
-		r = num_online_cpus();
+		r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ee3459c..ea54cc0 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -233,8 +233,10 @@
 	local_flush_tlb_all();
 
 	/* Pre-compute ASID details */
-	num_asids = 1 << asid_bits;
-	asid_mask = num_asids - 1;
+	if (asid_bits) {
+		num_asids = 1 << asid_bits;
+		asid_mask = num_asids - 1;
+	}
 
 	/*
 	 * Use ASID allocator only if number of HW ASIDs are
@@ -255,7 +257,7 @@
 		pr_info("ASID allocator using %lu bits (%lu entries)\n",
 			asid_bits, num_asids);
 	} else {
-		pr_info("ASID allocator disabled\n");
+		pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
 	}
 
 	return 0;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c0cddf0..24b2b80 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -41,7 +41,7 @@
 EXPORT_SYMBOL(phys_ram_base);
 
 #ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[];
+extern char _xiprom[], _exiprom[], __data_loc;
 #endif
 
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
@@ -454,10 +454,9 @@
 /* called from head.S with MMU off */
 asmlinkage void __init __copy_data(void)
 {
-	void *from = (void *)(&_sdata);
-	void *end = (void *)(&_end);
+	void *from = (void *)(&__data_loc);
 	void *to = (void *)CONFIG_PHYS_RAM_BASE;
-	size_t sz = (size_t)(end - from + 1);
+	size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
 
 	memcpy(to, from, sz);
 }
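
A note on the __copy_data() change above: the old size was computed as
end - from + 1, one byte more than the half-open range [_sdata, _end)
actually contains, and the copy started at _sdata rather than at the load
location __data_loc that the XIP linker script now exports. A minimal
userspace sketch of the corrected size arithmetic (the symbols are
stand-ins, not the real linker symbols):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the linker-provided section bounds. */
    static char image[64];
    static char *const _sdata = &image[16];  /* start of data */
    static char *const _end   = &image[48];  /* end of image  */

    int main(void)
    {
        /* Size of [_sdata, _end): a plain pointer difference, no "+ 1". */
        size_t sz = (size_t)((uintptr_t)_end - (uintptr_t)_sdata);

        printf("copy %zu bytes\n", sz);  /* prints 32, not 33 */
        return 0;
    }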
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8857ec3..2a5bb4f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -47,7 +47,7 @@
 config KASAN_SHADOW_OFFSET
 	hex
 	depends on KASAN
-	default 0x18000000000000
+	default 0x1C000000000000
 
 config S390
 	def_bool y
@@ -194,6 +194,7 @@
 	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
 	select HAVE_SAMPLE_FTRACE_DIRECT
+	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 69c45f6..609e369 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -77,10 +77,12 @@
 KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
 
 ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
-cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
-ifeq ($(call cc-option,-mstack-size=8192),)
-cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
-endif
+  CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
+  ifeq ($(call cc-option,-mstack-size=8192),)
+    CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
+  endif
+  export CC_FLAGS_CHECK_STACK
+  cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
 endif
 
 ifdef CONFIG_EXPOLINE
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 7571dee..1aa11a8 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -149,82 +149,56 @@
 
 static void setup_kernel_memory_layout(void)
 {
-	bool vmalloc_size_verified = false;
-	unsigned long vmemmap_off;
-	unsigned long vspace_left;
+	unsigned long vmemmap_start;
 	unsigned long rte_size;
 	unsigned long pages;
-	unsigned long vmax;
 
 	pages = ident_map_size / PAGE_SIZE;
 	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
 	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
 
 	/* choose kernel address space layout: 4 or 3 levels. */
-	vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
+	vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
 	if (IS_ENABLED(CONFIG_KASAN) ||
 	    vmalloc_size > _REGION2_SIZE ||
-	    vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
-		vmax = _REGION1_SIZE;
-	else
-		vmax = _REGION2_SIZE;
-
-	/* keep vmemmap_off aligned to a top level region table entry */
-	rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
-	MODULES_END = vmax;
-	if (is_prot_virt_host()) {
-		/*
-		 * forcing modules and vmalloc area under the ultravisor
-		 * secure storage limit, so that any vmalloc allocation
-		 * we do could be used to back secure guest storage.
-		 */
-		adjust_to_uv_max(&MODULES_END);
-	}
-
-#ifdef CONFIG_KASAN
-	if (MODULES_END < vmax) {
-		/* force vmalloc and modules below kasan shadow */
-		MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+	    vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
+		    _REGION2_SIZE) {
+		MODULES_END = _REGION1_SIZE;
+		rte_size = _REGION2_SIZE;
 	} else {
-		/*
-		 * leave vmalloc and modules above kasan shadow but make
-		 * sure they don't overlap with it
-		 */
-		vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
-		vmalloc_size_verified = true;
-		vspace_left = KASAN_SHADOW_START;
+		MODULES_END = _REGION2_SIZE;
+		rte_size = _REGION3_SIZE;
 	}
+	/*
+	 * forcing modules and vmalloc area under the ultravisor
+	 * secure storage limit, so that any vmalloc allocation
+	 * we do could be used to back secure guest storage.
+	 */
+	adjust_to_uv_max(&MODULES_END);
+#ifdef CONFIG_KASAN
+	/* force vmalloc and modules below kasan shadow */
+	MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
 #endif
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
 	VMALLOC_END = MODULES_VADDR;
 
-	if (vmalloc_size_verified) {
-		VMALLOC_START = VMALLOC_END - vmalloc_size;
-	} else {
-		vmemmap_off = round_up(ident_map_size, rte_size);
+	/* allow vmalloc area to occupy up to about 1/2 of the remaining virtual space */
+	vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
+	VMALLOC_START = VMALLOC_END - vmalloc_size;
 
-		if (vmemmap_off + vmemmap_size > VMALLOC_END ||
-		    vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
-			/*
-			 * allow vmalloc area to occupy up to 1/2 of
-			 * the rest virtual space left.
-			 */
-			vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
-		}
-		VMALLOC_START = VMALLOC_END - vmalloc_size;
-		vspace_left = VMALLOC_START;
-	}
-
-	pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
+	/* split remaining virtual space between 1:1 mapping & vmemmap array */
+	pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
 	pages = SECTION_ALIGN_UP(pages);
-	vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
-	/* keep vmemmap left most starting from a fresh region table entry */
-	vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
-	/* take care that identity map is lower then vmemmap */
-	ident_map_size = min(ident_map_size, vmemmap_off);
+	/* keep vmemmap_start aligned to a top level region table entry */
+	vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
+	/* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
+	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+	/* make sure identity map doesn't overlap with vmemmap */
+	ident_map_size = min(ident_map_size, vmemmap_start);
 	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
-	VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
-	vmemmap = (struct page *)vmemmap_off;
+	/* make sure vmemmap doesn't overlap with vmalloc area */
+	VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+	vmemmap = (struct page *)vmemmap_start;
 }
 
 /*
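
The rewritten setup_kernel_memory_layout() splits what remains below
VMALLOC_START between the identity mapping and the vmemmap array: every
mapped page costs PAGE_SIZE bytes of identity map plus sizeof(struct page)
bytes of vmemmap, hence the pages = space / (PAGE_SIZE + sizeof(struct page))
step above. A worked example with illustrative numbers (the 4 KiB page and
64-byte struct page are assumptions for the demo, not values taken from this
patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long space       = 1UL << 40; /* space below VMALLOC_START */
        unsigned long page_size   = 4096;      /* illustrative PAGE_SIZE */
        unsigned long struct_page = 64;        /* illustrative sizeof(struct page) */
        unsigned long pages       = space / (page_size + struct_page);

        printf("pages=%lu identity=%lu vmemmap=%lu\n",
               pages, pages * page_size, pages * struct_page);
        return 0;
    }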
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index ea398a0..7f3c9ac 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -74,6 +74,12 @@
 int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
 			 unsigned long addr);
 
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	void *ipl_buf;
+};
+
 extern const struct kexec_file_ops s390_kexec_image_ops;
 extern const struct kexec_file_ops s390_kexec_elf_ops;
 
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6b3c366..90824be 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -210,9 +210,11 @@
 void zpci_device_reserved(struct zpci_dev *zdev);
 bool zpci_is_device_configured(struct zpci_dev *zdev);
 
+int zpci_hot_reset_device(struct zpci_dev *zdev);
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
 
 /* CLP */
 int clp_setup_writeback_mio(void);
@@ -294,8 +296,10 @@
 void zpci_debug_init_device(struct zpci_dev *, const char *);
 void zpci_debug_exit_device(struct zpci_dev *);
 
-/* Error reporting */
+/* Error handling */
 int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+int zpci_clear_error_state(struct zpci_dev *zdev);
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
 
 #ifdef CONFIG_NUMA
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d72a6df..785d54c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -191,8 +191,8 @@
 				return rc;
 		} else {
 			/* Check for swapped kdump oldmem areas */
-			if (oldmem_data.start && from - oldmem_data.size < oldmem_data.size) {
-				from -= oldmem_data.size;
+			if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
+				from -= oldmem_data.start;
 				len = min(count, oldmem_data.size - from);
 			} else if (oldmem_data.start && from < oldmem_data.size) {
 				len = min(count, oldmem_data.size - from);
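
The crash_dump.c hunk fixes the bounds test for the swapped kdump area: an
address inside [oldmem_data.start, oldmem_data.start + oldmem_data.size) has
to be compared against, and translated by, the region start rather than its
size. A small sketch of the corrected translation with made-up values:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical region; the real values come from oldmem_data. */
        unsigned long start = 0x10000000UL, size = 0x02000000UL;
        unsigned long from  = 0x10400000UL;

        if (start && from - start < size) {
            from -= start;  /* offset into the swapped area */
            printf("offset %#lx\n", from);  /* 0x400000 */
        }
        return 0;
    }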
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index e2cc357..5ad1dde 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2156,7 +2156,7 @@
 
 	buf = vzalloc(report->size);
 	if (!buf)
-		return ERR_PTR(-ENOMEM);
+		goto out;
 	ptr = buf;
 
 	memcpy(ptr, report->ipib, report->ipib->hdr.len);
@@ -2195,6 +2195,7 @@
 	}
 
 	BUG_ON(ptr > buf + report->size);
+out:
 	return buf;
 }
 
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 528edff..9975ad2 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -12,6 +12,7 @@
 #include <linux/kexec.h>
 #include <linux/module_signature.h>
 #include <linux/verification.h>
+#include <linux/vmalloc.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
 #include <asm/setup.h>
@@ -170,6 +171,7 @@
 	struct kexec_buf buf;
 	unsigned long addr;
 	void *ptr, *end;
+	int ret;
 
 	buf.image = image;
 
@@ -199,9 +201,13 @@
 		ptr += len;
 	}
 
+	ret = -ENOMEM;
 	buf.buffer = ipl_report_finish(data->report);
+	if (!buf.buffer)
+		goto out;
 	buf.bufsz = data->report->size;
 	buf.memsz = buf.bufsz;
+	image->arch.ipl_buf = buf.buffer;
 
 	data->memsz += buf.memsz;
 
@@ -209,7 +215,9 @@
 		data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
 	*lc_ipl_parmblock_ptr = (__u32)buf.mem;
 
-	return kexec_add_buffer(&buf);
+	ret = kexec_add_buffer(&buf);
+out:
+	return ret;
 }
 
 void *kexec_file_add_components(struct kimage *image,
@@ -322,3 +330,11 @@
 	}
 	return 0;
 }
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->arch.ipl_buf);
+	image->arch.ipl_buf = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 6f431fa..ee8707a 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -687,8 +687,10 @@
 						      false);
 			if (cfdiag_diffctr(cpuhw, event->hw.config_base))
 				cfdiag_push_sample(event, cpuhw);
-		} else
+		} else if (cpuhw->flags & PMU_F_RESERVED) {
+			/* Only update when PMU not hotplugged off */
 			hw_perf_event_update(event);
+		}
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 }
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 40405f2..225ab2d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -606,7 +606,7 @@
 
 static void __init setup_memory_end(void)
 {
-	memblock_remove(ident_map_size, ULONG_MAX);
+	memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
 	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
 	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
 }
@@ -638,14 +638,6 @@
 #endif
 
 /*
- * Make sure that the area above identity mapping is protected
- */
-static void __init reserve_above_ident_map(void)
-{
-	memblock_reserve(ident_map_size, ULONG_MAX);
-}
-
-/*
  * Reserve memory for kdump kernel to be loaded with kexec
  */
 static void __init reserve_crashkernel(void)
@@ -785,7 +777,6 @@
 	}
 	memblock_set_bottom_up(false);
 	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
-	memblock_dump_all();
 }
 
 /*
@@ -826,9 +817,6 @@
 		storage_key_init_range(start, end);
 
 	psw_set_key(PAGE_DEFAULT_KEY);
-
-	/* Only cosmetics */
-	memblock_enforce_memory_limit(memblock_end_of_DRAM());
 }
 
 static void __init relocate_amode31_section(void)
@@ -999,24 +987,24 @@
 	setup_control_program_code();
 
 	/* Do some memory reservations *before* memory is added to memblock */
-	reserve_above_ident_map();
 	reserve_kernel();
 	reserve_initrd();
 	reserve_certificate_list();
 	reserve_mem_detect_info();
+	memblock_set_current_limit(ident_map_size);
 	memblock_allow_resize();
 
 	/* Get information about *all* installed memory */
 	memblock_add_mem_detect_info();
 
 	free_mem_detect_info();
+	setup_memory_end();
+	memblock_dump_all();
+	setup_memory();
 
 	relocate_amode31_section();
 	setup_cr();
-
 	setup_uv();
-	setup_memory_end();
-	setup_memory();
 	dma_contiguous_reserve(ident_map_size);
 	vmcp_cma_reserve();
 	if (MACHINE_HAS_EDAT2)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index df5261e..ed9c5c2 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
+449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv
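
Syscall 449 is the futex_waitv() interface new in this cycle; the identical
row is added to the sh and sparc tables further down. A hedged userspace
sketch of invoking it raw, since glibc has no wrapper yet; the struct layout
follows the 5.16 uapi header, and the call returns immediately with EAGAIN
because the futex word does not hold the expected value:

    #include <linux/futex.h>   /* struct futex_waitv, FUTEX_32 */
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <errno.h>

    int main(void)
    {
        uint32_t word = 1;                 /* deliberately != .val */
        struct futex_waitv wv = {
            .val   = 0,                    /* expected value */
            .uaddr = (uintptr_t)&word,
            .flags = FUTEX_32,
        };

        /* args: (waiters, nr_futexes, flags, timeout, clockid) */
        long r = syscall(449 /* __NR_futex_waitv */, &wv, 1, 0, NULL, 0);

        printf("futex_waitv -> %ld (errno %d)\n", r, errno);
        return 0;
    }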
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 035705c..2b78078 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -84,7 +84,7 @@
 {
 	if (user_mode(regs)) {
 		report_user_fault(regs, SIGSEGV, 0);
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 	} else
 		die(regs, "Unknown program exception");
 }
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index e3e6ac5..245bddf 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -22,7 +22,7 @@
 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
 
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -nostdlib -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
 	--hash-style=both --build-id=sha1 -melf_s390 -T
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 6568de2..9e2b95a 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -8,8 +8,9 @@
 include $(srctree)/lib/vdso/Makefile
 obj-vdso64 = vdso_user_wrapper.o note.o
 obj-cvdso64 = vdso64_generic.o getcpu.o
-CFLAGS_REMOVE_getcpu.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
-CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
 
 # Build rules
 
@@ -25,7 +26,7 @@
 
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
+ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
 	     --hash-style=both --build-id=sha1 -T
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c6257f6..14a18ba 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -585,6 +585,8 @@
 			r = KVM_MAX_VCPUS;
 		else if (sclp.has_esca && sclp.has_64bscao)
 			r = KVM_S390_ESCA_CPU_SLOTS;
+		if (ext == KVM_CAP_NR_VCPUS)
+			r = min_t(unsigned int, num_online_cpus(), r);
 		break;
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 872d772..2f9b78f 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -481,6 +481,34 @@
 	spin_unlock(&zpci_iomap_lock);
 }
 
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+	int bar, idx;
+
+	spin_lock(&zpci_iomap_lock);
+	for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+		if (!zdev->bars[bar].size)
+			continue;
+		idx = zdev->bars[bar].map_idx;
+		if (!zpci_iomap_start[idx].count)
+			continue;
+		WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+	}
+	spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+	if (!fh || zdev->fh == fh)
+		return;
+
+	zdev->fh = fh;
+	if (zpci_use_mio(zdev))
+		return;
+	if (zdev->has_resources && zdev_enabled(zdev))
+		zpci_do_update_iomap_fh(zdev, fh);
+}
+
 static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
 				    unsigned long size, unsigned long flags)
 {
@@ -668,7 +696,7 @@
 	if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
 		rc = -EIO;
 	else
-		zdev->fh = fh;
+		zpci_update_fh(zdev, fh);
 	return rc;
 }
 
@@ -679,14 +707,14 @@
 
 	cc = clp_disable_fh(zdev, &fh);
 	if (!cc) {
-		zdev->fh = fh;
+		zpci_update_fh(zdev, fh);
 	} else if (cc == CLP_RC_SETPCIFN_ALRDY) {
 		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
 			zdev->fid);
 		/* Function is already disabled - update handle */
 		rc = clp_refresh_fh(zdev->fid, &fh);
 		if (!rc) {
-			zdev->fh = fh;
+			zpci_update_fh(zdev, fh);
 			rc = -EINVAL;
 		}
 	} else {
@@ -696,6 +724,65 @@
 }
 
 /**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the function's internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently, after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+	if (zdev_enabled(zdev)) {
+		/* Disables device access, DMAs and IRQs (reset state) */
+		rc = zpci_disable_device(zdev);
+		/*
+		 * Due to a z/VM vs LPAR inconsistency in the error state the
+		 * FH may indicate an enabled device but disable says the
+	 * device is already disabled; don't treat it as an error here.
+		 */
+		if (rc == -EINVAL)
+			rc = 0;
+		if (rc)
+			return rc;
+	}
+
+	rc = zpci_enable_device(zdev);
+	if (rc)
+		return rc;
+
+	if (zdev->dma_table)
+		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+					(u64)zdev->dma_table);
+	else
+		rc = zpci_dma_init_device(zdev);
+	if (rc) {
+		zpci_disable_device(zdev);
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
  * zpci_create_device() - Create a new zpci_dev and add it to the zbus
  * @fid: Function ID of the device to be created
  * @fh: Current Function Handle of the device to be created
@@ -776,7 +863,7 @@
 {
 	int rc;
 
-	zdev->fh = fh;
+	zpci_update_fh(zdev, fh);
 	/* the PCI function will be scanned once function 0 appears */
 	if (!zdev->zbus->bus)
 		return 0;
@@ -903,6 +990,59 @@
 }
 EXPORT_SYMBOL(zpci_report_error);
 
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails, the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+	struct zpci_fib fib = {0};
+	u8 status;
+	int cc;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc) {
+		zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+	struct zpci_fib fib = {0};
+	u8 status;
+	int cc;
+
+	cc = zpci_mod_fc(req, &fib, &status);
+	if (cc) {
+		zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int zpci_mem_init(void)
 {
 	BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 6a5bfa9..2e3e5b2 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -47,18 +47,223 @@
 	u16 pec;			/* PCI event code */
 } __packed;
 
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+	switch (ers_res) {
+	case PCI_ERS_RESULT_CAN_RECOVER:
+	case PCI_ERS_RESULT_RECOVERED:
+	case PCI_ERS_RESULT_NEED_RESET:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static bool is_passed_through(struct zpci_dev *zdev)
+{
+	return zdev->s390_domain;
+}
+
+static bool is_driver_supported(struct pci_driver *driver)
+{
+	if (!driver || !driver->err_handler)
+		return false;
+	if (!driver->err_handler->error_detected)
+		return false;
+	if (!driver->err_handler->slot_reset)
+		return false;
+	if (!driver->err_handler->resume)
+		return false;
+	return true;
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
+							 struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+	ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+	if (ers_result_indicates_abort(ers_res))
+		pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
+	else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+
+	return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
+							struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+	struct zpci_dev *zdev = to_zpci(pdev);
+	int rc;
+
+	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
+	rc = zpci_reset_load_store_blocked(zdev);
+	if (rc) {
+		pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
+		/* Let's try a full reset instead */
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	if (driver->err_handler->mmio_enabled) {
+		ers_res = driver->err_handler->mmio_enabled(pdev);
+		if (ers_result_indicates_abort(ers_res)) {
+			pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+				pci_name(pdev));
+			return ers_res;
+		} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+			pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+			return ers_res;
+		}
+	}
+
+	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
+	rc = zpci_clear_error_state(zdev);
+	if (!rc) {
+		pdev->error_state = pci_channel_io_normal;
+	} else {
+		pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
+		/* Let's try a full reset instead */
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
+					    struct pci_driver *driver)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+	pr_info("%s: Initiating reset\n", pci_name(pdev));
+	if (zpci_hot_reset_device(to_zpci(pdev))) {
+		pr_err("%s: The reset request failed\n", pci_name(pdev));
+		return ers_res;
+	}
+	pdev->error_state = pci_channel_io_normal;
+	ers_res = driver->err_handler->slot_reset(pdev);
+	if (ers_result_indicates_abort(ers_res)) {
+		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
+		return ers_res;
+	}
+
+	return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst,
+ * with the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+	struct pci_driver *driver;
+
+	/*
+	 * Ensure that the PCI function is not removed concurrently, no driver
+	 * is unbound or probed and that userspace can't access its
+	 * configuration space while we perform recovery.
+	 */
+	pci_dev_lock(pdev);
+	if (pdev->error_state == pci_channel_io_perm_failure) {
+		ers_res = PCI_ERS_RESULT_DISCONNECT;
+		goto out_unlock;
+	}
+	pdev->error_state = pci_channel_io_frozen;
+
+	if (is_passed_through(to_zpci(pdev))) {
+		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
+			pci_name(pdev));
+		goto out_unlock;
+	}
+
+	driver = to_pci_driver(pdev->dev.driver);
+	if (!is_driver_supported(driver)) {
+		if (!driver)
+			pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
+				pci_name(pdev));
+		else
+			pr_info("%s: The %s driver bound to the device does not support error recovery\n",
+				pci_name(pdev),
+				driver->name);
+		goto out_unlock;
+	}
+
+	ers_res = zpci_event_notify_error_detected(pdev, driver);
+	if (ers_result_indicates_abort(ers_res))
+		goto out_unlock;
+
+	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+		ers_res = zpci_event_do_error_state_clear(pdev, driver);
+		if (ers_result_indicates_abort(ers_res))
+			goto out_unlock;
+	}
+
+	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+		ers_res = zpci_event_do_reset(pdev, driver);
+
+	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
+		       pci_name(pdev));
+		goto out_unlock;
+	}
+
+	pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
+	if (driver->err_handler->resume)
+		driver->err_handler->resume(pdev);
+out_unlock:
+	pci_dev_unlock(pdev);
+
+	return ers_res;
+}
+
+/* zpci_event_io_failure - Report PCI channel failure state to driver
+ * @pdev: PCI function for which to report
+ * @es: PCI channel failure state to report
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+	struct pci_driver *driver;
+
+	pci_dev_lock(pdev);
+	pdev->error_state = es;
+	/*
+	 * While vfio-pci's error_detected callback notifies user-space,
+	 * QEMU reacts to this by freezing the guest. In an s390 environment PCI
+	 * errors are rarely fatal so this is overkill. Instead in the future
+	 * we will inject the error event and let the guest recover the device
+	 * itself.
+	 */
+	if (is_passed_through(to_zpci(pdev)))
+		goto out;
+	driver = to_pci_driver(pdev->dev.driver);
+	if (driver && driver->err_handler && driver->err_handler->error_detected)
+		driver->err_handler->error_detected(pdev, pdev->error_state);
+out:
+	pci_dev_unlock(pdev);
+}
+
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
 	struct pci_dev *pdev = NULL;
+	pci_ers_result_t ers_res;
 
 	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
 		 ccdf->fid, ccdf->fh, ccdf->pec);
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
-	if (zdev)
-		pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	if (zdev) {
+		zpci_update_fh(zdev, ccdf->fh);
+		if (zdev->zbus->bus)
+			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	}
 
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -66,7 +271,20 @@
 	if (!pdev)
 		return;
 
-	pdev->error_state = pci_channel_io_perm_failure;
+	switch (ccdf->pec) {
+	case 0x003a: /* Service Action or Error Recovery Successful */
+		ers_res = zpci_event_attempt_error_recovery(pdev);
+		if (ers_res != PCI_ERS_RESULT_RECOVERED)
+			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+		break;
+	default:
+		/*
+		 * Mark as frozen not permanently failed because the device
+		 * could be subsequently recovered by the platform.
+		 */
+		zpci_event_io_failure(pdev, pci_channel_io_frozen);
+		break;
+	}
 	pci_dev_put(pdev);
 }
 
@@ -78,7 +296,7 @@
 
 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
 {
-	zdev->fh = fh;
+	zpci_update_fh(zdev, fh);
 	/* Give the driver a hint that the function is
 	 * already unusable.
 	 */
@@ -121,7 +339,7 @@
 		if (!zdev)
 			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
 		else
-			zdev->fh = ccdf->fh;
+			zpci_update_fh(zdev, ccdf->fh);
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (zdev) {
@@ -130,7 +348,7 @@
 			 */
 			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
 				break;
-			zdev->fh = ccdf->fh;
+			zpci_update_fh(zdev, ccdf->fh);
 			zpci_deconfigure_device(zdev);
 		}
 		break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 2e43996..28d863a 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -163,7 +163,7 @@
 			       unsigned long len)
 {
 	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
 	return __zpci_load(data, req, ZPCI_OFFSET(addr));
 }
@@ -244,7 +244,7 @@
 				unsigned long len)
 {
 	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
 	return __zpci_store(data, req, ZPCI_OFFSET(addr));
 }
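
These READ_ONCE() additions pair with the WRITE_ONCE() that zpci_update_fh()
now uses when publishing a refreshed function handle into the iomap table:
the handle can change from the availability/error event path while
zpci_load()/zpci_store() sample it locklessly, so both sides are marked to
stop the compiler from tearing or re-reading the access. A minimal userspace
sketch of the pattern, with simplified stand-ins for the kernel macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's one-time access macros. */
    #define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
    #define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

    static uint32_t fh;  /* shared function handle */

    int main(void)
    {
        WRITE_ONCE(fh, 0x1234);          /* writer: publish new handle */
        uint32_t snap = READ_ONCE(fh);   /* reader: one coherent sample */

        printf("fh snapshot: %#x\n", snap);
        return 0;
    }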
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 3823e15..954bb7a 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -387,6 +387,15 @@
 		airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
 }
 
+void arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	if (!zdev->irqs_registered)
+		zpci_set_irq(zdev);
+	default_restore_msi_irqs(pdev);
+}
+
 static struct airq_struct zpci_airq = {
 	.handler = zpci_floating_irq_handler,
 	.isc = PCI_ISC,
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 6904f4b..70afb30 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -56,7 +56,6 @@
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_FORCED_THREADING
-	select MAY_HAVE_SPARSE_IRQ
 	select MODULES_USE_ELF_RELA
 	select NEED_SG_DMA_LENGTH
 	select NO_DMA if !MMU && !DMA_COHERENT
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 958f790..10290e5 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -54,6 +54,7 @@
 
 config DWARF_UNWINDER
 	bool "Enable the DWARF unwinder for stacktraces"
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	default n
 	help
diff --git a/arch/sh/boards/mach-landisk/irq.c b/arch/sh/boards/mach-landisk/irq.c
index 29b8b1f..0b672b8 100644
--- a/arch/sh/boards/mach-landisk/irq.c
+++ b/arch/sh/boards/mach-landisk/irq.c
@@ -26,8 +26,8 @@
 	PCI_INTD, /* PCI int D */
 	ATA,	  /* ATA */
 	FATA,	  /* CF */
-	POWER,	  /* Power swtich */
-	BUTTON,	  /* Button swtich */
+	POWER,	  /* Power switch */
+	BUTTON,	  /* Button switch */
 };
 
 /* Vectors for LANDISK */
diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile
index c081e7e..5c123f5 100644
--- a/arch/sh/boot/Makefile
+++ b/arch/sh/boot/Makefile
@@ -27,8 +27,8 @@
 suffix-$(CONFIG_KERNEL_LZO)	:= lzo
 
 targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \
-	   uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin
-extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+	   uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \
+	   vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 	   vmlinux.bin.xz vmlinux.bin.lzo
 subdir- := compressed romimage
 
diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore
index 37aa530..cd16663 100644
--- a/arch/sh/boot/compressed/.gitignore
+++ b/arch/sh/boot/compressed/.gitignore
@@ -1,7 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-ashiftrt.S
-ashldi3.c
-ashlsi3.S
-ashrsi3.S
-lshrsi3.S
 vmlinux.bin.*
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index 589d2d8..cf3174d 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -5,12 +5,18 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz \
-		   vmlinux.bin.bz2 vmlinux.bin.lzma \
-		   vmlinux.bin.xz vmlinux.bin.lzo \
-		   head_32.o misc.o piggy.o
+OBJECTS := head_32.o misc.o cache.o piggy.o \
+           ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o
 
-OBJECTS = $(obj)/head_32.o $(obj)/misc.o $(obj)/cache.o
+# These were previously generated files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+ifdef building_out_of_srctree
+$(shell rm -f $(addprefix $(obj)/, ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S))
+endif
+
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
+           vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS)
 
 GCOV_PROFILE := n
 
@@ -33,21 +39,9 @@
 LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
 		   -T $(obj)/../../kernel/vmlinux.lds
 
-#
-# Pull in the necessary libgcc bits from the in-kernel implementation.
-#
-lib1funcs-y	:= ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S
-lib1funcs-obj   := \
-	$(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y))))
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 
-lib1funcs-dir		:= $(srctree)/arch/$(SRCARCH)/lib
-
-KBUILD_CFLAGS += -I$(lib1funcs-dir) -DDISABLE_BRANCH_PROFILING
-
-$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE
-	$(call cmd,shipped)
-
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE
+$(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE
 	$(call if_changed,ld)
 
 $(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/sh/boot/compressed/ashiftrt.S b/arch/sh/boot/compressed/ashiftrt.S
new file mode 100644
index 0000000..0f3b291
--- /dev/null
+++ b/arch/sh/boot/compressed/ashiftrt.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashiftrt.S"
diff --git a/arch/sh/boot/compressed/ashldi3.c b/arch/sh/boot/compressed/ashldi3.c
new file mode 100644
index 0000000..7cebd64
--- /dev/null
+++ b/arch/sh/boot/compressed/ashldi3.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/ashldi3.c"
diff --git a/arch/sh/boot/compressed/ashlsi3.S b/arch/sh/boot/compressed/ashlsi3.S
new file mode 100644
index 0000000..e354262
--- /dev/null
+++ b/arch/sh/boot/compressed/ashlsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashlsi3.S"
diff --git a/arch/sh/boot/compressed/ashrsi3.S b/arch/sh/boot/compressed/ashrsi3.S
new file mode 100644
index 0000000..e564be9
--- /dev/null
+++ b/arch/sh/boot/compressed/ashrsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashrsi3.S"
diff --git a/arch/sh/boot/compressed/lshrsi3.S b/arch/sh/boot/compressed/lshrsi3.S
new file mode 100644
index 0000000..5a8281b
--- /dev/null
+++ b/arch/sh/boot/compressed/lshrsi3.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/lshrsi3.S"
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index c7a97f3..481a664 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -43,7 +43,6 @@
 				 unsigned long start, unsigned long end);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_user_range flush_icache_range
 extern void flush_icache_page(struct vm_area_struct *vma,
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index 1a391e3..a6501b8 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -84,7 +84,8 @@
  */
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-	unsigned int sum, __dummy0, __dummy1;
+	__wsum sum;
+	unsigned int __dummy0, __dummy1;
 
 	__asm__ __volatile__(
 		"mov.l	@%1+, %0\n\t"
@@ -197,6 +198,6 @@
 {
 	if (!access_ok(dst, len))
 		return 0;
-	return csum_partial_copy_generic((__force const void *)src, dst, len);
+	return csum_partial_copy_generic(src, (__force void *)dst, len);
 }
 #endif /* __ASM_SH_CHECKSUM_H */
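
For context on the __wsum change: ip_fast_csum() accumulates a 32-bit
ones'-complement sum (sparse type __wsum) and only folds it down to the
16-bit __sum16 result at the end. A small demonstration of that folding
step; the real routine does the accumulation in SH assembly:

    #include <stdint.h>
    #include <stdio.h>

    /* Fold a 32-bit ones'-complement sum to 16 bits and invert it. */
    static uint16_t csum_fold(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);  /* fold the carries once */
        sum = (sum & 0xffff) + (sum >> 16);  /* and any carry from that */
        return (uint16_t)~sum;
    }

    int main(void)
    {
        printf("%#x\n", csum_fold(0x1a2b3c4d));  /* prints 0xa987 */
        return 0;
    }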
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 839551c..1c49235 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -6,17 +6,6 @@
 #include <asm/machvec.h>
 
 /*
- * Only legacy non-sparseirq platforms have to set a reasonably sane
- * value here. sparseirq platforms allocate their irq_descs on the fly,
- * so will expand automatically based on the number of registered IRQs.
- */
-#ifdef CONFIG_SPARSE_IRQ
-# define NR_IRQS		8
-#else
-# define NR_IRQS		512
-#endif
-
-/*
  * This is a special IRQ number for indicating that no IRQ has been
  * triggered and to simply ignore the IRQ dispatch. This is a special
  * case that can happen with IRQ auto-distribution when multiple CPUs
diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h
index cbc7cf8c..2d24234 100644
--- a/arch/sh/include/asm/sfp-machine.h
+++ b/arch/sh/include/asm/sfp-machine.h
@@ -13,6 +13,14 @@
 #ifndef _SFP_MACHINE_H
 #define _SFP_MACHINE_H
 
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
 #define _FP_W_TYPE_SIZE		32
 #define _FP_W_TYPE		unsigned long
 #define _FP_WS_TYPE		signed long
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index 73f3b48..8867bb0 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -68,7 +68,7 @@
 ({									\
 	long __gu_err = -EFAULT;					\
 	unsigned long __gu_val = 0;					\
-	const __typeof__(*(ptr)) *__gu_addr = (ptr);			\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);			\
 	if (likely(access_ok(__gu_addr, (size))))		\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
@@ -124,7 +124,7 @@
  * Clear the area and return remaining number of bytes
  * (on failure.  Usually it's 0.)
  */
-__kernel_size_t __clear_user(void *addr, __kernel_size_t size);
+__kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
 
 #define clear_user(addr,n)						\
 ({									\
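
The __user additions in this header and in the sh files below are sparse
annotations: they mark a pointer as belonging to the user address space so
the static checker flags direct dereferences and mismatched assignments,
while __force documents a deliberate cast between spaces. A sketch of how
the kernel defines them, mirroring include/linux/compiler_types.h; under a
regular compile they expand to nothing:

    #ifdef __CHECKER__
    # define __user  __attribute__((noderef, address_space(__user)))
    # define __force __attribute__((force))
    #else
    # define __user
    # define __force
    #endif

    /* void *p = uptr;                  -- sparse warns about this   */
    /* void *p = (__force void *)uptr;  -- accepted, intent is clear */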
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index f8a2bec..1261dc7b 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -73,8 +73,9 @@
 	BUILD_BUG_ON(SMP_MSG_NR >= 8);
 
 	for (i = 0; i < SMP_MSG_NR; i++)
-		request_irq(104 + i, ipi_interrupt_handler,
-			    IRQF_PERCPU, "IPI", (void *)(long)i);
+		if (request_irq(104 + i, ipi_interrupt_handler,
+			    IRQF_PERCPU, "IPI", (void *)(long)i))
+			pr_err("Failed to request irq %d\n", 104 + i);
 
 	for (i = 0; i < max_cpus; i++)
 		set_cpu_present(i, true);
diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c
index a908612..5b41b59 100644
--- a/arch/sh/kernel/crash_dump.c
+++ b/arch/sh/kernel/crash_dump.c
@@ -26,7 +26,7 @@
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                                size_t csize, unsigned long offset, int userbuf)
 {
-	void  *vaddr;
+	void  __iomem *vaddr;
 
 	if (!csize)
 		return 0;
@@ -34,7 +34,7 @@
 	vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
 
 	if (userbuf) {
-		if (copy_to_user(buf, (vaddr + offset), csize)) {
+		if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) {
 			iounmap(vaddr);
 			return -EFAULT;
 		}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 208f131..d9539d2 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b62ad0b..b3c715b 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -490,7 +490,7 @@
 		inc_unaligned_user_access();
 
 		oldfs = force_uaccess_begin();
-		if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1),
+		if (copy_from_user(&instruction, (insn_size_t __user *)(regs->pc & ~1),
 				   sizeof(instruction))) {
 			force_uaccess_end(oldfs);
 			goto uspace_segv;
@@ -614,7 +614,7 @@
 	unsigned short inst = 0;
 	int err;
 
-	get_user(inst, (unsigned short*)regs->pc);
+	get_user(inst, (unsigned short __user *)regs->pc);
 
 	err = do_fpu_inst(inst, regs);
 	if (!err) {
@@ -699,9 +699,9 @@
 		return;
 
 #ifdef CONFIG_SH_FPU_EMU
-	get_user(inst, (unsigned short *)regs->pc + 1);
+	get_user(inst, (unsigned short __user *)regs->pc + 1);
 	if (!do_fpu_inst(inst, regs)) {
-		get_user(inst, (unsigned short *)regs->pc);
+		get_user(inst, (unsigned short __user *)regs->pc);
 		if (!emulate_branch(inst, regs))
 			return;
 		/* fault in branch.*/
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index e8be0ec..cdaef65 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -51,8 +51,8 @@
 #define Rn	(regs->regs[n])
 #define Rm	(regs->regs[m])
 
-#define WRITE(d,a)	({if(put_user(d, (typeof (d)*)a)) return -EFAULT;})
-#define READ(d,a)	({if(get_user(d, (typeof (d)*)a)) return -EFAULT;})
+#define MWRITE(d,a)	({if(put_user(d, (typeof (d) __user *)a)) return -EFAULT;})
+#define MREAD(d,a)	({if(get_user(d, (typeof (d) __user *)a)) return -EFAULT;})
 
 #define PACK_S(r,f)	FP_PACK_SP(&r,f)
 #define UNPACK_S(f,r)	FP_UNPACK_SP(f,&r)
@@ -157,11 +157,11 @@
 {
 	if (FPSCR_SZ) {
 		FMOV_EXT(n);
-		READ(FRn, Rm + R0 + 4);
+		MREAD(FRn, Rm + R0 + 4);
 		n++;
-		READ(FRn, Rm + R0);
+		MREAD(FRn, Rm + R0);
 	} else {
-		READ(FRn, Rm + R0);
+		MREAD(FRn, Rm + R0);
 	}
 
 	return 0;
@@ -173,11 +173,11 @@
 {
 	if (FPSCR_SZ) {
 		FMOV_EXT(n);
-		READ(FRn, Rm + 4);
+		MREAD(FRn, Rm + 4);
 		n++;
-		READ(FRn, Rm);
+		MREAD(FRn, Rm);
 	} else {
-		READ(FRn, Rm);
+		MREAD(FRn, Rm);
 	}
 
 	return 0;
@@ -189,12 +189,12 @@
 {
 	if (FPSCR_SZ) {
 		FMOV_EXT(n);
-		READ(FRn, Rm + 4);
+		MREAD(FRn, Rm + 4);
 		n++;
-		READ(FRn, Rm);
+		MREAD(FRn, Rm);
 		Rm += 8;
 	} else {
-		READ(FRn, Rm);
+		MREAD(FRn, Rm);
 		Rm += 4;
 	}
 
@@ -207,11 +207,11 @@
 {
 	if (FPSCR_SZ) {
 		FMOV_EXT(m);
-		WRITE(FRm, Rn + R0 + 4);
+		MWRITE(FRm, Rn + R0 + 4);
 		m++;
-		WRITE(FRm, Rn + R0);
+		MWRITE(FRm, Rn + R0);
 	} else {
-		WRITE(FRm, Rn + R0);
+		MWRITE(FRm, Rn + R0);
 	}
 
 	return 0;
@@ -223,11 +223,11 @@
 {
 	if (FPSCR_SZ) {
 		FMOV_EXT(m);
-		WRITE(FRm, Rn + 4);
+		MWRITE(FRm, Rn + 4);
 		m++;
-		WRITE(FRm, Rn);
+		MWRITE(FRm, Rn);
 	} else {
-		WRITE(FRm, Rn);
+		MWRITE(FRm, Rn);
 	}
 
 	return 0;
@@ -240,12 +240,12 @@
 	if (FPSCR_SZ) {
 		FMOV_EXT(m);
 		Rn -= 8;
-		WRITE(FRm, Rn + 4);
+		MWRITE(FRm, Rn + 4);
 		m++;
-		WRITE(FRm, Rn);
+		MWRITE(FRm, Rn);
 	} else {
 		Rn -= 4;
-		WRITE(FRm, Rn);
+		MWRITE(FRm, Rn);
 	}
 
 	return 0;
@@ -445,11 +445,11 @@
 	case 0x4052:
 	case 0x4062:
 		Rn -= 4;
-		WRITE(*reg, Rn);
+		MWRITE(*reg, Rn);
 		break;
 	case 0x4056:
 	case 0x4066:
-		READ(*reg, Rn);
+		MREAD(*reg, Rn);
 		Rn += 4;
 		break;
 	default:
@@ -468,109 +468,6 @@
 }
 
 /**
- *	denormal_to_double - Given denormalized float number,
- *	                     store double float
- *
- *	@fpu: Pointer to sh_fpu_soft structure
- *	@n: Index to FP register
- */
-static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n)
-{
-	unsigned long du, dl;
-	unsigned long x = fpu->fpul;
-	int exp = 1023 - 126;
-
-	if (x != 0 && (x & 0x7f800000) == 0) {
-		du = (x & 0x80000000);
-		while ((x & 0x00800000) == 0) {
-			x <<= 1;
-			exp--;
-		}
-		x &= 0x007fffff;
-		du |= (exp << 20) | (x >> 3);
-		dl = x << 29;
-
-		fpu->fp_regs[n] = du;
-		fpu->fp_regs[n+1] = dl;
-	}
-}
-
-/**
- *	ieee_fpe_handler - Handle denormalized number exception
- *
- *	@regs: Pointer to register structure
- *
- *	Returns 1 when it's handled (should not cause exception).
- */
-static int ieee_fpe_handler(struct pt_regs *regs)
-{
-	unsigned short insn = *(unsigned short *)regs->pc;
-	unsigned short finsn;
-	unsigned long nextpc;
-	int nib[4] = {
-		(insn >> 12) & 0xf,
-		(insn >> 8) & 0xf,
-		(insn >> 4) & 0xf,
-		insn & 0xf};
-
-	if (nib[0] == 0xb ||
-	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
-		regs->pr = regs->pc + 4;
-
-	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
-		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
-		if (regs->sr & 1)
-			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
-		else
-			nextpc = regs->pc + 4;
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
-		if (regs->sr & 1)
-			nextpc = regs->pc + 4;
-		else
-			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
-		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
-		nextpc = regs->regs[nib[1]];
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
-		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
-		nextpc = regs->pc + 4 + regs->regs[nib[1]];
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else if (insn == 0x000b) { /* rts */
-		nextpc = regs->pr;
-		finsn = *(unsigned short *) (regs->pc + 2);
-	} else {
-		nextpc = regs->pc + 2;
-		finsn = insn;
-	}
-
-	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
-		struct task_struct *tsk = current;
-
-		if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) {
-			/* FPU error */
-			denormal_to_double (&tsk->thread.xstate->softfpu,
-					    (finsn >> 8) & 0xf);
-			tsk->thread.xstate->softfpu.fpscr &=
-				~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
-			task_thread_info(tsk)->status |= TS_USEDFPU;
-		} else {
-			force_sig_fault(SIGFPE, FPE_FLTINV,
-					(void __user *)regs->pc);
-		}
-
-		regs->pc = nextpc;
-		return 1;
-	}
-
-	return 0;
-}
-
-/**
  * fpu_init - Initialize FPU registers
  * @fpu: Pointer to software emulated FPU registers.
  */
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index 8b45044..78c4b6e 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -28,9 +28,9 @@
 	return 0;
 }
 
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+__kernel_size_t __clear_user(void __user *to, __kernel_size_t n)
 {
-	memset(to, 0, n);
+	memset((__force void *)to, 0, n);
 	return 0;
 }
 
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index cd677bc..ffab163 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -244,7 +244,7 @@
 		get_sigframe(ksig, regs, sigframe_size);
 
 	if (invalid_frame_pointer(sf, sigframe_size)) {
-		force_fatal_sig(SIGILL);
+		force_exit_sig(SIGILL);
 		return -EINVAL;
 	}
 
@@ -336,7 +336,7 @@
 	sf = (struct rt_signal_frame __user *)
 		get_sigframe(ksig, regs, sigframe_size);
 	if (invalid_frame_pointer(sf, sigframe_size)) {
-		force_fatal_sig(SIGILL);
+		force_exit_sig(SIGILL);
 		return -EINVAL;
 	}
 
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index c37764d..46adabc 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -494,3 +494,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index bbbd40c..8f20862 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -122,7 +122,7 @@
 		if ((sp & 7) ||
 		    copy_to_user((char __user *) sp, &tp->reg_window[window],
 				 sizeof(struct reg_window32))) {
-			force_fatal_sig(SIGILL);
+			force_exit_sig(SIGILL);
 			return;
 		}
 	}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 95dd1ee..7399327 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -193,7 +193,7 @@
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if X86_64
 	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	select HAVE_SAMPLE_FTRACE_DIRECT	if X86_64
-	select HAVE_SAMPLE_FTRACE_MULTI_DIRECT	if X86_64
+	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI	if X86_64
 	select HAVE_EBPF_JIT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EISA
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 0b6b277..fd2ee94 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -226,7 +226,7 @@
 	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
 		warn_bad_vsyscall(KERN_DEBUG, regs,
 				  "seccomp tried to change syscall nr or ip");
-		force_fatal_sig(SIGSYS);
+		force_exit_sig(SIGSYS);
 		return true;
 	}
 	regs->orig_ax = -1;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6039644..ec6444f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2211,7 +2211,6 @@
 	/* must not have branches... */
 	local_irq_save(flags);
 	__intel_pmu_disable_all(false); /* we don't care about BTS */
-	__intel_pmu_pebs_disable_all();
 	__intel_pmu_lbr_disable();
 	/*            ... until here */
 	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
@@ -2225,7 +2224,6 @@
 	/* must not have branches... */
 	local_irq_save(flags);
 	__intel_pmu_disable_all(false); /* we don't care about BTS */
-	__intel_pmu_pebs_disable_all();
 	__intel_pmu_arch_lbr_disable();
 	/*            ... until here */
 	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
@@ -3048,8 +3046,10 @@
 {
 	struct event_constraint *c = &vlbr_constraint;
 
-	if (unlikely(constraint_match(c, event->hw.config)))
+	if (unlikely(constraint_match(c, event->hw.config))) {
+		event->hw.flags |= c->flags;
 		return c;
+	}
 
 	return NULL;
 }
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6b72e9b..8043213 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -265,6 +265,8 @@
 
 	cpuc->last_task_ctx = NULL;
 	cpuc->last_log_id = 0;
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
+		wrmsrl(MSR_LBR_SELECT, 0);
 }
 
 /*
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index eb2c6ce..3660f69 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3608,6 +3608,9 @@
 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 	struct extra_reg *er;
 	int idx = 0;
+	/* Any of the CHA events may be filtered by Thread/Core-ID. */
+	if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN)
+		idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID;
 
 	for (er = skx_uncore_cha_extra_regs; er->msr; er++) {
 		if (er->event != (event->hw.config & er->config_mask))
@@ -3675,6 +3678,7 @@
 	UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
 	UNCORE_EVENT_CONSTRAINT(0xd4, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
 	EVENT_CONSTRAINT_END
 };
 
@@ -4525,6 +4529,13 @@
 	pmu_iio_cleanup_mapping(type, &snr_iio_mapping_group);
 }
 
+static struct event_constraint snr_uncore_iio_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
+	EVENT_CONSTRAINT_END
+};
+
 static struct intel_uncore_type snr_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
@@ -4536,6 +4547,7 @@
 	.event_mask_ext		= SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
 	.box_ctl		= SNR_IIO_MSR_PMON_BOX_CTL,
 	.msr_offset		= SNR_IIO_MSR_OFFSET,
+	.constraints		= snr_uncore_iio_constraints,
 	.ops			= &ivbep_uncore_msr_ops,
 	.format_group		= &snr_uncore_iio_format_group,
 	.attr_update		= snr_iio_attr_update,
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 24f4a06..96eb7db 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -177,6 +177,9 @@
 		return;
 	}
 
+	if (!hv_vp_index)
+		return;
+
 	hv_reenlightenment_cb = cb;
 
 	/* Make sure callback is registered before we write to MSRs */
@@ -383,20 +386,13 @@
  */
 void __init hyperv_init(void)
 {
-	u64 guest_id, required_msrs;
+	u64 guest_id;
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 	int cpuhp;
 
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return;
 
-	/* Absolutely required MSRs */
-	required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
-		HV_MSR_VP_INDEX_AVAILABLE;
-
-	if ((ms_hyperv.features & required_msrs) != required_msrs)
-		return;
-
 	if (hv_common_init())
 		return;
 
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 6053674..c2767a6a 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -102,12 +102,6 @@
  */
 extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
 
-/*
- * Tasks that are not using SVA have mm->pasid set to zero to note that they
- * will not have the valid bit set in MSR_IA32_PASID while they are running.
- */
-#define PASID_DISABLED	0
-
 /* Trap handling */
 extern int  fpu__exception_code(struct fpu *fpu, int trap_nr);
 extern void fpu_sync_fpstate(struct fpu *fpu);
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 79f95d3..9656a5b 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -3,6 +3,7 @@
 #define _ASM_X86_FPU_XCR_H
 
 #define XCR_XFEATURE_ENABLED_MASK	0x00000000
+#define XCR_XFEATURE_IN_USE_MASK	0x00000001
 
 static inline u64 xgetbv(u32 index)
 {
@@ -20,4 +21,15 @@
 	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
+/*
+ * Return a mask of xfeatures which the processor currently tracks
+ * as in use, i.e. not in their initial configuration.
+ *
+ * Callers should check X86_FEATURE_XGETBV1.
+ */
+static inline u64 xfeatures_in_use(void)
+{
+	return xgetbv(XCR_XFEATURE_IN_USE_MASK);
+}
+
 #endif /* _ASM_X86_FPU_XCR_H */
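
A hedged usage sketch for the new helper (the surrounding logic is
illustrative, not taken from this patch): reading XCR sub-leaf 1 is only
architecturally valid when the CPU enumerates XGETBV1, hence the feature
check the comment above asks for:

    u64 mask = 0;

    if (boot_cpu_has(X86_FEATURE_XGETBV1))
        mask = xfeatures_in_use();

    /* mask reflects the CPU's XINUSE tracking and can be intersected
     * with e.g. XFEATURE_MASK_SIGFRAME_INITOPT from the next hunk. */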
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 0f8b90a..cd3dd17 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -92,6 +92,13 @@
 #define XFEATURE_MASK_FPSTATE	(XFEATURE_MASK_USER_RESTORE | \
 				 XFEATURE_MASK_SUPERVISOR_SUPPORTED)
 
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT	(XFEATURE_MASK_XTILE | \
+					 XFEATURE_MASK_USER_DYNAMIC)
+
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void __init update_regset_xstate_info(unsigned int size,
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 2715843..5a0bcf8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -108,6 +108,8 @@
 #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
 
+#define INTEL_FAM6_RAPTOR_LAKE		0xB7
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2acf37c..6ac61f8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -38,7 +38,6 @@
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
 #define KVM_MAX_VCPUS 1024
-#define KVM_SOFT_MAX_VCPUS 710
 
 /*
  * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@@ -364,6 +363,7 @@
 		unsigned int cr4_smap:1;
 		unsigned int cr4_smep:1;
 		unsigned int cr4_la57:1;
+		unsigned int efer_lma:1;
 	};
 };
 
@@ -725,6 +725,7 @@
 
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 *cpuid_entries;
+	u32 kvm_cpuid_base;
 
 	u64 reserved_gpa_bits;
 	int maxphyaddr;
@@ -748,7 +749,7 @@
 		u8 preempted;
 		u64 msr_val;
 		u64 last_steal;
-		struct gfn_to_pfn_cache cache;
+		struct gfn_to_hva_cache cache;
 	} st;
 
 	u64 l1_tsc_offset;
@@ -1034,6 +1035,7 @@
 #define APICV_INHIBIT_REASON_IRQWIN     3
 #define APICV_INHIBIT_REASON_PIT_REINJ  4
 #define APICV_INHIBIT_REASON_X2APIC	5
+#define APICV_INHIBIT_REASON_BLOCKIRQ	6
 
 struct kvm_arch {
 	unsigned long n_used_mmu_pages;
@@ -1476,6 +1478,7 @@
 	int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+	int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
 
 	int (*get_msr_feature)(struct kvm_msr_entry *entry);
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 6929987..56935eb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -83,6 +83,18 @@
 	return ret;
 }
 
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+				      unsigned long p2, unsigned long p3)
+{
+	long ret;
+
+	asm volatile("vmmcall"
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+		     : "memory");
+	return ret;
+}
+
 #ifdef CONFIG_KVM_GUEST
 void kvmclock_init(void);
 void kvmclock_disable(void);
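
kvm_hypercall3() normally relies on alternatives patching to turn VMCALL into VMMCALL on AMD; the new kvm_sev_hypercall3() encodes VMMCALL directly so SEV guests can issue hypercalls even before that patching has run. A stripped-down, single-argument sketch of the same idea (helper name hypothetical):

/* Hypothetical one-argument variant of kvm_sev_hypercall3(): VMMCALL is
 * encoded directly instead of relying on VMCALL->VMMCALL patching. */
static inline long sev_hypercall1(unsigned int nr, unsigned long p1)
{
	long ret;

	asm volatile("vmmcall"
		     : "=a"(ret)
		     : "a"(nr), "b"(p1)
		     : "memory");
	return ret;
}
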
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 2d4f5c1..e2c6f43 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -44,6 +44,8 @@
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
+					    bool enc);
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
@@ -78,6 +80,8 @@
 early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
 
 static inline void mem_encrypt_free_decrypted_mem(void) { }
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cebec95..21c4a69 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -97,6 +97,12 @@
 	PVOP_VCALL1(mmu.exit_mmap, mm);
 }
 
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+						  int npages, bool enc)
+{
+	PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index fc1151e..a69012e 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -168,6 +168,7 @@
 
 	/* Hook for intercepting the destruction of an mm_struct. */
 	void (*exit_mmap)(struct mm_struct *mm);
+	void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
 
 #ifdef CONFIG_PARAVIRT_XXL
 	struct paravirt_callee_save read_cr2;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 191878a..355d38c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -806,11 +806,14 @@
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
 #endif
 
+#define for_each_possible_hypervisor_cpuid_base(function) \
+	for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 {
 	uint32_t base, eax, signature[3];
 
-	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+	for_each_possible_hypervisor_cpuid_base(base) {
 		cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
 
 		if (!memcmp(sig, signature, 12) &&
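
Hypervisors advertise a 12-byte vendor signature in EBX/ECX/EDX of a CPUID leaf somewhere in the 0x40000000..0x4000ff00 range, at 0x100 strides; the new macro names that probe loop so it can be reused. A guest-side user-space sketch of the scan (only meaningful when the hypervisor bit, CPUID.1:ECX[31], is set):

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the 0x40000000..0x4000ff00 signature scan. */
static uint32_t find_hv_base(const char *sig)
{
	uint32_t base, eax, regs[3];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, regs[0], regs[1], regs[2]);
		if (!memcmp(sig, regs, 12))
			return base;
	}
	return 0;
}

int main(void)
{
	/* KVM_SIGNATURE from the uapi change below. */
	printf("KVM base: %#x\n", find_hv_base("KVMKVMKVM\0\0\0"));
	return 0;
}
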
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081..8726175 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -83,6 +83,7 @@
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 bool kernel_page_present(struct page *page);
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 
 extern int kernel_set_to_readonly;
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 08b0e90..81a0211 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -126,6 +126,7 @@
 
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index cbb67b6..39ebe05 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -27,6 +27,7 @@
 	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
 	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
 	    insns "						\n"	\
+	    ".byte 0x53, 0x43, 0x54				\n"	\
 	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
 	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
 	    ".popsection					\n")
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 0575f58..e5e0fe1 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -281,13 +281,13 @@
 	return _hypercall2(int, callback_op, cmd, arg);
 }
 
-static inline int
+static __always_inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
 }
 
-static inline unsigned long
+static __always_inline unsigned long
 HYPERVISOR_get_debugreg(int reg)
 {
 	return _hypercall1(unsigned long, get_debugreg, reg);
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 4957f59..5adab89 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -64,6 +64,7 @@
 
 #ifdef CONFIG_PVH
 void __init xen_pvh_init(struct boot_params *boot_params);
+void __init mem_map_via_hcall(struct boot_params *boot_params_p);
 #endif
 
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 5146bba..6e64b27b 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -8,6 +8,7 @@
  * should be used to determine that a VM is running under KVM.
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
 
 /* This CPUID returns two feature bitmaps in eax, edx. Before enabling
  * a particular paravirtualization, the appropriate feature bit should
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index cb2fdd13..c881bca 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -76,6 +76,7 @@
 	{ X86_FEATURE_SGX1,			X86_FEATURE_SGX       },
 	{ X86_FEATURE_SGX2,			X86_FEATURE_SGX1      },
 	{ X86_FEATURE_XFD,			X86_FEATURE_XSAVES    },
+	{ X86_FEATURE_XFD,			X86_FEATURE_XGETBV1   },
 	{ X86_FEATURE_AMX_TILE,			X86_FEATURE_XFD       },
 	{}
 };
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index acfd5d9..bb9a46a 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -547,12 +547,13 @@
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	/* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+	/* MCE errata HSD131, HSM142, HSW131, BDM48 and SKX37 */
 	if ((c->x86 == 6) &&
 	    ((c->x86_model == INTEL_FAM6_HASWELL) ||
 	     (c->x86_model == INTEL_FAM6_HASWELL_L) ||
 	     (c->x86_model == INTEL_FAM6_BROADWELL) ||
-	     (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+	     (c->x86_model == INTEL_FAM6_HASWELL_G) ||
+	     (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
 	    (m->bank == 0) &&
 	    ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
 		return true;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4794b71..ff55df6 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -163,12 +163,22 @@
 	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
 	      &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
 
-	if (eax >= HYPERV_CPUID_MIN &&
-	    eax <= HYPERV_CPUID_MAX &&
-	    !memcmp("Microsoft Hv", hyp_signature, 12))
-		return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+	if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
+	    memcmp("Microsoft Hv", hyp_signature, 12))
+		return 0;
 
-	return 0;
+	/* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
+	eax = cpuid_eax(HYPERV_CPUID_FEATURES);
+	if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
+		pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
+		return 0;
+	}
+	if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
+		pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
+		return 0;
+	}
+
+	return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
 }
 
 static unsigned char hv_get_nmi_reason(void)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 63d3de0..8471a8b 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -28,8 +28,7 @@
 static LIST_HEAD(sgx_active_page_list);
 static DEFINE_SPINLOCK(sgx_reclaimer_lock);
 
-/* The free page list lock protected variables prepend the lock. */
-static unsigned long sgx_nr_free_pages;
+static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
 
 /* Nodes with one or more EPC sections. */
 static nodemask_t sgx_numa_mask;
@@ -403,14 +402,15 @@
 
 		spin_lock(&node->lock);
 		list_add_tail(&epc_page->list, &node->free_page_list);
-		sgx_nr_free_pages++;
 		spin_unlock(&node->lock);
+		atomic_long_inc(&sgx_nr_free_pages);
 	}
 }
 
 static bool sgx_should_reclaim(unsigned long watermark)
 {
-	return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
+	return atomic_long_read(&sgx_nr_free_pages) < watermark &&
+	       !list_empty(&sgx_active_page_list);
 }
 
 static int ksgxd(void *p)
@@ -471,9 +471,9 @@
 
 	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
 	list_del_init(&page->list);
-	sgx_nr_free_pages--;
 
 	spin_unlock(&node->lock);
+	atomic_long_dec(&sgx_nr_free_pages);
 
 	return page;
 }
@@ -625,9 +625,9 @@
 	spin_lock(&node->lock);
 
 	list_add_tail(&page->list, &node->free_page_list);
-	sgx_nr_free_pages++;
 
 	spin_unlock(&node->lock);
+	atomic_long_inc(&sgx_nr_free_pages);
 }
 
 static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
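
sgx_nr_free_pages aggregates free EPC pages across all NUMA nodes, but each update used to happen under a different per-node lock, so concurrent updates could race. Converting the counter to an atomic and moving the update outside the spinlock fixes that. A generic C11 sketch of the pattern (names hypothetical):

#include <stdatomic.h>
#include <stdbool.h>

/* A cross-domain counter kept as an atomic so it no longer depends on
 * any one node's lock. */
static atomic_long nr_free_pages;

static void page_freed(void)
{
	/* list manipulation stays under the owning node's lock ... */
	atomic_fetch_add_explicit(&nr_free_pages, 1, memory_order_relaxed);
}

static bool should_reclaim(long watermark)
{
	return atomic_load_explicit(&nr_free_pages, memory_order_relaxed)
	       < watermark;
}
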
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index e18210d..86ea7c0 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u64, xfd_state);
@@ -199,6 +200,32 @@
 }
 
 /*
+ * XSAVE itself always writes all requested xfeatures.  Removing features
+ * from the request bitmap reduces the features which are written.
+ * Generate a mask of features which must be written to a sigframe.  The
+ * unset features can be optimized away and not written.
+ *
+ * This optimization is user-visible.  Only use for states where
+ * uninitialized sigframe contents are tolerable, like dynamic features.
+ *
+ * Users of buffers produced with this optimization must check XSTATE_BV
+ * to determine which features have been optimized out.
+ */
+static inline u64 xfeatures_need_sigframe_write(void)
+{
+	u64 xfeatures_to_write;
+
+	/* In-use features must be written: */
+	xfeatures_to_write = xfeatures_in_use();
+
+	/* Also write all non-optimizable sigframe features: */
+	xfeatures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
+			      ~XFEATURE_MASK_SIGFRAME_INITOPT;
+
+	return xfeatures_to_write;
+}
+
+/*
  * Save xstate to user space xsave area.
  *
  * We don't use modified optimization because xrstor/xrstors might track
@@ -220,10 +247,16 @@
 	 */
 	struct fpstate *fpstate = current->thread.fpu.fpstate;
 	u64 mask = fpstate->user_xfeatures;
-	u32 lmask = mask;
-	u32 hmask = mask >> 32;
+	u32 lmask;
+	u32 hmask;
 	int err;
 
+	/* Optimize away writing unnecessary xfeatures: */
+	if (fpu_state_size_dynamic())
+		mask &= xfeatures_need_sigframe_write();
+
+	lmask = mask;
+	hmask = mask >> 32;
 	xfd_validate_state(fpstate, mask, false);
 
 	stac();
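
The resulting sigframe mask is the task's user xfeatures restricted to whatever is currently in use plus every feature that must always be written. A small sketch of the mask arithmetic, with made-up feature bits standing in for the real XFEATURE_MASK_* values:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical feature bits; only the mask algebra matters here. */
#define F_FP		(1ull << 0)
#define F_SSE		(1ull << 1)
#define F_TILE		(1ull << 18)	/* stand-in for XFEATURE_MASK_XTILE */

#define USER_SUPPORTED		(F_FP | F_SSE | F_TILE)
#define SIGFRAME_INITOPT	F_TILE	/* may stay uninitialized in the frame */

static uint64_t sigframe_write_mask(uint64_t xinuse)
{
	/* in-use features, plus everything that cannot be optimized away */
	return xinuse | (USER_SUPPORTED & ~SIGFRAME_INITOPT);
}

int main(void)
{
	/* With AMX tiles in their init state, F_TILE drops out of the mask. */
	printf("%#llx\n", (unsigned long long)sigframe_write_mask(F_FP | F_SSE));
	return 0;
}
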
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8863d19..59abbda 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -28,6 +28,7 @@
 #include <linux/swait.h>
 #include <linux/syscore_ops.h>
 #include <linux/cc_platform.h>
+#include <linux/efi.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -41,6 +42,7 @@
 #include <asm/ptrace.h>
 #include <asm/reboot.h>
 #include <asm/svm.h>
+#include <asm/e820/api.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
 
@@ -434,6 +436,8 @@
 	kvm_disable_steal_time();
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+		wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
 	kvm_pv_disable_apf();
 	if (!shutdown)
 		apf_task_wake_all();
@@ -548,6 +552,55 @@
 	__send_ipi_mask(local_mask, vector);
 }
 
+static int __init setup_efi_kvm_sev_migration(void)
+{
+	efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
+	efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
+	efi_status_t status;
+	unsigned long size;
+	bool enabled;
+
+	if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
+	    !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+		return 0;
+
+	if (!efi_enabled(EFI_BOOT))
+		return 0;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+		pr_info("%s: EFI runtime services are not enabled\n", __func__);
+		return 0;
+	}
+
+	size = sizeof(enabled);
+
+	/* Get variable contents into buffer */
+	status = efi.get_variable(efi_sev_live_migration_enabled,
+				  &efi_variable_guid, NULL, &size, &enabled);
+
+	if (status == EFI_NOT_FOUND) {
+		pr_info("%s: EFI live migration variable not found\n", __func__);
+		return 0;
+	}
+
+	if (status != EFI_SUCCESS) {
+		pr_info("%s: EFI variable retrieval failed\n", __func__);
+		return 0;
+	}
+
+	if (enabled == 0) {
+		pr_info("%s: live migration disabled in EFI\n", __func__);
+		return 0;
+	}
+
+	pr_info("%s: live migration enabled in EFI\n", __func__);
+	wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+
+	return 1;
+}
+
+late_initcall(setup_efi_kvm_sev_migration);
+
 /*
  * Set the IPI entry points
  */
@@ -756,7 +809,7 @@
 		return 0;	/* So we don't blow up on old processors */
 
 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
-		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+		return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
 
 	return 0;
 }
@@ -806,8 +859,62 @@
 	return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
 }
 
+static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
+{
+	kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
+			   KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+}
+
 static void __init kvm_init_platform(void)
 {
+	if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+	    kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
+		unsigned long nr_pages;
+		int i;
+
+		pv_ops.mmu.notify_page_enc_status_changed =
+			kvm_sev_hc_page_enc_status;
+
+		/*
+		 * Reset the host's shared pages list related to
+		 * kernel-specific page encryption status settings before we
+		 * load a new kernel by kexec. Reset the page encryption
+		 * status during early boot instead of just before kexec to
+		 * avoid SMP races during kvm_pv_guest_cpu_reboot().
+		 * NOTE: We cannot reset the complete shared pages list here,
+		 * as we need to retain the UEFI/OVMF firmware-specific
+		 * settings.
+		 */
+
+		for (i = 0; i < e820_table->nr_entries; i++) {
+			struct e820_entry *entry = &e820_table->entries[i];
+
+			if (entry->type != E820_TYPE_RAM)
+				continue;
+
+			nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
+
+			kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
+				       nr_pages,
+				       KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+		}
+
+		/*
+		 * Ensure that _bss_decrypted section is marked as decrypted in the
+		 * shared pages list.
+		 */
+		nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+					PAGE_SIZE);
+		early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+						nr_pages, 0);
+
+		/*
+		 * If not booted using EFI, enable live migration support.
+		 */
+		if (!efi_enabled(EFI_BOOT))
+			wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+			       KVM_MIGRATION_READY);
+	}
 	kvmclock_init();
 	x86_platform.apic_post_init = kvm_apic_init;
 }
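
Each KVM_HC_MAP_GPA_RANGE call above takes the GPA, a page count, and an attribute word that combines the encryption state with a page-size hint; KVM_MAP_GPA_RANGE_ENC_STAT(enc) selects the encrypted/decrypted bit. A hedged sketch of composing the attributes (helper name hypothetical; flags are the uapi KVM_MAP_GPA_RANGE_* values used above):

static void set_range_enc_status(unsigned long gpa, int npages, bool enc)
{
	u64 attrs = KVM_MAP_GPA_RANGE_ENC_STAT(enc) |
		    KVM_MAP_GPA_RANGE_PAGE_SZ_4K;

	kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, gpa, npages, attrs);
}
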
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7157c2d..7f7636a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -337,6 +337,7 @@
 			(void (*)(struct mmu_gather *, void *))tlb_remove_page,
 
 	.mmu.exit_mmap		= paravirt_nop,
+	.mmu.notify_page_enc_status_changed	= paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
 	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e9ee8b5..04143a6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -964,6 +964,9 @@
 	struct unwind_state state;
 	unsigned long addr = 0;
 
+	if (!try_get_task_stack(p))
+		return 0;
+
 	for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state);
 	     unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
@@ -974,6 +977,8 @@
 		break;
 	}
 
+	put_task_stack(p);
+
 	return addr;
 }
 
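
get_wchan() walks another task's stack, which can be freed out from under the walk if that task exits; pinning the stack with try_get_task_stack()/put_task_stack() closes the race. The shape of the pattern, sketched with the unwinder loop elided (function name hypothetical):

static unsigned long walk_remote_stack(struct task_struct *p)
{
	unsigned long addr = 0;

	if (!try_get_task_stack(p))	/* stack already freed: nothing to do */
		return 0;

	/* ... unwind frames and pick a return address, as above ... */

	put_task_stack(p);		/* pairs with the successful try */
	return addr;
}
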
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 49b596d..6a190c7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -742,6 +742,28 @@
 	return 0;
 }
 
+static char * __init prepare_command_line(void)
+{
+#ifdef CONFIG_CMDLINE_BOOL
+#ifdef CONFIG_CMDLINE_OVERRIDE
+	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+#else
+	if (builtin_cmdline[0]) {
+		/* append boot loader cmdline to builtin */
+		strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
+		strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
+	}
+#endif
+#endif
+
+	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+
+	parse_early_param();
+
+	return command_line;
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -831,6 +853,23 @@
 	x86_init.oem.arch_setup();
 
 	/*
+	 * x86_configure_nx() is called before parse_early_param() (called by
+	 * prepare_command_line()) to detect whether hardware doesn't support
+	 * NX (so that the early EHCI debug console setup can safely call
+	 * set_fixmap()). It may then be called again from within noexec_setup()
+	 * during parsing early parameters to honor the respective command line
+	 * option.
+	 */
+	x86_configure_nx();
+
+	/*
+	 * This parses early params; it needs to run before
+	 * early_reserve_memory() because the latter relies on settings
+	 * supplied as early params.
+	 */
+	*cmdline_p = prepare_command_line();
+
+	/*
 	 * Do some memory reservations *before* memory is added to memblock, so
 	 * memblock allocations won't overwrite it.
 	 *
@@ -863,33 +902,6 @@
 	bss_resource.start = __pa_symbol(__bss_start);
 	bss_resource.end = __pa_symbol(__bss_stop)-1;
 
-#ifdef CONFIG_CMDLINE_BOOL
-#ifdef CONFIG_CMDLINE_OVERRIDE
-	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
-	if (builtin_cmdline[0]) {
-		/* append boot loader cmdline to builtin */
-		strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
-		strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
-		strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-	}
-#endif
-#endif
-
-	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-	*cmdline_p = command_line;
-
-	/*
-	 * x86_configure_nx() is called before parse_early_param() to detect
-	 * whether hardware doesn't support NX (so that the early EHCI debug
-	 * console setup can safely call set_fixmap()). It may then be called
-	 * again from within noexec_setup() during parsing early parameters
-	 * to honor the respective command line option.
-	 */
-	x86_configure_nx();
-
-	parse_early_param();
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/*
 	 * Memory used by the kernel cannot be hot-removed because Linux
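
prepare_command_line() keeps the existing merge semantics: with CONFIG_CMDLINE_OVERRIDE the builtin string replaces the bootloader-supplied one; otherwise, if a builtin string exists, the bootloader string is appended after it. A portable user-space sketch of the merge, with snprintf standing in for strlcpy/strlcat as an assumption:

#include <stdio.h>
#include <string.h>

#define CMDLINE_SIZE 256

static void merge_cmdline(char *boot, size_t sz, const char *builtin,
			  int override)
{
	char merged[CMDLINE_SIZE];

	if (override) {
		snprintf(merged, sizeof(merged), "%s", builtin);
	} else if (builtin[0]) {
		/* builtin first, then the bootloader-provided string */
		snprintf(merged, sizeof(merged), "%s %s", builtin, boot);
	} else {
		return;
	}
	snprintf(boot, sz, "%s", merged);
}

int main(void)
{
	char boot[CMDLINE_SIZE] = "root=/dev/sda1";

	merge_cmdline(boot, sizeof(boot), "console=ttyS0", 0);
	printf("%s\n", boot);	/* "console=ttyS0 root=/dev/sda1" */
	return 0;
}
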
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8241927..ac2909f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1350,12 +1350,7 @@
 		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
-/*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- *            for common interface support.
- */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_prepare_cpus_common(void)
 {
 	unsigned int i;
 
@@ -1386,6 +1381,17 @@
 	set_sched_topology(x86_topology);
 
 	set_cpu_sibling_map(0);
+}
+
+/*
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs. It is a legacy parameter
+ *            for common interface support.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+	smp_prepare_cpus_common();
+
 	init_freq_invariance(false, false);
 	smp_sanity_check();
 
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ea028e7..9c407a3 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -56,10 +56,15 @@
 	text_poke_bp(insn, code, size, emulate);
 }
 
-static void __static_call_validate(void *insn, bool tail)
+static void __static_call_validate(void *insn, bool tail, bool tramp)
 {
 	u8 opcode = *(u8 *)insn;
 
+	if (tramp && memcmp(insn + 5, "SCT", 3)) {
+		pr_err("trampoline signature fail\n");
+		BUG();
+	}
+
 	if (tail) {
 		if (opcode == JMP32_INSN_OPCODE ||
 		    opcode == RET_INSN_OPCODE)
@@ -74,7 +79,8 @@
 	/*
 	 * If we ever trigger this, our text is corrupt, we'll probably not live long.
 	 */
-	WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+	pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+	BUG();
 }
 
 static inline enum insn_type __sc_insn(bool null, bool tail)
@@ -97,12 +103,12 @@
 	mutex_lock(&text_mutex);
 
 	if (tramp) {
-		__static_call_validate(tramp, true);
+		__static_call_validate(tramp, true, true);
 		__static_call_transform(tramp, __sc_insn(!func, true), func);
 	}
 
 	if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
-		__static_call_validate(site, tail);
+		__static_call_validate(site, tail, false);
 		__static_call_transform(site, __sc_insn(!func, tail), func);
 	}
 
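
Each trampoline is a 5-byte instruction followed by the literal bytes "SCT" (0x53 0x43 0x54, emitted by the static_call.h change above), so the validator can reject anything that is not a genuine trampoline before patching it. A user-space sketch of the check:

#include <stdio.h>
#include <string.h>

/* Sketch of the trampoline check: 5 insn bytes, then the "SCT" magic. */
static int tramp_is_valid(const unsigned char *insn)
{
	return memcmp(insn + 5, "SCT", 3) == 0;
}

int main(void)
{
	/* jmp rel32 (e9 xx xx xx xx) followed by the signature bytes */
	const unsigned char tramp[] = { 0xe9, 0, 0, 0, 0, 0x53, 0x43, 0x54 };

	printf("%s\n", tramp_is_valid(tramp) ? "valid" : "invalid");
	return 0;
}
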
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index f14f69d..c21bcd6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -106,7 +106,7 @@
 	 */
 	local_irq_enable();
 
-	BUG_ON(!vm86 || !vm86->user_vm86);
+	BUG_ON(!vm86);
 
 	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
 	user = vm86->user_vm86;
@@ -160,7 +160,7 @@
 	user_access_end();
 Efault:
 	pr_alert("could not access userspace vm86 info\n");
-	force_fatal_sig(SIGSEGV);
+	force_exit_sig(SIGSEGV);
 	goto exit_vm86;
 }
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2d70edb..07e9215 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -99,11 +99,45 @@
 	return 0;
 }
 
+static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
+{
+	u32 function;
+	struct kvm_cpuid_entry2 *entry;
+
+	vcpu->arch.kvm_cpuid_base = 0;
+
+	for_each_possible_hypervisor_cpuid_base(function) {
+		entry = kvm_find_cpuid_entry(vcpu, function, 0);
+
+		if (entry) {
+			u32 signature[3];
+
+			signature[0] = entry->ebx;
+			signature[1] = entry->ecx;
+			signature[2] = entry->edx;
+
+			BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
+			if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
+				vcpu->arch.kvm_cpuid_base = function;
+				break;
+			}
+		}
+	}
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+	u32 base = vcpu->arch.kvm_cpuid_base;
+
+	if (!base)
+		return NULL;
+
+	return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+}
+
 void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
 {
-	struct kvm_cpuid_entry2 *best;
-
-	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+	struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
 
 	/*
 	 * save the feature bitmap to avoid cpuid lookup for every PV
@@ -142,7 +176,7 @@
 		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+	best = kvm_find_kvm_cpuid_features(vcpu);
 	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
 		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@@ -239,6 +273,26 @@
 	return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
 }
 
+static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+			 int nent)
+{
+	int r;
+
+	r = kvm_check_cpuid(e2, nent);
+	if (r)
+		return r;
+
+	kvfree(vcpu->arch.cpuid_entries);
+	vcpu->arch.cpuid_entries = e2;
+	vcpu->arch.cpuid_nent = nent;
+
+	kvm_update_kvm_cpuid_base(vcpu);
+	kvm_update_cpuid_runtime(vcpu);
+	kvm_vcpu_after_set_cpuid(vcpu);
+
+	return 0;
+}
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
@@ -275,18 +329,9 @@
 		e2[i].padding[2] = 0;
 	}
 
-	r = kvm_check_cpuid(e2, cpuid->nent);
-	if (r) {
+	r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+	if (r)
 		kvfree(e2);
-		goto out_free_cpuid;
-	}
-
-	kvfree(vcpu->arch.cpuid_entries);
-	vcpu->arch.cpuid_entries = e2;
-	vcpu->arch.cpuid_nent = cpuid->nent;
-
-	kvm_update_cpuid_runtime(vcpu);
-	kvm_vcpu_after_set_cpuid(vcpu);
 
 out_free_cpuid:
 	kvfree(e);
@@ -310,20 +355,11 @@
 			return PTR_ERR(e2);
 	}
 
-	r = kvm_check_cpuid(e2, cpuid->nent);
-	if (r) {
+	r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+	if (r)
 		kvfree(e2);
-		return r;
-	}
 
-	kvfree(vcpu->arch.cpuid_entries);
-	vcpu->arch.cpuid_entries = e2;
-	vcpu->arch.cpuid_nent = cpuid->nent;
-
-	kvm_update_cpuid_runtime(vcpu);
-	kvm_vcpu_after_set_cpuid(vcpu);
-
-	return 0;
+	return r;
 }
 
 int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -871,8 +907,7 @@
 		}
 		break;
 	case KVM_CPUID_SIGNATURE: {
-		static const char signature[12] = "KVMKVMKVM\0\0";
-		const u32 *sigptr = (const u32 *)signature;
+		const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
 		entry->eax = KVM_CPUID_FEATURES;
 		entry->ebx = sigptr[0];
 		entry->ecx = sigptr[1];
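
KVM_SIGNATURE is 12 bytes that land verbatim in EBX/ECX/EDX of the signature leaf, which is why a 3-element u32 array can be memcmp()'d against it. A worked example of the packing (assumes a little-endian host, as x86 is):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t sig[3];

	/* 12 bytes, exactly what lands in EBX/ECX/EDX of the leaf */
	memcpy(sig, "KVMKVMKVM\0\0\0", 12);
	printf("ebx=%#x ecx=%#x edx=%#x\n", sig[0], sig[1], sig[2]);
	/* prints ebx=0x4b4d564b ecx=0x564b4d56 edx=0x4d */
	return 0;
}
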
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f15c01..5e19e6e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1472,7 +1472,7 @@
 
 		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
 			hv_vcpu->hv_vapic = data;
-			if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
+			if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
 				return 1;
 			break;
 		}
@@ -1490,7 +1490,7 @@
 			return 1;
 		hv_vcpu->hv_vapic = data;
 		kvm_vcpu_mark_page_dirty(vcpu, gfn);
-		if (kvm_lapic_enable_pv_eoi(vcpu,
+		if (kvm_lapic_set_pv_eoi(vcpu,
 					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
 					    sizeof(struct hv_vp_assist_page)))
 			return 1;
@@ -2022,7 +2022,7 @@
 {
 	bool longmode;
 
-	longmode = is_64_bit_mode(vcpu);
+	longmode = is_64_bit_hypercall(vcpu);
 	if (longmode)
 		kvm_rax_write(vcpu, result);
 	else {
@@ -2171,7 +2171,7 @@
 	}
 
 #ifdef CONFIG_X86_64
-	if (is_64_bit_mode(vcpu)) {
+	if (is_64_bit_hypercall(vcpu)) {
 		hc.param = kvm_rcx_read(vcpu);
 		hc.ingpa = kvm_rdx_read(vcpu);
 		hc.outgpa = kvm_r8_read(vcpu);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index e66e620..539333a 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -81,7 +81,6 @@
 	unsigned long irq_states[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
-	void (*ack_notifier)(void *opaque, int irq);
 	spinlock_t lock;
 	struct rtc_status rtc_status;
 	struct delayed_work eoi_inject;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 650642b..c2d7cfe 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -56,7 +56,6 @@
 	struct kvm_io_device dev_master;
 	struct kvm_io_device dev_slave;
 	struct kvm_io_device dev_elcr;
-	void (*ack_notifier)(void *opaque, int irq);
 	unsigned long irq_states[PIC_NUM_PINS];
 };
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d6ac32f..f206fc3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -707,7 +707,7 @@
 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 {
 	int highest_irr;
-	if (apic->vcpu->arch.apicv_active)
+	if (kvm_x86_ops.sync_pir_to_irr)
 		highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
 	else
 		highest_irr = apic_find_highest_irr(apic);
@@ -2856,25 +2856,30 @@
 	return 0;
 }
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
 {
 	u64 addr = data & ~KVM_MSR_ENABLED;
 	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
 	unsigned long new_len;
+	int ret;
 
 	if (!IS_ALIGNED(addr, 4))
 		return 1;
 
+	if (data & KVM_MSR_ENABLED) {
+		if (addr == ghc->gpa && len <= ghc->len)
+			new_len = ghc->len;
+		else
+			new_len = len;
+
+		ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+		if (ret)
+			return ret;
+	}
+
 	vcpu->arch.pv_eoi.msr_val = data;
-	if (!pv_eoi_enabled(vcpu))
-		return 0;
 
-	if (addr == ghc->gpa && len <= ghc->len)
-		new_len = ghc->len;
-	else
-		new_len = len;
-
-	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+	return 0;
 }
 
 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d7c25d0..2b44e53 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -127,7 +127,7 @@
 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
 void kvm_lapic_exit(void);
 
 #define VEC_POS(v) ((v) & (32 - 1))
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 323b505..6354297 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1582,7 +1582,7 @@
 		flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+		flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
 
 	return flush;
 }
@@ -2173,10 +2173,10 @@
 	iterator->shadow_addr = root;
 	iterator->level = vcpu->arch.mmu->shadow_root_level;
 
-	if (iterator->level == PT64_ROOT_4LEVEL &&
+	if (iterator->level >= PT64_ROOT_4LEVEL &&
 	    vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
 	    !vcpu->arch.mmu->direct_map)
-		--iterator->level;
+		iterator->level = PT32E_ROOT_LEVEL;
 
 	if (iterator->level == PT32E_ROOT_LEVEL) {
 		/*
@@ -3191,17 +3191,17 @@
 			new_spte |= PT_WRITABLE_MASK;
 
 			/*
-			 * Do not fix write-permission on the large spte.  Since
-			 * we only dirty the first page into the dirty-bitmap in
+			 * Do not fix write-permission on the large spte when
+			 * dirty logging is enabled. Since we only dirty the
+			 * first page into the dirty-bitmap in
 			 * fast_pf_fix_direct_spte(), other pages are missed
 			 * if its slot has dirty logging enabled.
 			 *
 			 * Instead, we let the slow page fault path create a
 			 * normal spte to fix the access.
-			 *
-			 * See the comments in kvm_arch_commit_memory_region().
 			 */
-			if (sp->role.level > PG_LEVEL_4K)
+			if (sp->role.level > PG_LEVEL_4K &&
+			    kvm_slot_dirty_track_enabled(fault->slot))
 				break;
 		}
 
@@ -4682,6 +4682,7 @@
 		/* PKEY and LA57 are active iff long mode is active. */
 		ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
 		ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+		ext.efer_lma = ____is_efer_lma(regs);
 	}
 
 	ext.valid = 1;
@@ -4854,7 +4855,7 @@
 	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
 	struct kvm_mmu_role_regs regs = {
 		.cr0 = cr0,
-		.cr4 = cr4,
+		.cr4 = cr4 & ~X86_CR4_PKE,
 		.efer = efer,
 	};
 	union kvm_mmu_role new_role;
@@ -4918,7 +4919,7 @@
 	context->direct_map = false;
 
 	update_permission_bitmask(context, true);
-	update_pkru_bitmask(context);
+	context->pkru_mask = 0;
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
@@ -5024,6 +5025,14 @@
 	/*
 	 * Invalidate all MMU roles to force them to reinitialize as CPUID
 	 * information is factored into reserved bit calculations.
+	 *
+	 * Correctly handling multiple vCPU models (with respect to paging and
+	 * physical address properties) in a single VM would require tracking
+	 * all relevant CPUID information in kvm_mmu_page_role. That is very
+	 * undesirable as it would increase the memory requirements for
+	 * gfn_track (see struct kvm_mmu_page_role comments).  For now that
+	 * problem is swept under the rug; KVM's CPUID API is horrific and
+	 * it's all but impossible to solve without introducing a new API.
 	 */
 	vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
 	vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
@@ -5031,24 +5040,10 @@
 	kvm_mmu_reset_context(vcpu);
 
 	/*
-	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
-	 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
-	 * sweep the problem under the rug.
-	 *
-	 * KVM's horrific CPUID ABI makes the problem all but impossible to
-	 * solve, as correctly handling multiple vCPU models (with respect to
-	 * paging and physical address properties) in a single VM would require
-	 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
-	 * is very undesirable as it would double the memory requirements for
-	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
-	 * no sane VMM mucks with the core vCPU model on the fly.
+	 * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
+	 * kvm_arch_vcpu_ioctl().
 	 */
-	if (vcpu->arch.last_vmentry_cpu != -1) {
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
-		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
-	}
+	KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -5368,7 +5363,7 @@
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
+	kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
 	++vcpu->stat.invlpg;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -5853,8 +5848,6 @@
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *slot)
 {
-	bool flush = false;
-
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
 		/*
@@ -5862,17 +5855,14 @@
 		 * logging at a 4k granularity and never creates collapsible
 		 * 2m SPTEs during dirty logging.
 		 */
-		flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
-		if (flush)
+		if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
 			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
 		write_unlock(&kvm->mmu_lock);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
-		if (flush)
-			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+		kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
 		read_unlock(&kvm->mmu_lock);
 	}
 }
@@ -6181,23 +6171,46 @@
 	mmu_audit_disable();
 }
 
+/*
+ * Calculate the effective recovery period, accounting for '0' meaning "let KVM
+ * select a halving time of 1 hour".  Returns true if recovery is enabled.
+ */
+static bool calc_nx_huge_pages_recovery_period(uint *period)
+{
+	/*
+	 * Use READ_ONCE to get the params; this may be called outside of the
+	 * param setters, e.g. by the kthread to compute its next timeout.
+	 */
+	bool enabled = READ_ONCE(nx_huge_pages);
+	uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+	if (!enabled || !ratio)
+		return false;
+
+	*period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+	if (!*period) {
+		/* Make sure the period is not less than one second.  */
+		ratio = min(ratio, 3600u);
+		*period = 60 * 60 * 1000 / ratio;
+	}
+	return true;
+}
+
 static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
 {
 	bool was_recovery_enabled, is_recovery_enabled;
 	uint old_period, new_period;
 	int err;
 
-	was_recovery_enabled = nx_huge_pages_recovery_ratio;
-	old_period = nx_huge_pages_recovery_period_ms;
+	was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
 
 	err = param_set_uint(val, kp);
 	if (err)
 		return err;
 
-	is_recovery_enabled = nx_huge_pages_recovery_ratio;
-	new_period = nx_huge_pages_recovery_period_ms;
+	is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
 
-	if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
+	if (is_recovery_enabled &&
 	    (!was_recovery_enabled || old_period > new_period)) {
 		struct kvm *kvm;
 
@@ -6261,18 +6274,13 @@
 
 static long get_nx_lpage_recovery_timeout(u64 start_time)
 {
-	uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+	bool enabled;
+	uint period;
 
-	if (!period && ratio) {
-		/* Make sure the period is not less than one second.  */
-		ratio = min(ratio, 3600u);
-		period = 60 * 60 * 1000 / ratio;
-	}
+	enabled = calc_nx_huge_pages_recovery_period(&period);
 
-	return READ_ONCE(nx_huge_pages) && ratio
-		? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-		: MAX_SCHEDULE_TIMEOUT;
+	return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
+		       : MAX_SCHEDULE_TIMEOUT;
 }
 
 static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
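
When the period is left at 0, calc_nx_huge_pages_recovery_period() derives it from the ratio as 3600000 / min(ratio, 3600) ms, so e.g. ratio=60 yields a 60000 ms period, and the clamp guarantees the period never drops below 1000 ms. A standalone sketch of the arithmetic:

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the period calculation the helper above centralizes. */
static bool calc_period(bool enabled, unsigned ratio, unsigned period_ms,
			unsigned *out)
{
	if (!enabled || !ratio)
		return false;

	if (!period_ms) {
		if (ratio > 3600)	/* clamp: period never below 1 second */
			ratio = 3600;
		period_ms = 60 * 60 * 1000 / ratio;
	}
	*out = period_ms;
	return true;
}

int main(void)
{
	unsigned p;

	if (calc_period(true, 60, 0, &p))
		printf("%u ms\n", p);	/* 60000 */
	return 0;
}
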
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7c5dd83..1db8496 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -317,9 +317,6 @@
 	struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
 	int level = sp->role.level;
 	gfn_t base_gfn = sp->gfn;
-	u64 old_child_spte;
-	u64 *sptep;
-	gfn_t gfn;
 	int i;
 
 	trace_kvm_mmu_prepare_zap_page(sp);
@@ -327,8 +324,9 @@
 	tdp_mmu_unlink_page(kvm, sp, shared);
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-		sptep = rcu_dereference(pt) + i;
-		gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+		u64 *sptep = rcu_dereference(pt) + i;
+		gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+		u64 old_child_spte;
 
 		if (shared) {
 			/*
@@ -374,7 +372,7 @@
 				    shared);
 	}
 
-	kvm_flush_remote_tlbs_with_address(kvm, gfn,
+	kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
 					   KVM_PAGES_PER_HPAGE(level + 1));
 
 	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@@ -897,7 +895,7 @@
 					  struct kvm_page_fault *fault,
 					  struct tdp_iter *iter)
 {
-	struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
+	struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
 	u64 new_spte;
 	int ret = RET_PF_FIXED;
 	bool wrprot = false;
@@ -1033,9 +1031,9 @@
 {
 	struct kvm_mmu_page *root;
 
-	for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
-		flush |= zap_gfn_range(kvm, root, range->start, range->end,
-				       range->may_block, flush, false);
+	for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
+		flush = zap_gfn_range(kvm, root, range->start, range->end,
+				      range->may_block, flush, false);
 
 	return flush;
 }
@@ -1364,10 +1362,9 @@
  * Clear leaf entries which could be replaced by large mappings, for
  * GFNs within the slot.
  */
-static bool zap_collapsible_spte_range(struct kvm *kvm,
+static void zap_collapsible_spte_range(struct kvm *kvm,
 				       struct kvm_mmu_page *root,
-				       const struct kvm_memory_slot *slot,
-				       bool flush)
+				       const struct kvm_memory_slot *slot)
 {
 	gfn_t start = slot->base_gfn;
 	gfn_t end = start + slot->npages;
@@ -1378,10 +1375,8 @@
 
 	tdp_root_for_each_pte(iter, root, start, end) {
 retry:
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
-			flush = false;
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
-		}
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
 		    !is_last_spte(iter.old_spte, iter.level))
@@ -1393,6 +1388,7 @@
 							    pfn, PG_LEVEL_NUM))
 			continue;
 
+		/* Note, a successful atomic zap also does a remote TLB flush. */
 		if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
 			/*
 			 * The iter must explicitly re-read the SPTE because
@@ -1401,30 +1397,24 @@
 			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
 			goto retry;
 		}
-		flush = true;
 	}
 
 	rcu_read_unlock();
-
-	return flush;
 }
 
 /*
  * Clear non-leaf entries (and free associated page tables) which could
  * be replaced by large mappings, for GFNs within the slot.
  */
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-				       const struct kvm_memory_slot *slot,
-				       bool flush)
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot)
 {
 	struct kvm_mmu_page *root;
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
 	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
-		flush = zap_collapsible_spte_range(kvm, root, slot, flush);
-
-	return flush;
+		zap_collapsible_spte_range(kvm, root, slot);
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 476b133..3899004 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -64,9 +64,8 @@
 				       struct kvm_memory_slot *slot,
 				       gfn_t gfn, unsigned long mask,
 				       bool wrprot);
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-				       const struct kvm_memory_slot *slot,
-				       bool flush);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot);
 
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0772bad..09873f6 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -319,7 +319,7 @@
 }
 
 /* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
 }
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 0e4f2b1..59d6b76 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -32,7 +32,7 @@
 	struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
 		unsigned int idx, u64 *mask);
 	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
-	int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+	bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
 	bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
 	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -149,7 +149,7 @@
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8052d92..9d6066e 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -904,7 +904,8 @@
 			  BIT(APICV_INHIBIT_REASON_NESTED) |
 			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
 			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
-			  BIT(APICV_INHIBIT_REASON_X2APIC);
+			  BIT(APICV_INHIBIT_REASON_X2APIC) |
+			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
 	return supported & BIT(bit);
 }
@@ -988,16 +989,18 @@
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	int cpu = get_cpu();
 
+	WARN_ON(cpu != vcpu->cpu);
 	svm->avic_is_running = is_run;
 
-	if (!kvm_vcpu_apicv_active(vcpu))
-		return;
-
-	if (is_run)
-		avic_vcpu_load(vcpu, vcpu->cpu);
-	else
-		avic_vcpu_put(vcpu);
+	if (kvm_vcpu_apicv_active(vcpu)) {
+		if (is_run)
+			avic_vcpu_load(vcpu, cpu);
+		else
+			avic_vcpu_put(vcpu);
+	}
+	put_cpu();
 }
 
 void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
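
avic_set_running() previously read vcpu->cpu with preemption enabled, so the caller could migrate between the read and avic_vcpu_load(); bracketing the sequence with get_cpu()/put_cpu() pins it to one CPU for the duration. The pattern in isolation (kernel-flavored sketch, function name hypothetical):

static void cpu_local_update(struct kvm_vcpu *vcpu)
{
	int cpu = get_cpu();		/* disables preemption */

	WARN_ON(cpu != vcpu->cpu);	/* invariant: we run on the vCPU's CPU */
	/* ... per-CPU AVIC state update that must not race with migration ... */
	put_cpu();			/* re-enables preemption */
}
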
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index fdf587f..871c426 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -181,14 +181,13 @@
 	return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
 }
 
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
 	idx &= ~(3u << 30);
 
-	return (idx >= pmu->nr_arch_gp_counters);
+	return idx < pmu->nr_arch_gp_counters;
 }
 
 /* idx is the ECX register of RDPMC instruction */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1964b9a..59727a96 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -120,16 +120,26 @@
 	return true;
 }
 
+static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
+{
+	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+
+	return misc_cg_try_charge(type, sev->misc_cg, 1);
+}
+
+static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+{
+	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+
+	misc_cg_uncharge(type, sev->misc_cg, 1);
+}
+
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
 	int asid, min_asid, max_asid, ret;
 	bool retry = true;
-	enum misc_res_type type;
 
-	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
 	WARN_ON(sev->misc_cg);
 	sev->misc_cg = get_current_misc_cg();
-	ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+	ret = sev_misc_cg_try_charge(sev);
 	if (ret) {
 		put_misc_cg(sev->misc_cg);
 		sev->misc_cg = NULL;
@@ -162,7 +172,7 @@
 
 	return asid;
 e_uncharge:
-	misc_cg_uncharge(type, sev->misc_cg, 1);
+	sev_misc_cg_uncharge(sev);
 	put_misc_cg(sev->misc_cg);
 	sev->misc_cg = NULL;
 	return ret;
@@ -179,7 +189,6 @@
 {
 	struct svm_cpu_data *sd;
 	int cpu;
-	enum misc_res_type type;
 
 	mutex_lock(&sev_bitmap_lock);
 
@@ -192,8 +201,7 @@
 
 	mutex_unlock(&sev_bitmap_lock);
 
-	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
-	misc_cg_uncharge(type, sev->misc_cg, 1);
+	sev_misc_cg_uncharge(sev);
 	put_misc_cg(sev->misc_cg);
 	sev->misc_cg = NULL;
 }
@@ -229,7 +237,6 @@
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-	bool es_active = argp->id == KVM_SEV_ES_INIT;
 	int asid, ret;
 
 	if (kvm->created_vcpus)
@@ -239,7 +246,8 @@
 	if (unlikely(sev->active))
 		return ret;
 
-	sev->es_active = es_active;
+	sev->active = true;
+	sev->es_active = argp->id == KVM_SEV_ES_INIT;
 	asid = sev_asid_new(sev);
 	if (asid < 0)
 		goto e_no_asid;
@@ -249,8 +257,6 @@
 	if (ret)
 		goto e_free;
 
-	sev->active = true;
-	sev->asid = asid;
 	INIT_LIST_HEAD(&sev->regions_list);
 
 	return 0;
@@ -260,6 +266,7 @@
 	sev->asid = 0;
 e_no_asid:
 	sev->es_active = false;
+	sev->active = false;
 	return ret;
 }
 
@@ -590,7 +597,7 @@
 	 * traditional VMSA as it has been built so far (in prep
 	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
 	 */
-	memcpy(svm->vmsa, save, sizeof(*save));
+	memcpy(svm->sev_es.vmsa, save, sizeof(*save));
 
 	return 0;
 }
@@ -612,11 +619,11 @@
 	 * the VMSA memory content (i.e it will write the same memory region
 	 * with the guest's key), so invalidate it first.
 	 */
-	clflush_cache_range(svm->vmsa, PAGE_SIZE);
+	clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
 
 	vmsa.reserved = 0;
 	vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
-	vmsa.address = __sme_pa(svm->vmsa);
+	vmsa.address = __sme_pa(svm->sev_es.vmsa);
 	vmsa.len = PAGE_SIZE;
 	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
 	if (ret)
@@ -1522,7 +1529,7 @@
 	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
 }
 
-static bool cmd_allowed_from_miror(u32 cmd_id)
+static bool is_cmd_allowed_from_mirror(u32 cmd_id)
 {
 	/*
 	 * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
@@ -1536,6 +1543,223 @@
 	return false;
 }
 
+static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+	int r = -EBUSY;
+
+	if (dst_kvm == src_kvm)
+		return -EINVAL;
+
+	/*
+	 * Bail if these VMs are already involved in a migration to avoid
+	 * deadlock between two VMs trying to migrate to/from each other.
+	 */
+	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
+		return -EBUSY;
+
+	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
+		goto release_dst;
+
+	r = -EINTR;
+	if (mutex_lock_killable(&dst_kvm->lock))
+		goto release_src;
+	if (mutex_lock_killable(&src_kvm->lock))
+		goto unlock_dst;
+	return 0;
+
+unlock_dst:
+	mutex_unlock(&dst_kvm->lock);
+release_src:
+	atomic_set_release(&src_sev->migration_in_progress, 0);
+release_dst:
+	atomic_set_release(&dst_sev->migration_in_progress, 0);
+	return r;
+}
+
+static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
+{
+	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+
+	mutex_unlock(&dst_kvm->lock);
+	mutex_unlock(&src_kvm->lock);
+	atomic_set_release(&dst_sev->migration_in_progress, 0);
+	atomic_set_release(&src_sev->migration_in_progress, 0);
+}
+
+static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int i, j;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (mutex_lock_killable(&vcpu->mutex))
+			goto out_unlock;
+	}
+
+	return 0;
+
+out_unlock:
+	kvm_for_each_vcpu(j, vcpu, kvm) {
+		if (i == j)
+			break;
+
+		mutex_unlock(&vcpu->mutex);
+	}
+	return -EINTR;
+}
+
+static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_unlock(&vcpu->mutex);
+	}
+}
+
+static void sev_migrate_from(struct kvm_sev_info *dst,
+			      struct kvm_sev_info *src)
+{
+	dst->active = true;
+	dst->asid = src->asid;
+	dst->handle = src->handle;
+	dst->pages_locked = src->pages_locked;
+	dst->enc_context_owner = src->enc_context_owner;
+
+	src->asid = 0;
+	src->active = false;
+	src->handle = 0;
+	src->pages_locked = 0;
+	src->enc_context_owner = NULL;
+
+	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
+}
+
+static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
+{
+	int i;
+	struct kvm_vcpu *dst_vcpu, *src_vcpu;
+	struct vcpu_svm *dst_svm, *src_svm;
+
+	if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+		return -EINVAL;
+
+	kvm_for_each_vcpu(i, src_vcpu, src) {
+		if (!src_vcpu->arch.guest_state_protected)
+			return -EINVAL;
+	}
+
+	kvm_for_each_vcpu(i, src_vcpu, src) {
+		src_svm = to_svm(src_vcpu);
+		dst_vcpu = kvm_get_vcpu(dst, i);
+		dst_svm = to_svm(dst_vcpu);
+
+		/*
+		 * Transfer VMSA and GHCB state to the destination.  Nullify and
+		 * clear source fields as appropriate; the state now belongs to
+		 * the destination.
+		 */
+		memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
+		dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
+		dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
+		dst_vcpu->arch.guest_state_protected = true;
+
+		memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
+		src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
+		src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+		src_vcpu->arch.guest_state_protected = false;
+	}
+	to_kvm_svm(src)->sev_info.es_active = false;
+	to_kvm_svm(dst)->sev_info.es_active = true;
+
+	return 0;
+}
+
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
+{
+	struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+	struct kvm_sev_info *src_sev, *cg_cleanup_sev;
+	struct file *source_kvm_file;
+	struct kvm *source_kvm;
+	bool charged = false;
+	int ret;
+
+	source_kvm_file = fget(source_fd);
+	if (!file_is_kvm(source_kvm_file)) {
+		ret = -EBADF;
+		goto out_fput;
+	}
+
+	source_kvm = source_kvm_file->private_data;
+	ret = sev_lock_two_vms(kvm, source_kvm);
+	if (ret)
+		goto out_fput;
+
+	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	src_sev = &to_kvm_svm(source_kvm)->sev_info;
+
+	/*
+	 * VMs mirroring src's encryption context rely on it to keep the
+	 * ASID allocated, but below we are clearing src_sev->asid.
+	 */
+	if (src_sev->num_mirrored_vms) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	dst_sev->misc_cg = get_current_misc_cg();
+	cg_cleanup_sev = dst_sev;
+	if (dst_sev->misc_cg != src_sev->misc_cg) {
+		ret = sev_misc_cg_try_charge(dst_sev);
+		if (ret)
+			goto out_dst_cgroup;
+		charged = true;
+	}
+
+	ret = sev_lock_vcpus_for_migration(kvm);
+	if (ret)
+		goto out_dst_cgroup;
+	ret = sev_lock_vcpus_for_migration(source_kvm);
+	if (ret)
+		goto out_dst_vcpu;
+
+	if (sev_es_guest(source_kvm)) {
+		ret = sev_es_migrate_from(kvm, source_kvm);
+		if (ret)
+			goto out_source_vcpu;
+	}
+	sev_migrate_from(dst_sev, src_sev);
+	kvm_vm_dead(source_kvm);
+	cg_cleanup_sev = src_sev;
+	ret = 0;
+
+out_source_vcpu:
+	sev_unlock_vcpus_for_migration(source_kvm);
+out_dst_vcpu:
+	sev_unlock_vcpus_for_migration(kvm);
+out_dst_cgroup:
+	/* Operates on the source on success, on the destination on failure.  */
+	if (charged)
+		sev_misc_cg_uncharge(cg_cleanup_sev);
+	put_misc_cg(cg_cleanup_sev->misc_cg);
+	cg_cleanup_sev->misc_cg = NULL;
+out_unlock:
+	sev_unlock_two_vms(kvm, source_kvm);
+out_fput:
+	if (source_kvm_file)
+		fput(source_kvm_file);
+	return ret;
+}
+
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_sev_cmd sev_cmd;
@@ -1554,7 +1778,7 @@
 
 	/* Only the enc_context_owner handles some memory enc operations. */
 	if (is_mirroring_enc_context(kvm) &&
-	    !cmd_allowed_from_miror(sev_cmd.id)) {
+	    !is_cmd_allowed_from_mirror(sev_cmd.id)) {
 		r = -EINVAL;
 		goto out;
 	}
@@ -1751,71 +1975,60 @@
 {
 	struct file *source_kvm_file;
 	struct kvm *source_kvm;
-	struct kvm_sev_info source_sev, *mirror_sev;
+	struct kvm_sev_info *source_sev, *mirror_sev;
 	int ret;
 
 	source_kvm_file = fget(source_fd);
 	if (!file_is_kvm(source_kvm_file)) {
 		ret = -EBADF;
-		goto e_source_put;
+		goto e_source_fput;
 	}
 
 	source_kvm = source_kvm_file->private_data;
-	mutex_lock(&source_kvm->lock);
+	ret = sev_lock_two_vms(kvm, source_kvm);
+	if (ret)
+		goto e_source_fput;
 
-	if (!sev_guest(source_kvm)) {
+	/*
+	 * Mirrors of mirrors should work, but let's not get silly.  Also
+	 * disallow out-of-band SEV/SEV-ES init if the target is already an
+	 * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
+	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+	 */
+	if (sev_guest(kvm) || !sev_guest(source_kvm) ||
+	    is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
 		ret = -EINVAL;
-		goto e_source_unlock;
+		goto e_unlock;
 	}
 
-	/* Mirrors of mirrors should work, but let's not get silly */
-	if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
-		ret = -EINVAL;
-		goto e_source_unlock;
-	}
-
-	memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
-	       sizeof(source_sev));
-
 	/*
 	 * The mirror kvm holds an enc_context_owner ref so its asid can't
 	 * disappear until we're done with it
 	 */
+	source_sev = &to_kvm_svm(source_kvm)->sev_info;
 	kvm_get_kvm(source_kvm);
-
-	fput(source_kvm_file);
-	mutex_unlock(&source_kvm->lock);
-	mutex_lock(&kvm->lock);
-
-	if (sev_guest(kvm)) {
-		ret = -EINVAL;
-		goto e_mirror_unlock;
-	}
+	source_sev->num_mirrored_vms++;
 
 	/* Set enc_context_owner and copy its encryption context over */
 	mirror_sev = &to_kvm_svm(kvm)->sev_info;
 	mirror_sev->enc_context_owner = source_kvm;
 	mirror_sev->active = true;
-	mirror_sev->asid = source_sev.asid;
-	mirror_sev->fd = source_sev.fd;
-	mirror_sev->es_active = source_sev.es_active;
-	mirror_sev->handle = source_sev.handle;
+	mirror_sev->asid = source_sev->asid;
+	mirror_sev->fd = source_sev->fd;
+	mirror_sev->es_active = source_sev->es_active;
+	mirror_sev->handle = source_sev->handle;
+	INIT_LIST_HEAD(&mirror_sev->regions_list);
+	ret = 0;
+
 	/*
 	 * Do not copy ap_jump_table: the mirror does not share the same
 	 * KVM contexts as the original, and they may have different
 	 * memory views.
 	 */
 
-	mutex_unlock(&kvm->lock);
-	return 0;
-
-e_mirror_unlock:
-	mutex_unlock(&kvm->lock);
-	kvm_put_kvm(source_kvm);
-	return ret;
-e_source_unlock:
-	mutex_unlock(&source_kvm->lock);
-e_source_put:
+e_unlock:
+	sev_unlock_two_vms(kvm, source_kvm);
+e_source_fput:
 	if (source_kvm_file)
 		fput(source_kvm_file);
 	return ret;
@@ -1827,17 +2040,24 @@
 	struct list_head *head = &sev->regions_list;
 	struct list_head *pos, *q;
 
+	WARN_ON(sev->num_mirrored_vms);
+
 	if (!sev_guest(kvm))
 		return;
 
 	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
 	if (is_mirroring_enc_context(kvm)) {
-		kvm_put_kvm(sev->enc_context_owner);
+		struct kvm *owner_kvm = sev->enc_context_owner;
+		struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
+
+		mutex_lock(&owner_kvm->lock);
+		if (!WARN_ON(!owner_sev->num_mirrored_vms))
+			owner_sev->num_mirrored_vms--;
+		mutex_unlock(&owner_kvm->lock);
+		kvm_put_kvm(owner_kvm);
 		return;
 	}
 
-	mutex_lock(&kvm->lock);
-
 	/*
 	 * Ensure that all guest tagged cache entries are flushed before
 	 * releasing the pages back to the system for use. CLFLUSH will
@@ -1857,8 +2077,6 @@
 		}
 	}
 
-	mutex_unlock(&kvm->lock);
-
 	sev_unbind_asid(kvm, sev->handle);
 	sev_asid_free(sev);
 }
@@ -2038,16 +2256,16 @@
 	svm = to_svm(vcpu);
 
 	if (vcpu->arch.guest_state_protected)
-		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
-	__free_page(virt_to_page(svm->vmsa));
+		sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+	__free_page(virt_to_page(svm->sev_es.vmsa));
 
-	if (svm->ghcb_sa_free)
-		kfree(svm->ghcb_sa);
+	if (svm->sev_es.ghcb_sa_free)
+		kfree(svm->sev_es.ghcb_sa);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-	struct ghcb *ghcb = svm->ghcb;
+	struct ghcb *ghcb = svm->sev_es.ghcb;
 	unsigned int nbits;
 
 	/* Re-use the dump_invalid_vmcb module parameter */
@@ -2073,7 +2291,7 @@
 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct ghcb *ghcb = svm->ghcb;
+	struct ghcb *ghcb = svm->sev_es.ghcb;
 
 	/*
 	 * The GHCB protocol so far allows for the following data
@@ -2093,7 +2311,7 @@
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct ghcb *ghcb = svm->ghcb;
+	struct ghcb *ghcb = svm->sev_es.ghcb;
 	u64 exit_code;
 
 	/*
@@ -2140,7 +2358,7 @@
 	struct ghcb *ghcb;
 	u64 exit_code = 0;
 
-	ghcb = svm->ghcb;
+	ghcb = svm->sev_es.ghcb;
 
 	/* Only GHCB Usage code 0 is supported */
 	if (ghcb->ghcb_usage)
@@ -2258,33 +2476,34 @@
 
 void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
-	if (!svm->ghcb)
+	if (!svm->sev_es.ghcb)
 		return;
 
-	if (svm->ghcb_sa_free) {
+	if (svm->sev_es.ghcb_sa_free) {
 		/*
 		 * The scratch area lives outside the GHCB, so there is a
 		 * buffer that, depending on the operation performed, may
 		 * need to be synced, then freed.
 		 */
-		if (svm->ghcb_sa_sync) {
+		if (svm->sev_es.ghcb_sa_sync) {
 			kvm_write_guest(svm->vcpu.kvm,
-					ghcb_get_sw_scratch(svm->ghcb),
-					svm->ghcb_sa, svm->ghcb_sa_len);
-			svm->ghcb_sa_sync = false;
+					ghcb_get_sw_scratch(svm->sev_es.ghcb),
+					svm->sev_es.ghcb_sa,
+					svm->sev_es.ghcb_sa_len);
+			svm->sev_es.ghcb_sa_sync = false;
 		}
 
-		kfree(svm->ghcb_sa);
-		svm->ghcb_sa = NULL;
-		svm->ghcb_sa_free = false;
+		kfree(svm->sev_es.ghcb_sa);
+		svm->sev_es.ghcb_sa = NULL;
+		svm->sev_es.ghcb_sa_free = false;
 	}
 
-	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
 
 	sev_es_sync_to_ghcb(svm);
 
-	kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
-	svm->ghcb = NULL;
+	kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+	svm->sev_es.ghcb = NULL;
 }
 
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -2314,7 +2533,7 @@
 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
-	struct ghcb *ghcb = svm->ghcb;
+	struct ghcb *ghcb = svm->sev_es.ghcb;
 	u64 ghcb_scratch_beg, ghcb_scratch_end;
 	u64 scratch_gpa_beg, scratch_gpa_end;
 	void *scratch_va;
@@ -2350,7 +2569,7 @@
 			return false;
 		}
 
-		scratch_va = (void *)svm->ghcb;
+		scratch_va = (void *)svm->sev_es.ghcb;
 		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
 	} else {
 		/*
@@ -2380,12 +2599,12 @@
 		 * the vCPU next time (i.e. a read was requested so the data
 		 * must be written back to the guest memory).
 		 */
-		svm->ghcb_sa_sync = sync;
-		svm->ghcb_sa_free = true;
+		svm->sev_es.ghcb_sa_sync = sync;
+		svm->sev_es.ghcb_sa_free = true;
 	}
 
-	svm->ghcb_sa = scratch_va;
-	svm->ghcb_sa_len = len;
+	svm->sev_es.ghcb_sa = scratch_va;
+	svm->sev_es.ghcb_sa_len = len;
 
 	return true;
 }
@@ -2504,15 +2723,15 @@
 		return -EINVAL;
 	}
 
-	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
 		/* Unable to map GHCB from guest */
 		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
 			    ghcb_gpa);
 		return -EINVAL;
 	}
 
-	svm->ghcb = svm->ghcb_map.hva;
-	ghcb = svm->ghcb_map.hva;
+	svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
+	ghcb = svm->sev_es.ghcb_map.hva;
 
 	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
 
@@ -2535,7 +2754,7 @@
 		ret = kvm_sev_es_mmio_read(vcpu,
 					   control->exit_info_1,
 					   control->exit_info_2,
-					   svm->ghcb_sa);
+					   svm->sev_es.ghcb_sa);
 		break;
 	case SVM_VMGEXIT_MMIO_WRITE:
 		if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
@@ -2544,7 +2763,7 @@
 		ret = kvm_sev_es_mmio_write(vcpu,
 					    control->exit_info_1,
 					    control->exit_info_2,
-					    svm->ghcb_sa);
+					    svm->sev_es.ghcb_sa);
 		break;
 	case SVM_VMGEXIT_NMI_COMPLETE:
 		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
@@ -2604,7 +2823,8 @@
 	if (!setup_vmgexit_scratch(svm, in, bytes))
 		return -EINVAL;
 
-	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
+				    count, in);
 }
 
 void sev_es_init_vmcb(struct vcpu_svm *svm)
@@ -2619,7 +2839,7 @@
 	 * VMCB page. Do not include the encryption mask on the VMSA physical
 	 * address since hardware will access it using the guest key.
 	 */
-	svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+	svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
 	/* Can't intercept CR register access, HV can't modify CR registers */
 	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2691,8 +2911,8 @@
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	/* First SIPI: Use the values as initially set by the VMM */
-	if (!svm->received_first_sipi) {
-		svm->received_first_sipi = true;
+	if (!svm->sev_es.received_first_sipi) {
+		svm->sev_es.received_first_sipi = true;
 		return;
 	}
 
@@ -2701,8 +2921,8 @@
 	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
 	 * non-zero value.
 	 */
-	if (!svm->ghcb)
+	if (!svm->sev_es.ghcb)
 		return;
 
-	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b36ca4e..d0f68d1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1452,7 +1452,7 @@
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
 	if (vmsa_page)
-		svm->vmsa = page_address(vmsa_page);
+		svm->sev_es.vmsa = page_address(vmsa_page);
 
 	svm->guest_state_loaded = false;
 
@@ -2835,11 +2835,11 @@
 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
+	if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
 		return kvm_complete_insn_gp(vcpu, err);
 
-	ghcb_set_sw_exit_info_1(svm->ghcb, 1);
-	ghcb_set_sw_exit_info_2(svm->ghcb,
+	ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
+	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
 				X86_TRAP_GP |
 				SVM_EVTINJ_TYPE_EXEPT |
 				SVM_EVTINJ_VALID);
@@ -3121,11 +3121,6 @@
 	type = svm->vmcb->control.exit_info_2;
 	gva = svm->vmcb->control.exit_info_1;
 
-	if (type > 3) {
-		kvm_inject_gp(vcpu, 0);
-		return 1;
-	}
-
 	return kvm_handle_invpcid(vcpu, type, gva);
 }
 
@@ -4656,7 +4651,6 @@
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.hwapic_irr_update = svm_hwapic_irr_update,
 	.hwapic_isr_update = svm_hwapic_isr_update,
-	.sync_pir_to_irr = kvm_lapic_find_highest_irr,
 	.apicv_post_state_restore = avic_post_state_restore,
 
 	.set_tss_addr = svm_set_tss_addr,
@@ -4701,6 +4695,7 @@
 	.mem_enc_unreg_region = svm_unregister_enc_region,
 
 	.vm_copy_enc_context_from = svm_vm_copy_asid_from,
+	.vm_move_enc_context_from = svm_vm_migrate_from,
 
 	.can_emulate_instruction = svm_can_emulate_instruction,
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5e9510d..1c7306c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,7 +79,9 @@
 	struct list_head regions_list;  /* List of registered regions */
 	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
 	struct kvm *enc_context_owner; /* Owner of copied encryption context */
+	unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
 	struct misc_cg *misc_cg; /* For misc cgroup accounting */
+	atomic_t migration_in_progress;
 };
 
 struct kvm_svm {
@@ -123,6 +125,20 @@
 	bool initialized;
 };
 
+struct vcpu_sev_es_state {
+	/* SEV-ES support */
+	struct vmcb_save_area *vmsa;
+	struct ghcb *ghcb;
+	struct kvm_host_map ghcb_map;
+	bool received_first_sipi;
+
+	/* SEV-ES scratch area support */
+	void *ghcb_sa;
+	u32 ghcb_sa_len;
+	bool ghcb_sa_sync;
+	bool ghcb_sa_free;
+};
+
 struct vcpu_svm {
 	struct kvm_vcpu vcpu;
 	/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
@@ -186,17 +202,7 @@
 		DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
 	} shadow_msr_intercept;
 
-	/* SEV-ES support */
-	struct vmcb_save_area *vmsa;
-	struct ghcb *ghcb;
-	struct kvm_host_map ghcb_map;
-	bool received_first_sipi;
-
-	/* SEV-ES scratch area support */
-	void *ghcb_sa;
-	u32 ghcb_sa_len;
-	bool ghcb_sa_sync;
-	bool ghcb_sa_free;
+	struct vcpu_sev_es_state sev_es;
 
 	bool guest_state_loaded;
 };
@@ -242,7 +248,7 @@
 #ifdef CONFIG_KVM_AMD_SEV
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
-	return sev_guest(kvm) && sev->es_active;
+	return sev->es_active && !WARN_ON_ONCE(!sev->active);
 #else
 	return false;
 #endif
@@ -558,6 +564,7 @@
 int svm_unregister_enc_region(struct kvm *kvm,
 			      struct kvm_enc_region *range);
 int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b4ee5e9..64f2828 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -525,67 +525,19 @@
 }
 
 /*
- * Check if MSR is intercepted for L01 MSR bitmap.
+ * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
+ * itself utilizing x2APIC.  All MSRs were previously set to be intercepted;
+ * only the "disable intercept" case needs to be handled.
  */
-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
+							unsigned long *msr_bitmap_l0,
+							u32 msr, int type)
 {
-	unsigned long *msr_bitmap;
-	int f = sizeof(unsigned long);
+	if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
+		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
 
-	if (!cpu_has_vmx_msr_bitmap())
-		return true;
-
-	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-
-	if (msr <= 0x1fff) {
-		return !!test_bit(msr, msr_bitmap + 0x800 / f);
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-	}
-
-	return true;
-}
-
-/*
- * If a msr is allowed by L0, we should check whether it is allowed by L1.
- * The corresponding bit will be cleared unless both of L0 and L1 allow it.
- */
-static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
-					       unsigned long *msr_bitmap_nested,
-					       u32 msr, int type)
-{
-	int f = sizeof(unsigned long);
-
-	/*
-	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-	 * have the write-low and read-high bitmap offsets the wrong way round.
-	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-	 */
-	if (msr <= 0x1fff) {
-		if (type & MSR_TYPE_R &&
-		   !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
-			/* read-low */
-			__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
-
-		if (type & MSR_TYPE_W &&
-		   !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
-			/* write-low */
-			__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
-
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		if (type & MSR_TYPE_R &&
-		   !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
-			/* read-high */
-			__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
-
-		if (type & MSR_TYPE_W &&
-		   !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
-			/* write-high */
-			__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
-
-	}
+	if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
+		vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
 }
 
 static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
@@ -600,6 +552,34 @@
 	}
 }
 
+#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)					\
+static inline									\
+void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,			\
+					 unsigned long *msr_bitmap_l1,		\
+					 unsigned long *msr_bitmap_l0, u32 msr)	\
+{										\
+	if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||		\
+	    vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))			\
+		vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
+	else									\
+		vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);			\
+}
+BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
+BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
+
+static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
+						    unsigned long *msr_bitmap_l1,
+						    unsigned long *msr_bitmap_l0,
+						    u32 msr, int types)
+{
+	if (types & MSR_TYPE_R)
+		nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
+						  msr_bitmap_l0, msr);
+	if (types & MSR_TYPE_W)
+		nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
+						   msr_bitmap_l0, msr);
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
@@ -607,10 +587,11 @@
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 						 struct vmcs12 *vmcs12)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int msr;
 	unsigned long *msr_bitmap_l1;
-	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
-	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
+	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
 	/* Nothing to do if the MSR bitmap is not in use.  */
 	if (!cpu_has_vmx_msr_bitmap() ||
@@ -625,7 +606,7 @@
 	/*
 	 * To keep the control flow simple, pay eight 8-byte writes (sixteen
 	 * 4-byte writes on 32-bit systems) up front to enable intercepts for
-	 * the x2APIC MSR range and selectively disable them below.
+	 * the x2APIC MSR range and selectively toggle those relevant to L2.
 	 */
 	enable_x2apic_msr_intercepts(msr_bitmap_l0);
 
@@ -644,61 +625,44 @@
 			}
 		}
 
-		nested_vmx_disable_intercept_for_msr(
+		nested_vmx_disable_intercept_for_x2apic_msr(
 			msr_bitmap_l1, msr_bitmap_l0,
 			X2APIC_MSR(APIC_TASKPRI),
 			MSR_TYPE_R | MSR_TYPE_W);
 
 		if (nested_cpu_has_vid(vmcs12)) {
-			nested_vmx_disable_intercept_for_msr(
+			nested_vmx_disable_intercept_for_x2apic_msr(
 				msr_bitmap_l1, msr_bitmap_l0,
 				X2APIC_MSR(APIC_EOI),
 				MSR_TYPE_W);
-			nested_vmx_disable_intercept_for_msr(
+			nested_vmx_disable_intercept_for_x2apic_msr(
 				msr_bitmap_l1, msr_bitmap_l0,
 				X2APIC_MSR(APIC_SELF_IPI),
 				MSR_TYPE_W);
 		}
 	}
 
-	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
-#ifdef CONFIG_X86_64
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_FS_BASE, MSR_TYPE_RW);
-
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_GS_BASE, MSR_TYPE_RW);
-
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
-
 	/*
-	 * Checking the L0->L1 bitmap is trying to verify two things:
-	 *
-	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
-	 *    ensures that we do not accidentally generate an L02 MSR bitmap
-	 *    from the L12 MSR bitmap that is too permissive.
-	 * 2. That L1 or L2s have actually used the MSR. This avoids
-	 *    unnecessarily merging of the bitmap if the MSR is unused. This
-	 *    works properly because we only update the L01 MSR bitmap lazily.
-	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
-	 *    updated to reflect this when L1 (or its L2s) actually write to
-	 *    the MSR.
+	 * Always check vmcs01's bitmap to honor userspace MSR filters and any
+	 * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
 	 */
-	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
-		nested_vmx_disable_intercept_for_msr(
-					msr_bitmap_l1, msr_bitmap_l0,
-					MSR_IA32_SPEC_CTRL,
-					MSR_TYPE_R | MSR_TYPE_W);
+#ifdef CONFIG_X86_64
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_FS_BASE, MSR_TYPE_RW);
 
-	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
-		nested_vmx_disable_intercept_for_msr(
-					msr_bitmap_l1, msr_bitmap_l0,
-					MSR_IA32_PRED_CMD,
-					MSR_TYPE_W);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_GS_BASE, MSR_TYPE_RW);
 
-	kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PRED_CMD, MSR_TYPE_W);
+
+	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
 	return true;
 }
@@ -706,33 +670,39 @@
 static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
-	struct kvm_host_map map;
-	struct vmcs12 *shadow;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
 	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 	    vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return;
 
-	shadow = get_shadow_vmcs12(vcpu);
-
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
 		return;
 
-	memcpy(shadow, map.hva, VMCS12_SIZE);
-	kvm_vcpu_unmap(vcpu, &map, false);
+	kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+			      VMCS12_SIZE);
 }
 
 static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
 					      struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
 	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 	    vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return;
 
-	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
-			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+		return;
+
+	kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+			       VMCS12_SIZE);
 }
 
 /*
@@ -1192,29 +1162,26 @@
 	WARN_ON(!enable_vpid);
 
 	/*
-	 * If VPID is enabled and used by vmc12, but L2 does not have a unique
-	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
-	 * a VPID for L2, flush the current context as the effective ASID is
-	 * common to both L1 and L2.
-	 *
-	 * Defer the flush so that it runs after vmcs02.EPTP has been set by
-	 * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
-	 * redundant flushes further down the nested pipeline.
-	 *
-	 * If a TLB flush isn't required due to any of the above, and vpid12 is
-	 * changing then the new "virtual" VPID (vpid12) will reuse the same
-	 * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
-	 * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
-	 * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
-	 * guest-physical mappings, so there is no need to sync the nEPT MMU.
+	 * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
+	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
+	 * is the VPID incorporated into the MMU context.  I.e. KVM must assume
+	 * that the new vpid12 has never been used and thus represents a new
+	 * guest ASID that cannot have entries in the TLB.
 	 */
-	if (!nested_has_guest_tlb_tag(vcpu)) {
-		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-	} else if (is_vmenter &&
-		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-		vpid_sync_context(nested_get_vpid02(vcpu));
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+		return;
 	}
+
+	/*
+	 * If VPID is enabled, used by vmcs12, and vpid12 is not changing but
+	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
+	 * KVM was unable to allocate a VPID for L2, flush the current context
+	 * as the effective ASID is common to both L1 and L2.
+	 */
+	if (!nested_has_guest_tlb_tag(vcpu))
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 }
 
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@ -2866,6 +2833,17 @@
 	return 0;
 }
 
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+		!!(vcpu->arch.efer & EFER_LMA)))
+		return -EINVAL;
+#endif
+	return 0;
+}
+
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
@@ -2890,18 +2868,16 @@
 		return -EINVAL;
 
 #ifdef CONFIG_X86_64
-	ia32e = !!(vcpu->arch.efer & EFER_LMA);
+	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 #else
 	ia32e = false;
 #endif
 
 	if (ia32e) {
-		if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
-		    CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
 			return -EINVAL;
 	} else {
-		if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
-		    CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
 		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
 		    CC((vmcs12->host_rip) >> 32))
 			return -EINVAL;
@@ -2946,9 +2922,9 @@
 static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 					  struct vmcs12 *vmcs12)
 {
-	int r = 0;
-	struct vmcs12 *shadow;
-	struct kvm_host_map map;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+	struct vmcs_hdr hdr;
 
 	if (vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return 0;
@@ -2956,17 +2932,21 @@
 	if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
 		return -EINVAL;
 
-	if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+					 vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+		return -EINVAL;
+
+	if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+					    offsetof(struct vmcs12, hdr),
+					    sizeof(hdr))))
 		return -EINVAL;
 
-	shadow = map.hva;
+	if (CC(hdr.revision_id != VMCS12_REVISION) ||
+	    CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+		return -EINVAL;
 
-	if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
-	    CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
-		r = -EINVAL;
-
-	kvm_vcpu_unmap(vcpu, &map, false);
-	return r;
+	return 0;
 }
 
 /*
@@ -3361,8 +3341,7 @@
 	};
 	u32 failed_index;
 
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	kvm_service_local_tlb_flush_requests(vcpu);
 
 	evaluate_pending_interrupts = exec_controls_get(vmx) &
 		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@@ -3571,6 +3550,9 @@
 	if (nested_vmx_check_controls(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
+	if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
 	if (nested_vmx_check_host_state(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
 
@@ -4516,9 +4498,8 @@
 		(void)nested_get_evmcs_page(vcpu);
 	}
 
-	/* Service the TLB flush request for L2 before switching to L1. */
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	/* Service pending TLB flush requests for L2 before switching to L1. */
+	kvm_service_local_tlb_flush_requests(vcpu);
 
 	/*
 	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@@ -4871,6 +4852,7 @@
 	if (!vmx->nested.cached_vmcs12)
 		goto out_cached_vmcs12;
 
+	vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
 	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
 	if (!vmx->nested.cached_shadow_vmcs12)
 		goto out_cached_shadow_vmcs12;
@@ -5300,10 +5282,10 @@
 		return 1;
 
 	if (vmx->nested.current_vmptr != vmptr) {
-		struct kvm_host_map map;
-		struct vmcs12 *new_vmcs12;
+		struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+		struct vmcs_hdr hdr;
 
-		if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
 			/*
 			 * Reads from an unbacked page return all 1s,
 			 * which means that the 32 bits located at the
@@ -5314,12 +5296,16 @@
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 		}
 
-		new_vmcs12 = map.hva;
+		if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+						 offsetof(struct vmcs12, hdr),
+						 sizeof(hdr))) {
+			return nested_vmx_fail(vcpu,
+				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		}
 
-		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
-		    (new_vmcs12->hdr.shadow_vmcs &&
+		if (hdr.revision_id != VMCS12_REVISION ||
+		    (hdr.shadow_vmcs &&
 		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
-			kvm_vcpu_unmap(vcpu, &map, false);
 			return nested_vmx_fail(vcpu,
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 		}
@@ -5330,8 +5316,11 @@
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
 		 */
-		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
-		kvm_vcpu_unmap(vcpu, &map, false);
+		if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+					  VMCS12_SIZE)) {
+			return nested_vmx_fail(vcpu,
+				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		}
 
 		set_current_vmptr(vmx, vmptr);
 	}
@@ -5379,7 +5368,7 @@
 	struct {
 		u64 eptp, gpa;
 	} operand;
-	int i, r;
+	int i, r, gpr_index;
 
 	if (!(vmx->nested.msrs.secondary_ctls_high &
 	      SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5392,7 +5381,8 @@
 		return 1;
 
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+	type = kvm_register_read(vcpu, gpr_index);
 
 	types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
@@ -5459,7 +5449,7 @@
 		u64 gla;
 	} operand;
 	u16 vpid02;
-	int r;
+	int r, gpr_index;
 
 	if (!(vmx->nested.msrs.secondary_ctls_high &
 	      SECONDARY_EXEC_ENABLE_VPID) ||
@@ -5472,7 +5462,8 @@
 		return 1;
 
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+	type = kvm_register_read(vcpu, gpr_index);
 
 	types = (vmx->nested.msrs.vpid_caps &
 			VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b8e0d21..1b7456b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -118,16 +118,15 @@
 	}
 }
 
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	bool fixed = idx & (1u << 30);
 
 	idx &= ~(3u << 30);
 
-	return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
-		(fixed && idx >= pmu->nr_arch_fixed_counters);
+	return fixed ? idx < pmu->nr_arch_fixed_counters
+		     : idx < pmu->nr_arch_gp_counters;
 }
 
 static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 5f81ef0..1c94783 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -5,6 +5,7 @@
 #include <asm/cpu.h>
 
 #include "lapic.h"
+#include "irq.h"
 #include "posted_intr.h"
 #include "trace.h"
 #include "vmx.h"
@@ -77,13 +78,18 @@
 		pi_set_on(pi_desc);
 }
 
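+/*
+ * VT-d posted interrupts are usable only with an in-kernel local APIC,
+ * APICv enabled at the module level, an assigned device and IRQ-posting
+ * support in the IOMMU.  Note the module-wide enable_apicv check rather
+ * than the per-vCPU one: per-VM APICv activation can change at runtime,
+ * and all VT-d PI paths need to test one stable condition.
+ */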
+static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm) && enable_apicv &&
+		kvm_arch_has_assigned_device(kvm) &&
+		irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (!vmx_can_use_vtd_pi(vcpu->kvm))
 		return;
 
 	/* Set SN when the vCPU is preempted */
@@ -141,9 +147,7 @@
 	struct pi_desc old, new;
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-		!irq_remapping_cap(IRQ_POSTING_CAP)  ||
-		!kvm_vcpu_apicv_active(vcpu))
+	if (!vmx_can_use_vtd_pi(vcpu->kvm))
 		return 0;
 
 	WARN_ON(irqs_disabled());
@@ -270,9 +274,7 @@
 	struct vcpu_data vcpu_info;
 	int idx, ret = 0;
 
-	if (!kvm_arch_has_assigned_device(kvm) ||
-	    !irq_remapping_cap(IRQ_POSTING_CAP) ||
-	    !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+	if (!vmx_can_use_vtd_pi(kvm))
 		return 0;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 76861b6..f904488 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -769,24 +769,13 @@
 /*
  * Check if MSR is intercepted for currently loaded MSR bitmap.
  */
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
 {
-	unsigned long *msr_bitmap;
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
+	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
 		return true;
 
-	msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
-
-	if (msr <= 0x1fff) {
-		return !!test_bit(msr, msr_bitmap + 0x800 / f);
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-	}
-
-	return true;
+	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+					 MSR_IA32_SPEC_CTRL);
 }
 
 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -2929,6 +2918,13 @@
 	}
 }
 
+static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu))
+		return nested_get_vpid02(vcpu);
+	return to_vmx(vcpu)->vpid;
+}
+
 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -2941,31 +2937,29 @@
 	if (enable_ept)
 		ept_sync_context(construct_eptp(vcpu, root_hpa,
 						mmu->shadow_root_level));
-	else if (!is_guest_mode(vcpu))
-		vpid_sync_context(to_vmx(vcpu)->vpid);
 	else
-		vpid_sync_context(nested_get_vpid02(vcpu));
+		vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 {
 	/*
-	 * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
+	 * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
 	 * vmx_flush_tlb_guest() for an explanation of why this is ok.
 	 */
-	vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
+	vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
 }
 
 static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
 {
 	/*
-	 * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
-	 * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
-	 * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+	 * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
+	 * vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit are
+	 * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
 	 * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
 	 * i.e. no explicit INVVPID is necessary.
 	 */
-	vpid_sync_context(to_vmx(vcpu)->vpid);
+	vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -3697,46 +3691,6 @@
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__clear_bit(msr, msr_bitmap + 0x000 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__clear_bit(msr, msr_bitmap + 0x800 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__set_bit(msr, msr_bitmap + 0x000 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__set_bit(msr, msr_bitmap + 0x800 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5494,6 +5448,7 @@
 		u64 pcid;
 		u64 gla;
 	} operand;
+	int gpr_index;
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5501,12 +5456,8 @@
 	}
 
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
-
-	if (type > 3) {
-		kvm_inject_gp(vcpu, 0);
-		return 1;
-	}
+	gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+	type = kvm_register_read(vcpu, gpr_index);
 
 	/* According to the Intel instruction reference, the memory operand
 	 * is read even if it isn't needed (e.g., for type==all)
@@ -6316,9 +6267,9 @@
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int max_irr;
-	bool max_irr_updated;
+	bool got_posted_interrupt;
 
-	if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+	if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
 		return -EIO;
 
 	if (pi_test_on(&vmx->pi_desc)) {
@@ -6328,22 +6279,33 @@
 		 * But on x86 this is just a compiler barrier anyway.
 		 */
 		smp_mb__after_atomic();
-		max_irr_updated =
+		got_posted_interrupt =
 			kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
-
-		/*
-		 * If we are running L2 and L1 has a new pending interrupt
-		 * which can be injected, this may cause a vmexit or it may
-		 * be injected into L2.  Either way, this interrupt will be
-		 * processed via KVM_REQ_EVENT, not RVI, because we do not use
-		 * virtual interrupt delivery to inject L1 interrupts into L2.
-		 */
-		if (is_guest_mode(vcpu) && max_irr_updated)
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
 	} else {
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
+		got_posted_interrupt = false;
 	}
-	vmx_hwapic_irr_update(vcpu, max_irr);
+
+	/*
+	 * Newly recognized interrupts are injected via either virtual interrupt
+	 * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
+	 * disabled in two cases:
+	 *
+	 * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
+	 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
+	 * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
+	 * into L2, but KVM doesn't use virtual interrupt delivery to inject
+	 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
+	 *
+	 * 2) If APICv is disabled for this vCPU, assigned devices may still
+	 * attempt to post interrupts.  The posted interrupt vector will cause
+	 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
+	 */
+	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+		vmx_set_rvi(max_irr);
+	else if (got_posted_interrupt)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+
 	return max_irr;
 }
 
@@ -6749,7 +6711,7 @@
 	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
 	 * save it.
 	 */
-	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+	if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
 		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
 	x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
@@ -7563,7 +7525,8 @@
 static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 {
 	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
-			  BIT(APICV_INHIBIT_REASON_HYPERV);
+			  BIT(APICV_INHIBIT_REASON_HYPERV) |
+			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
 	return supported & BIT(bit);
 }
@@ -7814,10 +7777,10 @@
 		ple_window_shrink = 0;
 	}
 
-	if (!cpu_has_vmx_apicv()) {
+	if (!cpu_has_vmx_apicv())
 		enable_apicv = 0;
+	if (!enable_apicv)
 		vmx_x86_ops.sync_pir_to_irr = NULL;
-	}
 
 	if (cpu_has_vmx_tsc_scaling()) {
 		kvm_has_tsc_control = true;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e7db42e..4df2ac2 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,6 +142,16 @@
 	struct vmcs12 *cached_shadow_vmcs12;
 
 	/*
+	 * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+	 */
+	struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+	/*
+	 * GPA to HVA cache for VMCS12
+	 */
+	struct gfn_to_hva_cache vmcs12_cache;
+
+	/*
 	 * Indicates if the shadow vmcs or enlightened vmcs must be updated
 	 * with the data held by struct vmcs12.
 	 */
@@ -400,6 +410,34 @@
 
 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
+/*
+ * Note, early Intel manuals have the write-low and read-high bitmap offsets
+ * the wrong way round.  The bitmaps control MSRs 0x00000000-0x00001fff and
+ * 0xc0000000-0xc0001fff.  The former (low) uses bytes 0-0x3ff for reads and
+ * 0x800-0xbff for writes.  The latter (high) uses 0x400-0x7ff for reads and
+ * 0xc00-0xfff for writes.  MSRs not covered by either of the ranges always
+ * VM-Exit.
+ */
+#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base)      \
+static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap,  \
+						       u32 msr)		       \
+{									       \
+	int f = sizeof(unsigned long);					       \
+									       \
+	if (msr <= 0x1fff)						       \
+		return bitop##_bit(msr, bitmap + base / f);		       \
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))		       \
+		return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
+	return (rtype)true;						       \
+}
+#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop)		       \
+	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0x0)     \
+	__BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
+
+BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
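+
+/*
+ * Worked example of the layout above: MSR_EFER (0xc0000080) falls in
+ * the high range, so vmx_test_msr_bitmap_write() masks the index down
+ * to 0x80 and tests bit 0x80 of the "high write" region, i.e. within
+ * byte offsets 0xc00-0xfff (0x800 write base + 0x400 high adjustment).
+ */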
+
 static inline u8 vmx_get_rvi(void)
 {
 	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -522,4 +560,9 @@
 
 void dump_vmcs(struct kvm_vcpu *vcpu);
 
+static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
+{
+	return (vmx_instr_info >> 28) & 0xf;
+}
+
 #endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c1c4e2b..0ee1a03 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3258,10 +3258,36 @@
 	static_call(kvm_x86_tlb_flush_guest)(vcpu);
 }
 
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.tlb_flush;
+	static_call(kvm_x86_tlb_flush_current)(vcpu);
+}
+
+/*
+ * Service "local" TLB flush requests, which are specific to the current MMU
+ * context.  In addition to the generic event handling in vcpu_enter_guest(),
+ * TLB flushes that are targeted at an MMU context also need to be serviced
+ * prior to nested VM-Enter/VM-Exit.
+ */
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
+{
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+		kvm_vcpu_flush_tlb_current(vcpu);
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+		kvm_vcpu_flush_tlb_guest(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-	struct kvm_host_map map;
-	struct kvm_steal_time *st;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+	struct kvm_steal_time __user *st;
+	struct kvm_memslots *slots;
+	u64 steal;
+	u32 version;
 
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
@@ -3271,47 +3297,86 @@
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
-	/* -EAGAIN is returned in atomic context so we can just return. */
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-			&map, &vcpu->arch.st.cache, false))
+	if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
 		return;
 
-	st = map.hva +
-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+	slots = kvm_memslots(vcpu->kvm);
 
+	if (unlikely(slots->generation != ghc->generation ||
+		     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+		gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+		/* We rely on struct kvm_steal_time fitting in a single page. */
+		BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+		    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+			return;
+	}
+
+	st = (struct kvm_steal_time __user *)ghc->hva;
 	/*
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
 	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
-		u8 st_preempted = xchg(&st->preempted, 0);
+		u8 st_preempted = 0;
+		int err = -EFAULT;
+
+		if (!user_access_begin(st, sizeof(*st)))
+			return;
+
+		asm volatile("1: xchgb %0, %2\n"
+			     "xor %1, %1\n"
+			     "2:\n"
+			     _ASM_EXTABLE_UA(1b, 2b)
+			     : "+q" (st_preempted),
+			       "+&r" (err),
+			       "+m" (st->preempted));
+		if (err)
+			goto out;
+
+		user_access_end();
+
+		vcpu->arch.st.preempted = 0;
 
 		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
 				       st_preempted & KVM_VCPU_FLUSH_TLB);
 		if (st_preempted & KVM_VCPU_FLUSH_TLB)
 			kvm_vcpu_flush_tlb_guest(vcpu);
+
+		if (!user_access_begin(st, sizeof(*st)))
+			goto dirty;
 	} else {
-		st->preempted = 0;
+		if (!user_access_begin(st, sizeof(*st)))
+			return;
+
+		unsafe_put_user(0, &st->preempted, out);
+		vcpu->arch.st.preempted = 0;
 	}
 
-	vcpu->arch.st.preempted = 0;
+	unsafe_get_user(version, &st->version, out);
+	if (version & 1)
+		version += 1;  /* first time write, random junk */
 
-	if (st->version & 1)
-		st->version += 1;  /* first time write, random junk */
-
-	st->version += 1;
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
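+	/*
+	 * st->version is now odd, telling the guest that a steal-time
+	 * update is in flight; it is bumped back to an even value once
+	 * the new steal total has been published below.
+	 */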
 
 	smp_wmb();
 
-	st->steal += current->sched_info.run_delay -
+	unsafe_get_user(steal, &st->steal, out);
+	steal += current->sched_info.run_delay -
 		vcpu->arch.st.last_steal;
 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	unsafe_put_user(steal, &st->steal, out);
 
-	smp_wmb();
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
 
-	st->version += 1;
-
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+	user_access_end();
+ dirty:
+	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3517,7 +3582,7 @@
 		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
 			return 1;
 
-		if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
+		if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
 			return 1;
 		break;
 
@@ -4091,6 +4156,7 @@
 	case KVM_CAP_SGX_ATTRIBUTE:
 #endif
 	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+	case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
 	case KVM_CAP_SREGS2:
 	case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
 	case KVM_CAP_VCPU_ATTRIBUTES:
@@ -4137,7 +4203,7 @@
 		r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
 		break;
 	case KVM_CAP_NR_VCPUS:
-		r = KVM_SOFT_MAX_VCPUS;
+		r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
@@ -4351,8 +4417,10 @@
 
 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
-	struct kvm_host_map map;
-	struct kvm_steal_time *st;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+	struct kvm_steal_time __user *st;
+	struct kvm_memslots *slots;
+	static const u8 preempted = KVM_VCPU_PREEMPTED;
 
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
@@ -4360,16 +4428,23 @@
 	if (vcpu->arch.st.preempted)
 		return;
 
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-			&vcpu->arch.st.cache, true))
+	/* This happens on process exit */
+	if (unlikely(current->mm != vcpu->kvm->mm))
 		return;
 
-	st = map.hva +
-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+	slots = kvm_memslots(vcpu->kvm);
 
-	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+	if (unlikely(slots->generation != ghc->generation ||
+		     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+		return;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+	st = (struct kvm_steal_time __user *)ghc->hva;
+	BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+
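+	/*
+	 * This can run from the sched-out preempt notifier with preemption
+	 * disabled, so the write must neither fault nor sleep; if it would,
+	 * the update is simply skipped.
+	 */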
+	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+
+	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -4397,8 +4472,7 @@
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	if (vcpu->arch.apicv_active)
-		static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+	static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 	return kvm_apic_get_state(vcpu, s);
 }
@@ -5073,6 +5147,17 @@
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
 
+		/*
+		 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+		 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc. aren't
+		 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+		 * faults due to reusing SPs/SPTEs.  In practice no sane VMM mucks with
+		 * the core vCPU model on the fly, so fail.
+		 */
+		r = -EINVAL;
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
@@ -5083,6 +5168,14 @@
 		struct kvm_cpuid2 __user *cpuid_arg = argp;
 		struct kvm_cpuid2 cpuid;
 
+		/*
+		 * KVM_SET_CPUID{,2} after KVM_RUN is forbidden, see the comment in
+		 * KVM_SET_CPUID case above.
+		 */
+		r = -EINVAL;
+		if (vcpu->arch.last_vmentry_cpu != -1)
+			goto out;
+
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
@@ -5728,6 +5821,12 @@
 		if (kvm_x86_ops.vm_copy_enc_context_from)
 			r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
 		return r;
+	case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
+		r = -EINVAL;
+		if (kvm_x86_ops.vm_move_enc_context_from)
+			r = kvm_x86_ops.vm_move_enc_context_from(
+				kvm, cap->args[0]);
+		return r;
 	case KVM_CAP_EXIT_HYPERCALL:
 		if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
 			r = -EINVAL;
@@ -7328,7 +7427,9 @@
 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
 			      u32 pmc)
 {
-	return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
+	if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
+		return 0;
+	return -EINVAL;
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8789,7 +8890,7 @@
 
 	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
-	op_64_bit = is_64_bit_mode(vcpu);
+	op_64_bit = is_64_bit_hypercall(vcpu);
 	if (!op_64_bit) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -9469,8 +9570,7 @@
 	if (irqchip_split(vcpu->kvm))
 		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
 	else {
-		if (vcpu->arch.apicv_active)
-			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+		static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 		if (ioapic_in_kernel(vcpu->kvm))
 			kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
@@ -9488,12 +9588,16 @@
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 
-	if (to_hv_vcpu(vcpu))
+	if (to_hv_vcpu(vcpu)) {
 		bitmap_or((ulong *)eoi_exit_bitmap,
 			  vcpu->arch.ioapic_handled_vectors,
 			  to_hv_synic(vcpu)->vec_bitmap, 256);
+		static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+		return;
+	}
 
-	static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+	static_call(kvm_x86_load_eoi_exitmap)(
+		vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
 }
 
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -9552,7 +9656,7 @@
 	}
 
 	if (kvm_request_pending(vcpu)) {
-		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
 			r = -EIO;
 			goto out;
 		}
@@ -9585,10 +9689,7 @@
 			/* Flushing all ASIDs flushes the current ASID... */
 			kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 		}
-		if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-			kvm_vcpu_flush_tlb_current(vcpu);
-		if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
-			kvm_vcpu_flush_tlb_guest(vcpu);
+		kvm_service_local_tlb_flush_requests(vcpu);
 
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -9739,10 +9840,12 @@
 
 	/*
 	 * This handles the case where a posted interrupt was
-	 * notified with kvm_vcpu_kick.
+	 * notified with kvm_vcpu_kick.  Assigned devices can
+	 * use the POSTED_INTR_VECTOR even if APICv is disabled,
+	 * so sync the PIR to the IRR on this vCPU regardless.
 	 */
-	if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
-		static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+	if (kvm_lapic_enabled(vcpu))
+		static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 	if (kvm_vcpu_exit_request(vcpu)) {
 		vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -9786,8 +9889,8 @@
 		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
 			break;
 
-		if (vcpu->arch.apicv_active)
-			static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+		if (kvm_lapic_enabled(vcpu))
+			static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
 		if (unlikely(kvm_vcpu_exit_request(vcpu))) {
 			exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
@@ -10564,6 +10667,24 @@
 	return ret;
 }
 
+static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
+{
+	bool inhibit = false;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	down_write(&kvm->arch.apicv_update_lock);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
+			inhibit = true;
+			break;
+		}
+	}
+	__kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
+	up_write(&kvm->arch.apicv_update_lock);
+}
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
@@ -10616,6 +10737,8 @@
 
 	static_call(kvm_x86_update_exception_bitmap)(vcpu);
 
+	kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
+
 	r = 0;
 
 out:
@@ -10859,11 +10982,8 @@
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-	struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
 	int idx;
 
-	kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
 	kvmclock_reset(vcpu);
 
 	static_call(kvm_x86_vcpu_free)(vcpu);
@@ -12275,7 +12395,8 @@
 		return kvm_skip_emulated_instruction(vcpu);
 
 	default:
-		BUG(); /* We have already checked above that type <= 3 */
+		kvm_inject_gp(vcpu, 0);
+		return 1;
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4..4abcd8d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -103,6 +103,7 @@
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
 
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -153,12 +154,24 @@
 {
 	int cs_db, cs_l;
 
+	WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
 	if (!is_long_mode(vcpu))
 		return false;
 	static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
 	return cs_l;
 }
 
+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * If running with protected guest state, the CS register is not
+	 * accessible. The hypercall register values will have had to be
+	 * provided in 64-bit mode, so assume the guest is in 64-bit mode.
+	 */
+	return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
 static inline bool x86_exception_has_error_code(unsigned int vector)
 {
 	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
@@ -173,12 +186,6 @@
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
 }
 
-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
-{
-	++vcpu->stat.tlb_flush;
-	static_call(kvm_x86_tlb_flush_current)(vcpu);
-}
-
 static inline int is_pae(struct kvm_vcpu *vcpu)
 {
 	return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 8f62bae..dff2bdf 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -127,9 +127,9 @@
 	state_entry_time = vx->runstate_entry_time;
 	state_entry_time |= XEN_RUNSTATE_UPDATE;
 
-	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
 		     sizeof(state_entry_time));
-	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
 		     sizeof(state_entry_time));
 
 	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -144,9 +144,9 @@
 	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
 		     offsetof(struct compat_vcpu_runstate_info, state));
-	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
-	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
 	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -163,9 +163,9 @@
 		     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
 	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
 		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
-	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
-		     sizeof(((struct compat_vcpu_runstate_info *)0)->time));
-	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+		     sizeof_field(struct compat_vcpu_runstate_info, time));
+	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
 	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -205,9 +205,9 @@
 	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
 		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
 	BUILD_BUG_ON(sizeof(rc) !=
-		     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+		     sizeof_field(struct vcpu_info, evtchn_upcall_pending));
 	BUILD_BUG_ON(sizeof(rc) !=
-		     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
 	/*
 	 * For efficiency, this mirrors the checks for using the valid
@@ -299,7 +299,7 @@
 		break;
 
 	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-		data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+		data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
 		r = 0;
 		break;
 
@@ -698,7 +698,7 @@
 	    kvm_hv_hypercall_enabled(vcpu))
 		return kvm_hv_hypercall(vcpu);
 
-	longmode = is_64_bit_mode(vcpu);
+	longmode = is_64_bit_hypercall(vcpu);
 	if (!longmode) {
 		params[0] = (u32)kvm_rbx_read(vcpu);
 		params[1] = (u32)kvm_rcx_read(vcpu);
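
The xen.c hunks above mechanically convert the open-coded null-pointer-cast idiom to sizeof_field(), which include/linux/stddef.h defines for exactly this purpose. A standalone sketch showing the two forms are equivalent (the demo struct is hypothetical, not the real vcpu_runstate_info):

#include <stdio.h>

/* Same definition as the kernel's (include/linux/stddef.h). */
#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

struct runstate_demo {
	int state;
	unsigned long long state_entry_time;
};

int main(void)
{
	/* The old open-coded idiom and the macro agree at compile time. */
	_Static_assert(sizeof(((struct runstate_demo *)0)->state_entry_time) ==
		       sizeof_field(struct runstate_demo, state_entry_time),
		       "idioms must agree");
	printf("state_entry_time is %zu bytes\n",
	       sizeof_field(struct runstate_demo, state_entry_time));
	return 0;
}
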
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 23d54b8..3548730 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -229,28 +229,75 @@
 	swiotlb_adjust_size(size);
 }
 
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
+{
+	unsigned long pfn = 0;
+	pgprot_t prot;
+
+	switch (level) {
+	case PG_LEVEL_4K:
+		pfn = pte_pfn(*kpte);
+		prot = pte_pgprot(*kpte);
+		break;
+	case PG_LEVEL_2M:
+		pfn = pmd_pfn(*(pmd_t *)kpte);
+		prot = pmd_pgprot(*(pmd_t *)kpte);
+		break;
+	case PG_LEVEL_1G:
+		pfn = pud_pfn(*(pud_t *)kpte);
+		prot = pud_pgprot(*(pud_t *)kpte);
+		break;
+	default:
+		WARN_ONCE(1, "Invalid level for kpte\n");
+		return 0;
+	}
+
+	if (ret_prot)
+		*ret_prot = prot;
+
+	return pfn;
+}
+
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+{
+#ifdef CONFIG_PARAVIRT
+	unsigned long sz = npages << PAGE_SHIFT;
+	unsigned long vaddr_end = vaddr + sz;
+
+	while (vaddr < vaddr_end) {
+		int psize, pmask, level;
+		unsigned long pfn;
+		pte_t *kpte;
+
+		kpte = lookup_address(vaddr, &level);
+		if (!kpte || pte_none(*kpte)) {
+			WARN_ONCE(1, "kpte lookup for vaddr\n");
+			return;
+		}
+
+		pfn = pg_level_to_pfn(level, kpte, NULL);
+		if (!pfn)
+			continue;
+
+		psize = page_level_size(level);
+		pmask = page_level_mask(level);
+
+		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
+
+		vaddr = (vaddr & pmask) + psize;
+	}
+#endif
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
 	unsigned long pfn, pa, size;
 	pte_t new_pte;
 
-	switch (level) {
-	case PG_LEVEL_4K:
-		pfn = pte_pfn(*kpte);
-		old_prot = pte_pgprot(*kpte);
-		break;
-	case PG_LEVEL_2M:
-		pfn = pmd_pfn(*(pmd_t *)kpte);
-		old_prot = pmd_pgprot(*(pmd_t *)kpte);
-		break;
-	case PG_LEVEL_1G:
-		pfn = pud_pfn(*(pud_t *)kpte);
-		old_prot = pud_pgprot(*(pud_t *)kpte);
-		break;
-	default:
+	pfn = pg_level_to_pfn(level, kpte, &old_prot);
+	if (!pfn)
 		return;
-	}
 
 	new_prot = old_prot;
 	if (enc)
@@ -286,12 +333,13 @@
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
 					   unsigned long size, bool enc)
 {
-	unsigned long vaddr_end, vaddr_next;
+	unsigned long vaddr_end, vaddr_next, start;
 	unsigned long psize, pmask;
 	int split_page_size_mask;
 	int level, ret;
 	pte_t *kpte;
 
+	start = vaddr;
 	vaddr_next = vaddr;
 	vaddr_end = vaddr + size;
 
@@ -346,6 +394,7 @@
 
 	ret = 0;
 
+	notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
 out:
 	__flush_tlb_all();
 	return ret;
@@ -361,6 +410,11 @@
 	return early_set_memory_enc_dec(vaddr, size, true);
 }
 
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+{
+	notify_range_enc_status_changed(vaddr, npages, enc);
+}
+
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
 {
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 934dc5b..b407211 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2023,6 +2023,12 @@
 	 */
 	cpa_flush(&cpa, 0);
 
+	/*
+	 * Notify hypervisor that a given memory range is mapped encrypted
+	 * or decrypted.
+	 */
+	notify_range_enc_status_changed(addr, numpages, enc);
+
 	return ret;
 }
 
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 9e55bcb..6a8f3b5 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -225,7 +225,6 @@
 static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
-	unsigned int i;
 
 	if (skip_ioapic_setup) {
 		char *m = (max_cpus == 0) ?
@@ -238,16 +237,9 @@
 	}
 	xen_init_lock_cpu(0);
 
-	smp_store_boot_cpu_info();
-	cpu_data(0).x86_max_cores = 1;
+	smp_prepare_cpus_common();
 
-	for_each_possible_cpu(i) {
-		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
-	}
-	set_cpu_sibling_map(0);
+	cpu_data(0).x86_max_cores = 1;
 
 	speculative_store_bypass_ht_init();
 
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index a8a0416..7b43593 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -121,7 +121,6 @@
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *);
 
 void local_flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
@@ -138,9 +137,7 @@
 #define flush_cache_vunmap(start,end)			do { } while (0)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
 #define flush_dcache_page(page)				do { } while (0)
-static inline void flush_dcache_folio(struct folio *folio) { }
 
 #define flush_icache_range local_flush_icache_range
 #define flush_cache_page(vma, addr, pfn)		do { } while (0)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 104b327..3e3e1a5 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -419,3 +419,4 @@
 446	common	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
diff --git a/block/bdev.c b/block/bdev.c
index b4dab2f..b1d087e 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -753,8 +753,7 @@
 
 	if (!bdev)
 		return NULL;
-	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
-	    !try_module_get(bdev->bd_disk->fops->owner)) {
+	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
 		put_device(&bdev->bd_device);
 		return NULL;
 	}
@@ -764,7 +763,6 @@
 
 void blkdev_put_no_open(struct block_device *bdev)
 {
-	module_put(bdev->bd_disk->fops->owner);
 	put_device(&bdev->bd_device);
 }
 
@@ -820,12 +818,14 @@
 	ret = -ENXIO;
 	if (!disk_live(disk))
 		goto abort_claiming;
+	if (!try_module_get(disk->fops->owner))
+		goto abort_claiming;
 	if (bdev_is_partition(bdev))
 		ret = blkdev_get_part(bdev, mode);
 	else
 		ret = blkdev_get_whole(bdev, mode);
 	if (ret)
-		goto abort_claiming;
+		goto put_module;
 	if (mode & FMODE_EXCL) {
 		bd_finish_claiming(bdev, holder);
 
@@ -847,7 +847,8 @@
 	if (unblock_events)
 		disk_unblock_events(disk);
 	return bdev;
-
+put_module:
+	module_put(disk->fops->owner);
 abort_claiming:
 	if (mode & FMODE_EXCL)
 		bd_abort_claiming(bdev, holder);
@@ -956,6 +957,7 @@
 		blkdev_put_whole(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
+	module_put(disk->fops->owner);
 	blkdev_put_no_open(bdev);
 }
 EXPORT_SYMBOL(blkdev_put);
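
The bdev.c changes above move the module reference out of blkdev_get_no_open() and into the open path proper: the reference is taken only once the open can proceed, dropped via the put_module label on every failure branch, and dropped again in blkdev_put() to pair with a successful open. A runnable userspace sketch of that acquire/unwind pairing (all names are illustrative, not the block layer's):

#include <stdbool.h>
#include <stdio.h>

static bool module_get(void) { puts("get module ref"); return true; }
static void module_put(void) { puts("put module ref"); }

static int open_device(bool fail_open)
{
	if (!module_get())
		return -1;
	if (fail_open)
		goto put_module;	/* failure: unwind in reverse order */
	return 0;			/* success: ref stays held until close */
put_module:
	module_put();
	return -1;
}

static void close_device(void)
{
	module_put();			/* pairs with the ref taken at open */
}

int main(void)
{
	if (open_device(false) == 0)
		close_device();
	open_device(true);
	return 0;
}
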
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 88b1fce..663aabf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -640,7 +640,7 @@
 	 */
 	ret = blk_queue_enter(q, 0);
 	if (ret)
-		return ret;
+		goto fail;
 
 	rcu_read_lock();
 	spin_lock_irq(&q->queue_lock);
@@ -676,13 +676,13 @@
 		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
 		if (unlikely(!new_blkg)) {
 			ret = -ENOMEM;
-			goto fail;
+			goto fail_exit_queue;
 		}
 
 		if (radix_tree_preload(GFP_KERNEL)) {
 			blkg_free(new_blkg);
 			ret = -ENOMEM;
-			goto fail;
+			goto fail_exit_queue;
 		}
 
 		rcu_read_lock();
@@ -722,9 +722,10 @@
 fail_unlock:
 	spin_unlock_irq(&q->queue_lock);
 	rcu_read_unlock();
+fail_exit_queue:
+	blk_queue_exit(q);
 fail:
 	blkdev_put_no_open(bdev);
-	blk_queue_exit(q);
 	/*
 	 * If queue was bypassing, we should retry.  Do so after a
 	 * short msleep().  It isn't strictly necessary but queue
diff --git a/block/blk-core.c b/block/blk-core.c
index b043de2..1378d08 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -363,8 +363,10 @@
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
 	blk_sync_queue(q);
-	if (queue_is_mq(q))
+	if (queue_is_mq(q)) {
+		blk_mq_cancel_work_sync(q);
 		blk_mq_exit_queue(q);
+	}
 
 	/*
 	 * In theory, request pool of sched_tags belongs to request queue.
@@ -809,10 +811,8 @@
 	if (unlikely(!current->io_context))
 		create_task_io_context(current, GFP_ATOMIC, q->node);
 
-	if (blk_throtl_bio(bio)) {
-		blkcg_bio_issue_init(bio);
+	if (blk_throtl_bio(bio))
 		return false;
-	}
 
 	blk_cgroup_bio_start(bio);
 	blkcg_bio_issue_init(bio);
@@ -1017,6 +1017,7 @@
 /**
  * bio_poll - poll for BIO completions
  * @bio: bio to poll for
+ * @iob: batches of IO
  * @flags: BLK_POLL_* flags that control the behavior
  *
  * Poll for completions on queue associated with the bio. Returns number of
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8e364bd..1fce6d1 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -379,7 +379,7 @@
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
  */
-bool blk_insert_flush(struct request *rq)
+void blk_insert_flush(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned long fflags = q->queue_flags;	/* may change, cache */
@@ -409,7 +409,7 @@
 	 */
 	if (!policy) {
 		blk_mq_end_request(rq, 0);
-		return true;
+		return;
 	}
 
 	BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
@@ -420,8 +420,10 @@
 	 * for normal execution.
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
-	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH)))
-		return false;
+	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+		blk_mq_request_bypass_insert(rq, false, true);
+		return;
+	}
 
 	/*
 	 * @rq should go through flush machinery.  Mark it part of flush
@@ -437,8 +439,6 @@
 	spin_lock_irq(&fq->mq_flush_lock);
 	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
 	spin_unlock_irq(&fq->mq_flush_lock);
-
-	return true;
 }
 
 /**
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
index c246c42..b925f3d 100644
--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -104,8 +104,8 @@
 };
 
 /**
- * disk_register_ia_ranges - register with sysfs a set of independent
- *			    access ranges
+ * disk_register_independent_access_ranges - register with sysfs a set of
+ *		independent access ranges
  * @disk:	Target disk
  * @new_iars:	New set of independent access ranges
  *
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 4be652f..ba214494 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -370,9 +370,6 @@
 	bool ret = false;
 	enum hctx_type type;
 
-	if (bio_queue_enter(bio))
-		return false;
-
 	if (e && e->type->ops.bio_merge) {
 		ret = e->type->ops.bio_merge(q, bio, nr_segs);
 		goto out_put;
@@ -397,7 +394,6 @@
 
 	spin_unlock(&ctx->lock);
 out_put:
-	blk_queue_exit(q);
 	return ret;
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 629cf42..8874a63 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -860,13 +860,14 @@
 		if (iob->need_ts)
 			__blk_mq_end_request_acct(rq, now);
 
+		rq_qos_done(rq->q, rq);
+
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 		if (!refcount_dec_and_test(&rq->ref))
 			continue;
 
 		blk_crypto_free_request(rq);
 		blk_pm_mark_last_busy(rq);
-		rq_qos_done(rq->q, rq);
 
 		if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) {
 			if (cur_hctx)
@@ -2495,8 +2496,9 @@
 	return BLK_MAX_REQUEST_COUNT;
 }
 
-static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
-				  unsigned int nr_segs, bool *same_queue_rq)
+static bool blk_mq_attempt_bio_merge(struct request_queue *q,
+				     struct bio *bio, unsigned int nr_segs,
+				     bool *same_queue_rq)
 {
 	if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
 		if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
@@ -2520,12 +2522,8 @@
 	};
 	struct request *rq;
 
-	if (unlikely(bio_queue_enter(bio)))
+	if (blk_mq_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
 		return NULL;
-	if (unlikely(!submit_bio_checks(bio)))
-		goto put_exit;
-	if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
-		goto put_exit;
 
 	rq_qos_throttle(q, bio);
 
@@ -2542,26 +2540,42 @@
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-put_exit:
-	blk_queue_exit(q);
+
 	return NULL;
 }
 
+static inline bool blk_mq_can_use_cached_rq(struct request *rq, struct bio *bio)
+{
+	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+		return false;
+
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+		return false;
+
+	return true;
+}
+
 static inline struct request *blk_mq_get_request(struct request_queue *q,
 						 struct blk_plug *plug,
 						 struct bio *bio,
 						 unsigned int nsegs,
 						 bool *same_queue_rq)
 {
-	if (plug) {
-		struct request *rq;
+	struct request *rq;
+	bool checked = false;
 
+	if (plug) {
 		rq = rq_list_peek(&plug->cached_rq);
 		if (rq && rq->q == q) {
 			if (unlikely(!submit_bio_checks(bio)))
 				return NULL;
-			if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+			if (blk_mq_attempt_bio_merge(q, bio, nsegs,
+						same_queue_rq))
 				return NULL;
+			checked = true;
+			if (!blk_mq_can_use_cached_rq(rq, bio))
+				goto fallback;
+			rq->cmd_flags = bio->bi_opf;
 			plug->cached_rq = rq_list_next(rq);
 			INIT_LIST_HEAD(&rq->queuelist);
 			rq_qos_throttle(q, bio);
@@ -2569,7 +2583,17 @@
 		}
 	}
 
-	return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+fallback:
+	if (unlikely(bio_queue_enter(bio)))
+		return NULL;
+	if (unlikely(!checked && !submit_bio_checks(bio)))
+		goto out_put;
+	rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+	if (rq)
+		return rq;
+out_put:
+	blk_queue_exit(q);
+	return NULL;
 }
 
 /**
@@ -2624,8 +2648,10 @@
 		return;
 	}
 
-	if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
+	if (op_is_flush(bio->bi_opf)) {
+		blk_insert_flush(rq);
 		return;
+	}
 
 	if (plug && (q->nr_hw_queues == 1 ||
 	    blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
@@ -4394,6 +4420,19 @@
 }
 EXPORT_SYMBOL(blk_mq_rq_cpu);
 
+void blk_mq_cancel_work_sync(struct request_queue *q)
+{
+	if (queue_is_mq(q)) {
+		struct blk_mq_hw_ctx *hctx;
+		int i;
+
+		cancel_delayed_work_sync(&q->requeue_work);
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			cancel_delayed_work_sync(&hctx->run_work);
+	}
+}
+
 static int __init blk_mq_init(void)
 {
 	int i;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cb0b548..afcf993 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -89,15 +89,7 @@
 	return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
 }
 
-/*
- * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
- * @q: request queue
- * @flags: request command flags
- * @ctx: software queue cpu ctx
- */
-static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
-						     unsigned int flags,
-						     struct blk_mq_ctx *ctx)
+static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
 {
 	enum hctx_type type = HCTX_TYPE_DEFAULT;
 
@@ -108,8 +100,20 @@
 		type = HCTX_TYPE_POLL;
 	else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
 		type = HCTX_TYPE_READ;
-	
-	return ctx->hctxs[type];
+	return type;
+}
+
+/*
+ * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
+ * @q: request queue
+ * @flags: request command flags
+ * @ctx: software queue cpu ctx
+ */
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
+						     unsigned int flags,
+						     struct blk_mq_ctx *ctx)
+{
+	return ctx->hctxs[blk_mq_get_hctx_type(flags)];
 }
 
 /*
@@ -124,6 +128,8 @@
 void blk_mq_free_plug_rqs(struct blk_plug *plug);
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
+void blk_mq_cancel_work_sync(struct request_queue *q);
+
 void blk_mq_release(struct request_queue *q);
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
@@ -149,7 +155,7 @@
 	blk_mq_req_flags_t flags;
 	unsigned int shallow_depth;
 	unsigned int cmd_flags;
-	unsigned int rq_flags;
+	req_flags_t rq_flags;
 
 	/* allocate multiple requests/tags in one go */
 	unsigned int nr_tags;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cef1f71..cd75b0f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -791,16 +791,6 @@
 
 	blk_free_queue_stats(q->stats);
 
-	if (queue_is_mq(q)) {
-		struct blk_mq_hw_ctx *hctx;
-		int i;
-
-		cancel_delayed_work_sync(&q->requeue_work);
-
-		queue_for_each_hw_ctx(q, hctx, i)
-			cancel_delayed_work_sync(&hctx->run_work);
-	}
-
 	blk_exit_queue(q);
 
 	blk_queue_free_zone_bitmaps(q);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 1d0c76c..774ecc5 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -429,9 +429,10 @@
 		op = REQ_OP_ZONE_RESET;
 
 		/* Invalidate the page cache, including dirty pages. */
+		filemap_invalidate_lock(bdev->bd_inode->i_mapping);
 		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
 		if (ret)
-			return ret;
+			goto fail;
 		break;
 	case BLKOPENZONE:
 		op = REQ_OP_ZONE_OPEN;
@@ -449,15 +450,9 @@
 	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
 			       GFP_KERNEL);
 
-	/*
-	 * Invalidate the page cache again for zone reset: writes can only be
-	 * direct for zoned devices so concurrent writes would not add any page
-	 * to the page cache after/during reset. The page cache may be filled
-	 * again due to concurrent reads though and dropping the pages for
-	 * these is fine.
-	 */
-	if (!ret && cmd == BLKRESETZONE)
-		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+fail:
+	if (cmd == BLKRESETZONE)
+		filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
 
 	return ret;
 }
diff --git a/block/blk.h b/block/blk.h
index b4fed20..ccde6e6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -271,7 +271,7 @@
  */
 #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
 
-bool blk_insert_flush(struct request *rq);
+void blk_insert_flush(struct request *rq);
 
 int elevator_switch_mq(struct request_queue *q,
 			      struct elevator_type *new_e);
diff --git a/block/elevator.c b/block/elevator.c
index 1f39f6e..19a78d5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -694,12 +694,18 @@
 	if (!e)
 		return;
 
+	/*
+	 * We are called before adding the disk, when there isn't any FS I/O,
+	 * so freezing the queue plus canceling dispatch work is enough to
+	 * drain any dispatch activity originating from passthrough
+	 * requests. There is then no need to quiesce the queue, which may
+	 * add long boot latency, especially when lots of disks are involved.
+	 */
 	blk_mq_freeze_queue(q);
-	blk_mq_quiesce_queue(q);
+	blk_mq_cancel_work_sync(q);
 
 	err = blk_mq_init_sched(q, e);
 
-	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
 
 	if (err) {
diff --git a/block/genhd.c b/block/genhd.c
index ca2fbab..30362ae 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -394,8 +394,8 @@
  * This function registers the partitioning information in @disk
  * with the kernel.
  */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		     const struct attribute_group **groups)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups)
 
 {
 	struct device *ddev = disk_to_dev(disk);
@@ -544,7 +544,7 @@
 out_free_ext_minor:
 	if (disk->major == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(disk->first_minor);
-	return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
+	return ret;
 }
 EXPORT_SYMBOL(device_add_disk);
 
@@ -1111,6 +1111,8 @@
 	might_sleep();
 	WARN_ON_ONCE(disk_live(disk));
 
+	blk_mq_cancel_work_sync(disk->queue);
+
 	disk_release_events(disk);
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
diff --git a/block/ioctl.c b/block/ioctl.c
index d6af0ac..0a1d10a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -113,6 +113,7 @@
 	uint64_t range[2];
 	uint64_t start, len;
 	struct request_queue *q = bdev_get_queue(bdev);
+	struct inode *inode = bdev->bd_inode;
 	int err;
 
 	if (!(mode & FMODE_WRITE))
@@ -135,12 +136,17 @@
 	if (start + len > bdev_nr_bytes(bdev))
 		return -EINVAL;
 
+	filemap_invalidate_lock(inode->i_mapping);
 	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
 	if (err)
-		return err;
+		goto fail;
 
-	return blkdev_issue_discard(bdev, start >> 9, len >> 9,
-				    GFP_KERNEL, flags);
+	err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+				   GFP_KERNEL, flags);
+
+fail:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return err;
 }
 
 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
@@ -148,6 +154,7 @@
 {
 	uint64_t range[2];
 	uint64_t start, end, len;
+	struct inode *inode = bdev->bd_inode;
 	int err;
 
 	if (!(mode & FMODE_WRITE))
@@ -170,12 +177,17 @@
 		return -EINVAL;
 
 	/* Invalidate the page cache, including dirty pages */
+	filemap_invalidate_lock(inode->i_mapping);
 	err = truncate_bdev_range(bdev, mode, start, end);
 	if (err)
-		return err;
+		goto fail;
 
-	return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			BLKDEV_ZERO_NOUNMAP);
+	err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
+				   BLKDEV_ZERO_NOUNMAP);
+
+fail:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return err;
 }
 
 static int put_ushort(unsigned short __user *argp, unsigned short val)
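
Both ioctl paths above now hold the mapping's invalidate lock across the page-cache truncate and the device operation, so no page can be faulted back in between the two steps. Reduced to a runnable sketch with a plain mutex standing in for filemap_invalidate_lock() (all names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t invalidate_lock = PTHREAD_MUTEX_INITIALIZER;

static int truncate_range(void) { puts("drop cached pages"); return 0; }
static int issue_discard(void)  { puts("discard on device"); return 0; }

static int ioctl_discard_sketch(void)
{
	int err;

	pthread_mutex_lock(&invalidate_lock);
	err = truncate_range();
	if (err)
		goto fail;
	/* The cache stays invalidated while the device operation runs. */
	err = issue_discard();
fail:
	pthread_mutex_unlock(&invalidate_lock);
	return err;
}

int main(void)
{
	return ioctl_discard_sketch();
}
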
diff --git a/block/ioprio.c b/block/ioprio.c
index 0e4ff24..313c14a 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -69,7 +69,14 @@
 
 	switch (class) {
 		case IOPRIO_CLASS_RT:
-			if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
+			/*
+			 * Originally this only checked for CAP_SYS_ADMIN,
+			 * which was implicitly allowed for pid 0 by security
+			 * modules such as SELinux. Make sure we check
+			 * CAP_SYS_ADMIN first to avoid a denial/avc for
+			 * possibly missing CAP_SYS_NICE permission.
+			 */
+			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
 				return -EPERM;
 			fallthrough;
 			/* rt has prio field too */
diff --git a/crypto/algapi.c b/crypto/algapi.c
index d379fd9..a366cb3 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -284,6 +284,8 @@
 
 	if (larval)
 		list_add(&larval->alg.cra_list, &crypto_alg_list);
+	else
+		alg->cra_flags |= CRYPTO_ALG_TESTED;
 
 	crypto_stats_init(alg);
 
diff --git a/crypto/zstd.c b/crypto/zstd.c
index 1a3309f..154a969 100644
--- a/crypto/zstd.c
+++ b/crypto/zstd.c
@@ -18,22 +18,22 @@
 #define ZSTD_DEF_LEVEL	3
 
 struct zstd_ctx {
-	ZSTD_CCtx *cctx;
-	ZSTD_DCtx *dctx;
+	zstd_cctx *cctx;
+	zstd_dctx *dctx;
 	void *cwksp;
 	void *dwksp;
 };
 
-static ZSTD_parameters zstd_params(void)
+static zstd_parameters zstd_params(void)
 {
-	return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+	return zstd_get_params(ZSTD_DEF_LEVEL, 0);
 }
 
 static int zstd_comp_init(struct zstd_ctx *ctx)
 {
 	int ret = 0;
-	const ZSTD_parameters params = zstd_params();
-	const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+	const zstd_parameters params = zstd_params();
+	const size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
 
 	ctx->cwksp = vzalloc(wksp_size);
 	if (!ctx->cwksp) {
@@ -41,7 +41,7 @@
 		goto out;
 	}
 
-	ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+	ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size);
 	if (!ctx->cctx) {
 		ret = -EINVAL;
 		goto out_free;
@@ -56,7 +56,7 @@
 static int zstd_decomp_init(struct zstd_ctx *ctx)
 {
 	int ret = 0;
-	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	const size_t wksp_size = zstd_dctx_workspace_bound();
 
 	ctx->dwksp = vzalloc(wksp_size);
 	if (!ctx->dwksp) {
@@ -64,7 +64,7 @@
 		goto out;
 	}
 
-	ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+	ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size);
 	if (!ctx->dctx) {
 		ret = -EINVAL;
 		goto out_free;
@@ -152,10 +152,10 @@
 {
 	size_t out_len;
 	struct zstd_ctx *zctx = ctx;
-	const ZSTD_parameters params = zstd_params();
+	const zstd_parameters params = zstd_params();
 
-	out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
-	if (ZSTD_isError(out_len))
+	out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, &params);
+	if (zstd_is_error(out_len))
 		return -EINVAL;
 	*dlen = out_len;
 	return 0;
@@ -182,8 +182,8 @@
 	size_t out_len;
 	struct zstd_ctx *zctx = ctx;
 
-	out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
-	if (ZSTD_isError(out_len))
+	out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen);
+	if (zstd_is_error(out_len))
 		return -EINVAL;
 	*dlen = out_len;
 	return 0;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index a85c351..b62c87b 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -998,7 +998,14 @@
 static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-	struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
+	struct cpc_register_resource *reg;
+
+	if (!cpc_desc) {
+		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
+		return -ENODEV;
+	}
+
+	reg = &cpc_desc->cpc_regs[reg_idx];
 
 	if (CPC_IN_PCC(reg)) {
 		int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 7cd0009..ef10480 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -347,28 +347,3 @@
 
 	acpi_unbind_one(dev);
 }
-
-int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used)
-{
-	struct acpi_device *adev = to_acpi_device(dev);
-
-	/*
-	 * Skip device objects with device IDs, because they may be in use even
-	 * if they are not companions of any physical device objects.
-	 */
-	if (adev->pnp.type.hardware_id)
-		return 0;
-
-	mutex_lock(&adev->physical_node_lock);
-
-	/*
-	 * Device objects without device IDs are not in use if they have no
-	 * corresponding physical device objects.
-	 */
-	if (list_empty(&adev->physical_node_list))
-		acpi_device_set_power(adev, ACPI_STATE_D3_COLD);
-
-	mutex_unlock(&adev->physical_node_lock);
-
-	return 0;
-}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 8fbdc17..d91b560 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -117,7 +117,6 @@
 bool acpi_device_is_first_physical_node(struct acpi_device *adev,
 					const struct device *dev);
 int acpi_bus_register_early_device(int type);
-int acpi_dev_turn_off_if_unused(struct device *dev, void *not_used);
 
 /* --------------------------------------------------------------------------
                      Device Matching and Notification
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index e312eba..2366f54 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1084,21 +1084,17 @@
  * Returns parent node of an ACPI device or data firmware node or %NULL if
  * not available.
  */
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
+static struct fwnode_handle *
+acpi_node_get_parent(const struct fwnode_handle *fwnode)
 {
 	if (is_acpi_data_node(fwnode)) {
 		/* All data nodes have parent pointer so just return that */
 		return to_acpi_data_node(fwnode)->parent;
 	} else if (is_acpi_device_node(fwnode)) {
-		acpi_handle handle, parent_handle;
+		struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
 
-		handle = to_acpi_device_node(fwnode)->handle;
-		if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) {
-			struct acpi_device *adev;
-
-			if (!acpi_bus_get_device(parent_handle, &adev))
-				return acpi_fwnode_handle(adev);
-		}
+		if (dev)
+			return acpi_fwnode_handle(to_acpi_device(dev));
 	}
 
 	return NULL;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a50f196..2c80765 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2564,12 +2564,6 @@
 		}
 	}
 
-	/*
-	 * Make sure that power management resources are not blocked by ACPI
-	 * device objects with no users.
-	 */
-	bus_for_each_dev(&acpi_bus_type, NULL, NULL, acpi_dev_turn_off_if_unused);
-
 	acpi_turn_off_unused_power_resources();
 
 	acpi_scan_initialized = true;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 49fb741..cffbe57 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2710,7 +2710,7 @@
 		t->from = thread;
 	else
 		t->from = NULL;
-	t->sender_euid = proc->cred->euid;
+	t->sender_euid = task_euid(proc->tsk);
 	t->to_proc = target_proc;
 	t->to_thread = target_thread;
 	t->code = tr->code;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index d60f347..1e1167e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -438,6 +438,7 @@
 	/* AMD */
 	{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
 	{ PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
+	{ PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */
 	/* AMD is using RAID class only for ahci controllers */
 	{ PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
 	  PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 28430c0..f76b841 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -131,7 +131,7 @@
 };
 EXPORT_SYMBOL_GPL(ahci_shost_groups);
 
-struct attribute *ahci_sdev_attrs[] = {
+static struct attribute *ahci_sdev_attrs[] = {
 	&dev_attr_sw_activity.attr,
 	&dev_attr_unload_heads.attr,
 	&dev_attr_ncq_prio_supported.attr,
@@ -2323,6 +2323,18 @@
 EXPORT_SYMBOL_GPL(ahci_port_resume);
 
 #ifdef CONFIG_PM
+static void ahci_handle_s2idle(struct ata_port *ap)
+{
+	void __iomem *port_mmio = ahci_port_base(ap);
+	u32 devslp;
+
+	if (pm_suspend_via_firmware())
+		return;
+	devslp = readl(port_mmio + PORT_DEVSLP);
+	if (devslp & PORT_DEVSLP_ADSE)
+		ata_msleep(ap, devslp_idle_timeout);
+}
+
 static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
 {
 	const char *emsg = NULL;
@@ -2336,6 +2348,9 @@
 		ata_port_freeze(ap);
 	}
 
+	if (acpi_storage_d3(ap->host->dev))
+		ahci_handle_s2idle(ap);
+
 	ahci_rpm_put_port(ap);
 	return rc;
 }
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3018ca8..59ad8c9 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2031,8 +2031,9 @@
 			dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
 			goto retry;
 		}
-		ata_dev_err(dev, "Read log page 0x%02x failed, Emask 0x%x\n",
-			    (unsigned int)page, err_mask);
+		ata_dev_err(dev,
+			    "Read log 0x%02x page 0x%02x failed, Emask 0x%x\n",
+			    (unsigned int)log, (unsigned int)page, err_mask);
 	}
 
 	return err_mask;
@@ -2052,8 +2053,19 @@
 	struct ata_port *ap = dev->link->ap;
 	unsigned int err, i;
 
+	if (dev->horkage & ATA_HORKAGE_NO_ID_DEV_LOG)
+		return false;
+
 	if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE)) {
-		ata_dev_warn(dev, "ATA Identify Device Log not supported\n");
+		/*
+		 * IDENTIFY DEVICE data log is defined as mandatory starting
+		 * with ACS-3 (ATA version 10). Warn about the missing log
+		 * for drives which implement this ATA level or above.
+		 */
+		if (ata_id_major_version(dev->id) >= 10)
+			ata_dev_warn(dev,
+				"ATA Identify Device Log not supported\n");
+		dev->horkage |= ATA_HORKAGE_NO_ID_DEV_LOG;
 		return false;
 	}
 
@@ -2166,6 +2178,9 @@
 	struct ata_port *ap = dev->link->ap;
 	unsigned int err_mask;
 
+	if (!ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
+		return;
+
 	err_mask = ata_read_log_page(dev,
 				     ATA_LOG_IDENTIFY_DEVICE,
 				     ATA_LOG_SATA_SETTINGS,
@@ -2442,7 +2457,8 @@
 	 * Check device sleep capability. Get DevSlp timing variables
 	 * from SATA Settings page of Identify Device Data Log.
 	 */
-	if (!ata_id_has_devslp(dev->id))
+	if (!ata_id_has_devslp(dev->id) ||
+	    !ata_identify_page_supported(dev, ATA_LOG_SATA_SETTINGS))
 		return;
 
 	err_mask = ata_read_log_page(dev,
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bf9c4b6..1d4a6f1 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -93,6 +93,12 @@
 	ULONG_MAX,
 };
 
+static const unsigned long ata_eh_revalidate_timeouts[] = {
+	15000,	/* Some drives are slow to read log pages when waking up */
+	15000,  /* combined time till here is enough even for media access */
+	ULONG_MAX,
+};
+
 static const unsigned long ata_eh_flush_timeouts[] = {
 	15000,	/* be generous with flush */
 	15000,  /* ditto */
@@ -129,6 +135,8 @@
 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
 	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
 	  .timeouts = ata_eh_identify_timeouts, },
+	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
+	  .timeouts = ata_eh_revalidate_timeouts, },
 	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
 	  .timeouts = ata_eh_other_timeouts, },
 	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index 4e88597..5b78e86 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -922,7 +922,7 @@
 	    ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
 EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
 
-struct attribute *ata_ncq_sdev_attrs[] = {
+static struct attribute *ata_ncq_sdev_attrs[] = {
 	&dev_attr_unload_heads.attr,
 	&dev_attr_ncq_prio_enable.attr,
 	&dev_attr_ncq_prio_supported.attr,
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index 8440203..b29d3f1 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -469,10 +469,8 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "no irq\n");
+	if (irq < 0)
 		return irq;
-	}
 	if (!irq)
 		return -EINVAL;
 
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 981e72a..ff16a36 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -677,6 +677,8 @@
 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+	for_each_cpu(sibling, topology_cluster_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling));
 	for_each_cpu(sibling, topology_llc_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 97bf051..6ae3877 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -316,7 +316,7 @@
 	struct request *req = bd->rq;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
-	unsigned int num;
+	int num;
 	int qid = hctx->queue_num;
 	bool notify = false;
 	blk_status_t status;
@@ -1049,7 +1049,6 @@
 	.feature_table_size		= ARRAY_SIZE(features),
 	.feature_table_legacy		= features_legacy,
 	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
-	.suppress_used_validation	= true,
 	.driver.name			= KBUILD_MODNAME,
 	.driver.owner			= THIS_MODULE,
 	.id_table			= id_table,
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 08d7953..2507112 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1853,12 +1853,14 @@
 	.owner = THIS_MODULE
 };
 
+#ifdef CONFIG_ZRAM_WRITEBACK
 static const struct block_device_operations zram_wb_devops = {
 	.open = zram_open,
 	.submit_bio = zram_submit_bio,
 	.swap_slot_free_notify = zram_slot_free_notify,
 	.owner = THIS_MODULE
 };
+#endif
 
 static DEVICE_ATTR_WO(compact);
 static DEVICE_ATTR_RW(disksize);
diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c
index 4c2f7d6..183d5cc 100644
--- a/drivers/bus/brcmstb_gisb.c
+++ b/drivers/bus/brcmstb_gisb.c
@@ -485,7 +485,7 @@
 	list_add_tail(&gdev->next, &brcmstb_gisb_arb_device_list);
 
 #ifdef CONFIG_MIPS
-	board_be_handler = brcmstb_bus_error_handler;
+	mips_set_be_handler(brcmstb_bus_error_handler);
 #endif
 
 	if (list_is_singular(&brcmstb_gisb_arb_device_list)) {
diff --git a/drivers/clk/actions/owl-factor.c b/drivers/clk/actions/owl-factor.c
index f15e262..64f316c 100644
--- a/drivers/clk/actions/owl-factor.c
+++ b/drivers/clk/actions/owl-factor.c
@@ -10,7 +10,6 @@
 
 #include <linux/clk-provider.h>
 #include <linux/regmap.h>
-#include <linux/slab.h>
 
 #include "owl-factor.h"
 
diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
index bc3be5f..24dab23 100644
--- a/drivers/clk/clk-ast2600.c
+++ b/drivers/clk/clk-ast2600.c
@@ -51,6 +51,8 @@
 static struct clk_hw_onecell_data *aspeed_g6_clk_data;
 
 static void __iomem *scu_g6_base;
+/* AST2600 revision: A0, A1, A2, etc */
+static u8 soc_rev;
 
 /*
  * Clocks marked with CLK_IS_CRITICAL:
@@ -191,9 +193,8 @@
 static struct clk_hw *ast2600_calc_apll(const char *name, u32 val)
 {
 	unsigned int mult, div;
-	u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV);
 
-	if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) {
+	if (soc_rev >= 2) {
 		if (val & BIT(24)) {
 			/* Pass through mode */
 			mult = div = 1;
@@ -707,7 +708,7 @@
 static void __init aspeed_g6_cc(struct regmap *map)
 {
 	struct clk_hw *hw;
-	u32 val, div, divbits, chip_id, axi_div, ahb_div;
+	u32 val, div, divbits, axi_div, ahb_div;
 
 	clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000);
 
@@ -738,8 +739,7 @@
 		axi_div = 2;
 
 	divbits = (val >> 11) & 0x3;
-	regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id);
-	if (chip_id & BIT(16)) {
+	if (soc_rev >= 1) {
 		if (!divbits) {
 			ahb_div = ast2600_a1_axi_ahb200_tbl[(val >> 8) & 0x3];
 			if (val & BIT(16))
@@ -784,6 +784,8 @@
 	if (!scu_g6_base)
 		return;
 
+	soc_rev = (readl(scu_g6_base + ASPEED_G6_SILICON_REV) & CHIP_REVISION_ID) >> 16;
+
 	aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws,
 				      ASPEED_G6_NUM_CLKS), GFP_KERNEL);
 	if (!aspeed_g6_clk_data)
diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index c04ae0e..b9c5f90 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -97,6 +97,7 @@
 				return ret;
 
 			req->rate = tmp_req.rate;
+			req->best_parent_hw = tmp_req.best_parent_hw;
 			req->best_parent_rate = tmp_req.best_parent_rate;
 
 			return 0;
diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index 57e4597..93fa8c9 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -1,15 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * clk-si5351.c: Silicon Laboratories Si5351A/B/C I2C Clock Generator
+ * clk-si5351.c: Skyworks / Silicon Labs Si5351A/B/C I2C Clock Generator
  *
  * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  * Rabeeh Khoury <rabeeh@solid-run.com>
  *
  * References:
  * [1] "Si5351A/B/C Data Sheet"
- *     https://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
- * [2] "Manually Generating an Si5351 Register Map"
- *     https://www.silabs.com/Support%20Documents/TechnicalDocs/AN619.pdf
+ *     https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/data-sheets/Si5351-B.pdf
+ * [2] "AN619: Manually Generating an Si5351 Register Map"
+ *     https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/application-notes/AN619.pdf
  */
 
 #include <linux/module.h>
diff --git a/drivers/clk/clk-si5351.h b/drivers/clk/clk-si5351.h
index 73dc8ef..e9e2bfd 100644
--- a/drivers/clk/clk-si5351.h
+++ b/drivers/clk/clk-si5351.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * clk-si5351.h: Silicon Laboratories Si5351A/B/C I2C Clock Generator
+ * clk-si5351.h: Skyworks / Silicon Labs Si5351A/B/C I2C Clock Generator
  *
  * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  * Rabeeh Khoury <rabeeh@solid-run.com>
diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c
index c6d3b1a..e7be3e5 100644
--- a/drivers/clk/clk-versaclock5.c
+++ b/drivers/clk/clk-versaclock5.c
@@ -905,7 +905,7 @@
 
 static const struct of_device_id clk_vc5_of_match[];
 
-static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int vc5_probe(struct i2c_client *client)
 {
 	unsigned int oe, sd, src_mask = 0, src_val = 0;
 	struct vc5_driver_data *vc5;
@@ -1244,7 +1244,7 @@
 		.pm	= &vc5_pm_ops,
 		.of_match_table = clk_vc5_of_match,
 	},
-	.probe		= vc5_probe,
+	.probe_new	= vc5_probe,
 	.remove		= vc5_remove,
 	.id_table	= vc5_id,
 };
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index 8199499..7d220a0 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -391,11 +391,11 @@
 
 #define imx8m_clk_hw_composite(name, parent_names, reg) \
 	_imx8m_clk_hw_composite(name, parent_names, reg, \
-			IMX_COMPOSITE_CORE, IMX_COMPOSITE_CLK_FLAGS_DEFAULT)
+			0, IMX_COMPOSITE_CLK_FLAGS_DEFAULT)
 
 #define imx8m_clk_hw_composite_critical(name, parent_names, reg) \
 	_imx8m_clk_hw_composite(name, parent_names, reg, \
-			IMX_COMPOSITE_CORE, IMX_COMPOSITE_CLK_FLAGS_CRITICAL)
+			0, IMX_COMPOSITE_CLK_FLAGS_CRITICAL)
 
 #define imx8m_clk_hw_composite_bus(name, parent_names, reg)	\
 	_imx8m_clk_hw_composite(name, parent_names, reg, \
diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c
index 266c759..af31633 100644
--- a/drivers/clk/ingenic/cgu.c
+++ b/drivers/clk/ingenic/cgu.c
@@ -453,15 +453,15 @@
 	}
 
 	/* Impose hardware constraints */
-	div = min_t(unsigned, div, 1 << clk_info->div.bits);
-	div = max_t(unsigned, div, 1);
+	div = clamp_t(unsigned int, div, clk_info->div.div,
+		      clk_info->div.div << clk_info->div.bits);
 
 	/*
 	 * If the divider value itself must be divided before being written to
 	 * the divider register, we must ensure we don't have any bits set that
 	 * would be lost as a result of doing so.
 	 */
-	div /= clk_info->div.div;
+	div = DIV_ROUND_UP(div, clk_info->div.div);
 	div *= clk_info->div.div;
 
 	return div;
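
The cgu.c hunk above tightens the divider math twice: clamp_t() bounds the raw divider by the hardware limits, and DIV_ROUND_UP() then raises it to the next multiple of the pre-divider instead of truncating down, so the resulting clock never runs faster than requested. The rounding step in isolation, as a runnable sketch (names are mine, not the driver's):

#include <stdio.h>

/* Equivalent to the kernel's DIV_ROUND_UP(). */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Round div up to the next multiple of step (the hardware pre-divider). */
static unsigned int round_to_multiple(unsigned int div, unsigned int step)
{
	return DIV_ROUND_UP(div, step) * step;
}

int main(void)
{
	/* step = 4: a requested divider of 6 becomes 8; the old
	 * truncating code produced 4, overclocking the child clock. */
	printf("%u\n", round_to_multiple(6, 4));
	return 0;
}
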
diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c
index 5154b0c..744d136 100644
--- a/drivers/clk/ingenic/jz4725b-cgu.c
+++ b/drivers/clk/ingenic/jz4725b-cgu.c
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4725b-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/jz4740-cgu.c b/drivers/clk/ingenic/jz4740-cgu.c
index cd878f0..43ffb62 100644
--- a/drivers/clk/ingenic/jz4740-cgu.c
+++ b/drivers/clk/ingenic/jz4740-cgu.c
@@ -11,7 +11,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4740-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/jz4760-cgu.c b/drivers/clk/ingenic/jz4760-cgu.c
index 1448379..080d492 100644
--- a/drivers/clk/ingenic/jz4760-cgu.c
+++ b/drivers/clk/ingenic/jz4760-cgu.c
@@ -12,7 +12,7 @@
 
 #include <linux/clk.h>
 
-#include <dt-bindings/clock/jz4760-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4760-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/jz4770-cgu.c b/drivers/clk/ingenic/jz4770-cgu.c
index 2321742..8c6c120 100644
--- a/drivers/clk/ingenic/jz4770-cgu.c
+++ b/drivers/clk/ingenic/jz4770-cgu.c
@@ -10,7 +10,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c
index 0268d23..e357c22 100644
--- a/drivers/clk/ingenic/jz4780-cgu.c
+++ b/drivers/clk/ingenic/jz4780-cgu.c
@@ -13,7 +13,7 @@
 #include <linux/iopoll.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/x1000-cgu.c b/drivers/clk/ingenic/x1000-cgu.c
index 9aa20b5..3c4d5a7 100644
--- a/drivers/clk/ingenic/x1000-cgu.c
+++ b/drivers/clk/ingenic/x1000-cgu.c
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/ingenic/x1830-cgu.c b/drivers/clk/ingenic/x1830-cgu.c
index 950aee2..e01ec2d 100644
--- a/drivers/clk/ingenic/x1830-cgu.c
+++ b/drivers/clk/ingenic/x1830-cgu.c
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/x1830-cgu.h>
+#include <dt-bindings/clock/ingenic,x1830-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
diff --git a/drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c b/drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c
index 0e2ac0a..4ab312e 100644
--- a/drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c
+++ b/drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c
@@ -10,8 +10,6 @@
 #include <linux/clk-provider.h>
 #include <linux/platform_device.h>
 
-#include <dt-bindings/clock/mt8195-clk.h>
-
 static const struct mtk_gate_regs imp_iic_wrap_cg_regs = {
 	.set_ofs = 0xe08,
 	.clr_ofs = 0xe04,
diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
index 3c3a7ff..9b1674b 100644
--- a/drivers/clk/qcom/gcc-msm8996.c
+++ b/drivers/clk/qcom/gcc-msm8996.c
@@ -2937,20 +2937,6 @@
 	},
 };
 
-static struct clk_branch gcc_aggre1_pnoc_ahb_clk = {
-	.halt_reg = 0x82014,
-	.clkr = {
-		.enable_reg = 0x82014,
-		.enable_mask = BIT(0),
-		.hw.init = &(struct clk_init_data){
-			.name = "gcc_aggre1_pnoc_ahb_clk",
-			.parent_names = (const char *[]){ "periph_noc_clk_src" },
-			.num_parents = 1,
-			.ops = &clk_branch2_ops,
-		},
-	},
-};
-
 static struct clk_branch gcc_aggre2_ufs_axi_clk = {
 	.halt_reg = 0x83014,
 	.clkr = {
@@ -3474,7 +3460,6 @@
 	[GCC_AGGRE0_CNOC_AHB_CLK] = &gcc_aggre0_cnoc_ahb_clk.clkr,
 	[GCC_SMMU_AGGRE0_AXI_CLK] = &gcc_smmu_aggre0_axi_clk.clkr,
 	[GCC_SMMU_AGGRE0_AHB_CLK] = &gcc_smmu_aggre0_ahb_clk.clkr,
-	[GCC_AGGRE1_PNOC_AHB_CLK] = &gcc_aggre1_pnoc_ahb_clk.clkr,
 	[GCC_AGGRE2_UFS_AXI_CLK] = &gcc_aggre2_ufs_axi_clk.clkr,
 	[GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr,
 	[GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr,
diff --git a/drivers/clk/rockchip/Kconfig b/drivers/clk/rockchip/Kconfig
index 2dfd6a3..3067bdb 100644
--- a/drivers/clk/rockchip/Kconfig
+++ b/drivers/clk/rockchip/Kconfig
@@ -80,14 +80,14 @@
 	  Build the driver for RK3368 Clock Driver.
 
 config CLK_RK3399
-	tristate "Rockchip RK3399 clock controller support"
+	bool "Rockchip RK3399 clock controller support"
 	depends on ARM64 || COMPILE_TEST
 	default y
 	help
 	  Build the driver for RK3399 Clock Driver.
 
 config CLK_RK3568
-	tristate "Rockchip RK3568 clock controller support"
+	bool "Rockchip RK3568 clock controller support"
 	depends on ARM64 || COMPILE_TEST
 	default y
 	help
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 7924598..306910a 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1630,7 +1630,6 @@
 	},
 	{ }
 };
-MODULE_DEVICE_TABLE(of, clk_rk3399_match_table);
 
 static int __init clk_rk3399_probe(struct platform_device *pdev)
 {
@@ -1656,7 +1655,4 @@
 		.suppress_bind_attrs = true,
 	},
 };
-module_platform_driver_probe(clk_rk3399_driver, clk_rk3399_probe);
-
-MODULE_DESCRIPTION("Rockchip RK3399 Clock Driver");
-MODULE_LICENSE("GPL");
+builtin_platform_driver_probe(clk_rk3399_driver, clk_rk3399_probe);
diff --git a/drivers/clk/rockchip/clk-rk3568.c b/drivers/clk/rockchip/clk-rk3568.c
index 939e707..69a9e80 100644
--- a/drivers/clk/rockchip/clk-rk3568.c
+++ b/drivers/clk/rockchip/clk-rk3568.c
@@ -1693,7 +1693,6 @@
 	},
 	{ }
 };
-MODULE_DEVICE_TABLE(of, clk_rk3568_match_table);
 
 static int __init clk_rk3568_probe(struct platform_device *pdev)
 {
@@ -1719,7 +1718,4 @@
 		.suppress_bind_attrs = true,
 	},
 };
-module_platform_driver_probe(clk_rk3568_driver, clk_rk3568_probe);
-
-MODULE_DESCRIPTION("Rockchip RK3568 Clock Driver");
-MODULE_LICENSE("GPL");
+builtin_platform_driver_probe(clk_rk3568_driver, clk_rk3568_probe);
diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c
index 1238023..46c66fa 100644
--- a/drivers/clk/uniphier/clk-uniphier-core.c
+++ b/drivers/clk/uniphier/clk-uniphier-core.c
@@ -132,6 +132,10 @@
 		.compatible = "socionext,uniphier-pxs3-clock",
 		.data = uniphier_pxs3_sys_clk_data,
 	},
+	{
+		.compatible = "socionext,uniphier-nx1-clock",
+		.data = uniphier_nx1_sys_clk_data,
+	},
 	/* Media I/O clock, SD clock */
 	{
 		.compatible = "socionext,uniphier-ld4-mio-clock",
@@ -165,6 +169,10 @@
 		.compatible = "socionext,uniphier-pxs3-sd-clock",
 		.data = uniphier_pro5_sd_clk_data,
 	},
+	{
+		.compatible = "socionext,uniphier-nx1-sd-clock",
+		.data = uniphier_pro5_sd_clk_data,
+	},
 	/* Peripheral clock */
 	{
 		.compatible = "socionext,uniphier-ld4-peri-clock",
@@ -198,6 +206,15 @@
 		.compatible = "socionext,uniphier-pxs3-peri-clock",
 		.data = uniphier_pro4_peri_clk_data,
 	},
+	{
+		.compatible = "socionext,uniphier-nx1-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	/* SoC-glue clock */
+	{
+		.compatible = "socionext,uniphier-pro4-sg-clock",
+		.data = uniphier_pro4_sg_clk_data,
+	},
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c
index 32b3017..0180470 100644
--- a/drivers/clk/uniphier/clk-uniphier-sys.c
+++ b/drivers/clk/uniphier/clk-uniphier-sys.c
@@ -20,6 +20,10 @@
 	UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 10),		\
 	UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 15)
 
+#define UNIPHIER_NX1_SYS_CLK_SD						\
+	UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 4),		\
+	UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 6)
+
 #define UNIPHIER_LD4_SYS_CLK_NAND(idx)					\
 	UNIPHIER_CLK_FACTOR("nand-50m", -1, "spll", 1, 32),		\
 	UNIPHIER_CLK_GATE("nand", (idx), "nand-50m", 0x2104, 2)
@@ -288,6 +292,8 @@
 	UNIPHIER_CLK_GATE("sata0", 28, NULL, 0x210c, 7),
 	UNIPHIER_CLK_GATE("sata1", 29, NULL, 0x210c, 8),
 	UNIPHIER_CLK_GATE("sata-phy", 30, NULL, 0x210c, 21),
+	UNIPHIER_LD11_SYS_CLK_AIO(40),
+	UNIPHIER_LD11_SYS_CLK_EXIV(42),
 	/* CPU gears */
 	UNIPHIER_CLK_DIV4("cpll", 2, 3, 4, 8),
 	UNIPHIER_CLK_DIV4("spll", 2, 3, 4, 8),
@@ -300,3 +306,44 @@
 			     "spll/4", "spll/8", "s2pll/4", "s2pll/8"),
 	{ /* sentinel */ }
 };
+
+const struct uniphier_clk_data uniphier_nx1_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("cpll", -1, "ref", 100, 1),		/* ARM: 2500 MHz */
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 32, 1),		/* 800 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 6),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+	UNIPHIER_NX1_SYS_CLK_SD,
+	UNIPHIER_CLK_GATE("emmc", 4, NULL, 0x2108, 8),
+	UNIPHIER_CLK_GATE("ether", 6, NULL, 0x210c, 0),
+	UNIPHIER_CLK_GATE("usb30-0", 12, NULL, 0x210c, 16),	/* =GIO */
+	UNIPHIER_CLK_GATE("usb30-1", 13, NULL, 0x210c, 20),	/* =GIO1P */
+	UNIPHIER_CLK_GATE("usb30-hsphy0", 16, NULL, 0x210c, 24),
+	UNIPHIER_CLK_GATE("usb30-ssphy0", 17, NULL, 0x210c, 25),
+	UNIPHIER_CLK_GATE("usb30-ssphy1", 18, NULL, 0x210c, 26),
+	UNIPHIER_CLK_GATE("pcie", 24, NULL, 0x210c, 8),
+	UNIPHIER_CLK_GATE("voc", 52, NULL, 0x2110, 0),
+	UNIPHIER_CLK_GATE("hdmitx", 58, NULL, 0x2110, 8),
+	/* CPU gears */
+	UNIPHIER_CLK_DIV5("cpll", 2, 4, 8, 16, 32),
+	UNIPHIER_CLK_CPUGEAR("cpu-ca53", 33, 0x8080, 0xf, 5,
+			     "cpll/2", "cpll/4", "cpll/8", "cpll/16",
+			     "cpll/32"),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro4_sg_clk_data[] = {
+	UNIPHIER_CLK_DIV("gpll", 4),
+	{
+		.name = "sata-ref",
+		.type = UNIPHIER_CLK_TYPE_MUX,
+		.idx = 0,
+		.data.mux = {
+			.parent_names = { "gpll/4", "ref", },
+			.num_parents = 2,
+			.reg = 0x1a28,
+			.masks = { 0x1, 0x1, },
+			.vals  = { 0x0, 0x1, },
+		},
+	},
+	{ /* sentinel */ }
+};
diff --git a/drivers/clk/uniphier/clk-uniphier.h b/drivers/clk/uniphier/clk-uniphier.h
index 9e30362..dea0c78 100644
--- a/drivers/clk/uniphier/clk-uniphier.h
+++ b/drivers/clk/uniphier/clk-uniphier.h
@@ -119,6 +119,10 @@
 	UNIPHIER_CLK_DIV2(parent, div0, div1),			\
 	UNIPHIER_CLK_DIV2(parent, div2, div3)
 
+#define UNIPHIER_CLK_DIV5(parent, div0, div1, div2, div3, div4)	\
+	UNIPHIER_CLK_DIV4(parent, div0, div1, div2, div3),	\
+	UNIPHIER_CLK_DIV(parent, div4)
+
 struct clk_hw *uniphier_clk_register_cpugear(struct device *dev,
 					     struct regmap *regmap,
 					     const char *name,
@@ -146,9 +150,11 @@
 extern const struct uniphier_clk_data uniphier_ld11_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld20_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_nx1_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld4_mio_clk_data[];
 extern const struct uniphier_clk_data uniphier_pro5_sd_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld4_peri_clk_data[];
 extern const struct uniphier_clk_data uniphier_pro4_peri_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro4_sg_clk_data[];
 
 #endif /* __CLK_UNIPHIER_H__ */
diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index 77fd0ec..d52f976 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c
@@ -484,7 +484,7 @@
 	struct device_node *parent;
 	struct regmap *map;
 	struct clk_icst_desc icst_desc;
-	const char *name = np->name;
+	const char *name;
 	const char *parent_name;
 	struct clk *regclk;
 	enum icst_control_type ctype;
@@ -533,15 +533,17 @@
 		icst_desc.params = &icst525_apcp_cm_params;
 		ctype = ICST_INTEGRATOR_CP_CM_MEM;
 	} else {
-		pr_err("unknown ICST clock %s\n", name);
+		pr_err("unknown ICST clock %pOF\n", np);
 		return;
 	}
 
 	/* Parent clock name is not the same as node parent */
 	parent_name = of_clk_get_parent_name(np, 0);
+	name = kasprintf(GFP_KERNEL, "%pOFP", np);
 
 	regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map, ctype);
 	if (IS_ERR(regclk)) {
+		kfree(name);
 		pr_err("error setting up syscon ICST clock %s\n", name);
 		return;
 	}
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 815df3d..dec2a56 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -338,6 +338,8 @@
 
 static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
 
+#define CPPC_MAX_PERF	U8_MAX
+
 static void intel_pstate_set_itmt_prio(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
@@ -349,6 +351,14 @@
 		return;
 
 	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff. In that case it cannot be used to enable ITMT,
+	 * so look at MSR_HWP_CAPABILITIES bits [8:0] to decide instead.
+	 */
+	if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
+
+	/*
 	 * The priorities can be set regardless of whether or not
 	 * sched_set_itmt_support(true) has been called and it is valid to
 	 * update them at any time after it has been called.
@@ -1006,6 +1016,12 @@
 		 */
 		value &= ~GENMASK_ULL(31, 24);
 		value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
+		/*
+		 * However, make sure that EPP will be set to "performance" when
+		 * the CPU is brought back online again and the "performance"
+		 * scaling algorithm is still in effect.
+		 */
+		cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
 	}
 
 	/*
@@ -2353,6 +2369,7 @@
 	X86_MATCH(BROADWELL_D,		core_funcs),
 	X86_MATCH(BROADWELL_X,		core_funcs),
 	X86_MATCH(SKYLAKE_X,		core_funcs),
+	X86_MATCH(ICELAKE_X,		core_funcs),
 	{}
 };
 
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 2ea59cb..6437b2e 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -67,12 +67,9 @@
 	BUG_ON(dmabuf->vmapping_counter);
 
 	/*
-	 * Any fences that a dma-buf poll can wait on should be signaled
-	 * before releasing dma-buf. This is the responsibility of each
-	 * driver that uses the reservation objects.
-	 *
-	 * If you hit this BUG() it means someone dropped their ref to the
-	 * dma-buf while still having pending operation to the buffer.
+	 * If you hit this BUG() it could mean:
+	 * * There's a file reference imbalance in dma_buf_poll / dma_buf_poll_cb or somewhere else
+	 * * dmabuf->cb_in/out.active are non-0 despite no pending fence callback
 	 */
 	BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
 
@@ -200,6 +197,7 @@
 static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb;
+	struct dma_buf *dmabuf = container_of(dcb->poll, struct dma_buf, poll);
 	unsigned long flags;
 
 	spin_lock_irqsave(&dcb->poll->lock, flags);
@@ -207,21 +205,18 @@
 	dcb->active = 0;
 	spin_unlock_irqrestore(&dcb->poll->lock, flags);
 	dma_fence_put(fence);
+	/* Paired with get_file in dma_buf_poll */
+	fput(dmabuf->file);
 }
 
-static bool dma_buf_poll_shared(struct dma_resv *resv,
+static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write,
 				struct dma_buf_poll_cb_t *dcb)
 {
-	struct dma_resv_list *fobj = dma_resv_shared_list(resv);
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i, r;
+	int r;
 
-	if (!fobj)
-		return false;
-
-	for (i = 0; i < fobj->shared_count; ++i) {
-		fence = rcu_dereference_protected(fobj->shared[i],
-						  dma_resv_held(resv));
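+	/* Attach our callback to the first fence that is still active;
+	 * dma_fence_add_callback() returns 0 only when the fence has not
+	 * signaled yet, in which case the reference is kept.
+	 */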
+	dma_resv_for_each_fence(&cursor, resv, write, fence) {
 		dma_fence_get(fence);
 		r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
 		if (!r)
@@ -232,24 +227,6 @@
 	return false;
 }
 
-static bool dma_buf_poll_excl(struct dma_resv *resv,
-			      struct dma_buf_poll_cb_t *dcb)
-{
-	struct dma_fence *fence = dma_resv_excl_fence(resv);
-	int r;
-
-	if (!fence)
-		return false;
-
-	dma_fence_get(fence);
-	r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
-	if (!r)
-		return true;
-	dma_fence_put(fence);
-
-	return false;
-}
-
 static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 {
 	struct dma_buf *dmabuf;
@@ -282,8 +259,10 @@
 		spin_unlock_irq(&dmabuf->poll.lock);
 
 		if (events & EPOLLOUT) {
-			if (!dma_buf_poll_shared(resv, dcb) &&
-			    !dma_buf_poll_excl(resv, dcb))
+			/* Paired with fput in dma_buf_poll_cb */
+			get_file(dmabuf->file);
+
+			if (!dma_buf_poll_add_cb(resv, true, dcb))
 				/* No callback queued, wake up any other waiters */
 				dma_buf_poll_cb(NULL, &dcb->cb);
 			else
@@ -303,7 +282,10 @@
 		spin_unlock_irq(&dmabuf->poll.lock);
 
 		if (events & EPOLLIN) {
-			if (!dma_buf_poll_excl(resv, dcb))
+			/* Paired with fput in dma_buf_poll_cb */
+			get_file(dmabuf->file);
+
+			if (!dma_buf_poll_add_cb(resv, false, dcb))
 				/* No callback queued, wake up any other waiters */
 				dma_buf_poll_cb(NULL, &dcb->cb);
 			else
@@ -1356,10 +1338,9 @@
 {
 	struct dma_buf *buf_obj;
 	struct dma_buf_attachment *attach_obj;
-	struct dma_resv *robj;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int count = 0, attach_count, shared_count, i;
+	int count = 0, attach_count;
 	size_t size = 0;
 	int ret;
 
@@ -1378,6 +1359,8 @@
 		if (ret)
 			goto error_unlock;
 
+		spin_lock(&buf_obj->name_lock);
 		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
 				buf_obj->size,
 				buf_obj->file->f_flags, buf_obj->file->f_mode,
@@ -1385,22 +1368,12 @@
 				buf_obj->exp_name,
 				file_inode(buf_obj->file)->i_ino,
 				buf_obj->name ?: "");
+		spin_unlock(&buf_obj->name_lock);
 
-		robj = buf_obj->resv;
-		fence = dma_resv_excl_fence(robj);
-		if (fence)
-			seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
-				   fence->ops->get_driver_name(fence),
-				   fence->ops->get_timeline_name(fence),
-				   dma_fence_is_signaled(fence) ? "" : "un");
-
-		fobj = rcu_dereference_protected(robj->fence,
-						 dma_resv_held(robj));
-		shared_count = fobj ? fobj->shared_count : 0;
-		for (i = 0; i < shared_count; i++) {
-			fence = rcu_dereference_protected(fobj->shared[i],
-							  dma_resv_held(robj));
-			seq_printf(s, "\tShared fence: %s %s %ssignalled\n",
+		dma_resv_for_each_fence(&cursor, buf_obj->resv, true, fence) {
+			seq_printf(s, "\t%s fence: %s %s %ssignalled\n",
+				   dma_resv_iter_is_exclusive(&cursor) ?
+					"Exclusive" : "Shared",
 				   fence->ops->get_driver_name(fence),
 				   fence->ops->get_timeline_name(fence),
 				   dma_fence_is_signaled(fence) ? "" : "un");
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index a480af9..9eb2baa 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -333,10 +333,14 @@
 {
 	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
 	cursor->index = -1;
-	if (cursor->all_fences)
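+	/* Snapshot shared_count together with the fence list under the
+	 * seqcount so the iteration bound stays consistent even if the
+	 * list is reallocated concurrently.
+	 */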
+	cursor->shared_count = 0;
+	if (cursor->all_fences) {
 		cursor->fences = dma_resv_shared_list(cursor->obj);
-	else
+		if (cursor->fences)
+			cursor->shared_count = cursor->fences->shared_count;
+	} else {
 		cursor->fences = NULL;
+	}
 	cursor->is_restarted = true;
 }
 
@@ -363,7 +367,7 @@
 				continue;
 
 		} else if (!cursor->fences ||
-			   cursor->index >= cursor->fences->shared_count) {
+			   cursor->index >= cursor->shared_count) {
 			cursor->fence = NULL;
 			break;
 
@@ -424,6 +428,57 @@
 EXPORT_SYMBOL(dma_resv_iter_next_unlocked);
 
 /**
+ * dma_resv_iter_first - first fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return the first fence in the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
+{
+	struct dma_fence *fence;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->index = 0;
+	if (cursor->all_fences)
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+	else
+		cursor->fences = NULL;
+
+	fence = dma_resv_excl_fence(cursor->obj);
+	if (!fence)
+		fence = dma_resv_iter_next(cursor);
+
+	cursor->is_restarted = true;
+	return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_first);
+
+/**
+ * dma_resv_iter_next - next fence from a locked dma_resv object
+ * @cursor: cursor to record the current position
+ *
+ * Return the next fences from the dma_resv object while holding the
+ * &dma_resv.lock.
+ */
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
+{
+	unsigned int idx;
+
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->is_restarted = false;
+	if (!cursor->fences || cursor->index >= cursor->fences->shared_count)
+		return NULL;
+
+	idx = cursor->index++;
+	return rcu_dereference_protected(cursor->fences->shared[idx],
+					 dma_resv_held(cursor->obj));
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_next);
+
+/**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
  * @src: the source reservation object
@@ -448,10 +503,8 @@
 			dma_resv_list_free(list);
 			dma_fence_put(excl);
 
-			if (cursor.fences) {
-				unsigned int cnt = cursor.fences->shared_count;
-
-				list = dma_resv_list_alloc(cnt);
+			if (cursor.shared_count) {
+				list = dma_resv_list_alloc(cursor.shared_count);
 				if (!list) {
 					dma_resv_iter_end(&cursor);
 					return -ENOMEM;
@@ -522,7 +575,7 @@
 			if (fence_excl)
 				dma_fence_put(*fence_excl);
 
-			count = cursor.fences ? cursor.fences->shared_count : 0;
+			count = cursor.shared_count;
 			count += fence_excl ? 0 : 1;
 
 			/* Eventually re-allocate the array */
diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c
index de416f9..f521933 100644
--- a/drivers/firmware/arm_scmi/base.c
+++ b/drivers/firmware/arm_scmi/base.c
@@ -34,6 +34,12 @@
 	__le16 reserved;
 };
 
+struct scmi_msg_resp_base_discover_agent {
+	__le32 agent_id;
+	u8 name[SCMI_MAX_STR_SIZE];
+};
+
 struct scmi_msg_base_error_notify {
 	__le32 event_control;
 #define BASE_TP_NOTIFY_ALL	BIT(0)
@@ -225,18 +231,21 @@
 					int id, char *name)
 {
 	int ret;
+	struct scmi_msg_resp_base_discover_agent *agent_info;
 	struct scmi_xfer *t;
 
 	ret = ph->xops->xfer_get_init(ph, BASE_DISCOVER_AGENT,
-				      sizeof(__le32), SCMI_MAX_STR_SIZE, &t);
+				      sizeof(__le32), sizeof(*agent_info), &t);
 	if (ret)
 		return ret;
 
 	put_unaligned_le32(id, t->tx.buf);
 
 	ret = ph->xops->do_xfer(ph, t);
-	if (!ret)
-		strlcpy(name, t->rx.buf, SCMI_MAX_STR_SIZE);
+	if (!ret) {
+		agent_info = t->rx.buf;
+		strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE);
+	}
 
 	ph->xops->xfer_put(ph, t);
 
diff --git a/drivers/firmware/arm_scmi/scmi_pm_domain.c b/drivers/firmware/arm_scmi/scmi_pm_domain.c
index 4371fdc..581d34c 100644
--- a/drivers/firmware/arm_scmi/scmi_pm_domain.c
+++ b/drivers/firmware/arm_scmi/scmi_pm_domain.c
@@ -138,9 +138,7 @@
 	scmi_pd_data->domains = domains;
 	scmi_pd_data->num_domains = num_domains;
 
-	of_genpd_add_provider_onecell(np, scmi_pd_data);
-
-	return 0;
+	return of_genpd_add_provider_onecell(np, scmi_pd_data);
 }
 
 static const struct scmi_device_id scmi_id_table[] = {
diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
index 3084715..cdbb287 100644
--- a/drivers/firmware/arm_scmi/sensors.c
+++ b/drivers/firmware/arm_scmi/sensors.c
@@ -637,7 +637,7 @@
 	if (ret)
 		return ret;
 
-	put_unaligned_le32(cpu_to_le32(sensor_id), t->tx.buf);
+	put_unaligned_le32(sensor_id, t->tx.buf);
 	ret = ph->xops->do_xfer(ph, t);
 	if (!ret) {
 		struct sensors_info *si = ph->get_priv(ph);
diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c
index 11e8efb..87039c5 100644
--- a/drivers/firmware/arm_scmi/virtio.c
+++ b/drivers/firmware/arm_scmi/virtio.c
@@ -82,7 +82,8 @@
 }
 
 static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch,
-			       struct scmi_vio_msg *msg)
+			       struct scmi_vio_msg *msg,
+			       struct device *dev)
 {
 	struct scatterlist sg_in;
 	int rc;
@@ -94,8 +95,7 @@
 
 	rc = virtqueue_add_inbuf(vioch->vqueue, &sg_in, 1, msg, GFP_ATOMIC);
 	if (rc)
-		dev_err_once(vioch->cinfo->dev,
-			     "failed to add to virtqueue (%d)\n", rc);
+		dev_err_once(dev, "failed to add to virtqueue (%d)\n", rc);
 	else
 		virtqueue_kick(vioch->vqueue);
 
@@ -108,7 +108,7 @@
 				  struct scmi_vio_msg *msg)
 {
 	if (vioch->is_rx) {
-		scmi_vio_feed_vq_rx(vioch, msg);
+		scmi_vio_feed_vq_rx(vioch, msg, vioch->cinfo->dev);
 	} else {
 		/* Here IRQs are assumed to be already disabled by the caller */
 		spin_lock(&vioch->lock);
@@ -269,7 +269,7 @@
 			list_add_tail(&msg->list, &vioch->free_list);
 			spin_unlock_irqrestore(&vioch->lock, flags);
 		} else {
-			scmi_vio_feed_vq_rx(vioch, msg);
+			scmi_vio_feed_vq_rx(vioch, msg, cinfo->dev);
 		}
 	}
 
diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c
index a504895..ac08e81 100644
--- a/drivers/firmware/arm_scmi/voltage.c
+++ b/drivers/firmware/arm_scmi/voltage.c
@@ -156,7 +156,7 @@
 			int cnt;
 
 			cmd->domain_id = cpu_to_le32(v->id);
-			cmd->level_index = desc_index;
+			cmd->level_index = cpu_to_le32(desc_index);
 			ret = ph->xops->do_xfer(ph, tl);
 			if (ret)
 				break;
diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c
index 581aa5e..dd7c3d5 100644
--- a/drivers/firmware/smccc/soc_id.c
+++ b/drivers/firmware/smccc/soc_id.c
@@ -50,7 +50,7 @@
 	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 			     ARM_SMCCC_ARCH_SOC_ID, &res);
 
-	if (res.a0 == SMCCC_RET_NOT_SUPPORTED) {
+	if ((int)res.a0 == SMCCC_RET_NOT_SUPPORTED) {
 		pr_info("ARCH_SOC_ID not implemented, skipping ....\n");
 		return 0;
 	}
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 072ed61..60d9374 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -523,6 +523,7 @@
 config GPIO_ROCKCHIP
 	tristate "Rockchip GPIO support"
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	select GENERIC_IRQ_CHIP
 	select GPIOLIB_IRQCHIP
 	default ARCH_ROCKCHIP
 	help
diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c
index aeec4bf..84f96b7 100644
--- a/drivers/gpio/gpio-virtio.c
+++ b/drivers/gpio/gpio-virtio.c
@@ -434,7 +434,7 @@
 		ret = generic_handle_domain_irq(vgpio->gc.irq.domain, gpio);
 		if (ret)
 			dev_err(dev, "failed to handle interrupt: %d\n", ret);
-	};
+	}
 }
 
 static void virtio_gpio_request_vq(struct virtqueue *vq)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 2a926d0d..0039df2 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -100,11 +100,25 @@
           This has the potential to use a lot of memory and print some very
           large kernel messages. If in doubt, say "N".
 
+config DRM_DEBUG_MODESET_LOCK
+	bool "Enable backtrace history for lock contention"
+	depends on STACKTRACE_SUPPORT
+	depends on DEBUG_KERNEL
+	depends on EXPERT
+	select STACKDEPOT
+	default y if DEBUG_WW_MUTEX_SLOWPATH
+	help
+	  Enable debug tracing of failures to gracefully handle drm modeset lock
+	  contention. A history of each drm modeset lock path hitting -EDEADLK
+	  will be saved until gracefully handled, and the backtrace will be
+	  printed when attempting to lock a contended lock.
+
+	  If in doubt, say "N".
+
 config DRM_FBDEV_EMULATION
 	bool "Enable legacy fbdev support for your modesetting driver"
-	depends on DRM
-	depends on FB=y || FB=DRM
-	select DRM_KMS_HELPER
+	depends on DRM_KMS_HELPER
+	depends on FB=y || FB=DRM_KMS_HELPER
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 751557a..a15a478 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -297,7 +297,7 @@
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
 static inline
@@ -312,7 +312,7 @@
 }
 
 static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 }
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0e9cfe9..71a6a9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -207,7 +207,7 @@
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	u32 domain = bo->preferred_domains;
@@ -219,6 +219,8 @@
 	}
 
 	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+
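+	/* release_notify runs on the final BO reference drop, so this is
+	 * the last point at which the kgd_mem wrapper can safely be freed.
+	 */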
+	kfree(bo->kfd_bo);
 }
 
 
@@ -644,12 +646,6 @@
 	if (IS_ERR(gobj))
 		return PTR_ERR(gobj);
 
-	/* Import takes an extra reference on the dmabuf. Drop it now to
-	 * avoid leaking it. We only need the one reference in
-	 * kgd_mem->dmabuf.
-	 */
-	dma_buf_put(mem->dmabuf);
-
 	*bo = gem_to_amdgpu_bo(gobj);
 	(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
 	(*bo)->parent = amdgpu_bo_ref(mem->bo);
@@ -734,14 +730,19 @@
 		}
 
 		/* Add BO to VM internal data structures */
+		ret = amdgpu_bo_reserve(bo[i], false);
+		if (ret) {
+			pr_debug("Unable to reserve BO during memory attach");
+			goto unwind;
+		}
 		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		amdgpu_bo_unreserve(bo[i]);
 		if (unlikely(!attachment[i]->bo_va)) {
 			ret = -ENOMEM;
 			pr_err("Failed to add BO object to VM. ret == %d\n",
 			       ret);
 			goto unwind;
 		}
-
 		attachment[i]->va = va;
 		attachment[i]->pte_flags = get_pte_flags(adev, mem);
 		attachment[i]->adev = adev;
@@ -757,7 +758,9 @@
 		if (!attachment[i])
 			continue;
 		if (attachment[i]->bo_va) {
+			amdgpu_bo_reserve(bo[i], true);
 			amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+			amdgpu_bo_unreserve(bo[i]);
 			list_del(&attachment[i]->list);
 		}
 		if (bo[i])
@@ -1568,12 +1571,12 @@
 	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
 		mem->va + bo_size * (1 + mem->aql_queue));
 
-	ret = unreserve_bo_and_vms(&ctx, false, false);
-
 	/* Remove from VM internal data structures */
 	list_for_each_entry_safe(entry, tmp, &mem->attachments, list)
 		kfd_mem_detach(entry);
 
+	ret = unreserve_bo_and_vms(&ctx, false, false);
+
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
@@ -1600,9 +1603,13 @@
 	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
 	if (mem->dmabuf)
 		dma_buf_put(mem->dmabuf);
-	drm_gem_object_put(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
-	kfree(mem);
+
+	/* If this releases the last reference, it will end up calling
+	 * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+	 * this needs to be the last call here.
+	 */
+	drm_gem_object_put(&mem->bo->tbo.base);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 96b7bb1..12a6b1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1569,6 +1569,18 @@
 	WREG32(adev->bios_scratch_reg_offset + 3, tmp);
 }
 
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+						      u32 backlight_level)
+{
+	u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2);
+
+	tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+	tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+		ATOM_S2_CURRENT_BL_LEVEL_MASK;
+
+	WREG32(adev->bios_scratch_reg_offset + 2, tmp);
+}
+
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 8cc0222..27e74b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -185,6 +185,8 @@
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
 					      bool hung);
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+						      u32 backlight_level);
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
 
 void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index b9c11c2..0de66f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -827,6 +827,7 @@
 
 	amdgpu_connector_get_edid(connector);
 	ret = amdgpu_connector_ddc_get_modes(connector);
+	amdgpu_get_native_mode(connector);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6e40cc1..d94fa74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2398,10 +2398,6 @@
 	if (!adev->gmc.xgmi.pending_reset)
 		amdgpu_amdkfd_device_init(adev);
 
-	r = amdgpu_amdkfd_resume_iommu(adev);
-	if (r)
-		goto init_failed;
-
 	amdgpu_fru_get_product_info(adev);
 
 init_failed:
@@ -3171,11 +3167,21 @@
 {
 	switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
-#if defined(CONFIG_DRM_AMD_DC_SI)
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
 	case CHIP_VERDE:
 	case CHIP_OLAND:
+		/*
+		 * We have systems in the wild with these ASICs that require
+		 * LVDS and VGA support which is not supported with DC.
+		 *
+		 * Fallback to the non-DC driver here by default so as not to
+		 * cause regressions.
+		 */
+#if defined(CONFIG_DRM_AMD_DC_SI)
+		return amdgpu_dc > 0;
+#else
+		return false;
 #endif
 	case CHIP_BONAIRE:
 	case CHIP_KAVERI:
@@ -3503,6 +3509,9 @@
 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
 	}
 
+	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
+		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
+
 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
 	if (adev->rmmio == NULL) {
 		return -ENOMEM;
@@ -4287,8 +4296,6 @@
 	if (r)
 		return r;
 
-	amdgpu_amdkfd_pre_reset(adev);
-
 	/* Resume IP prior to SMC */
 	r = amdgpu_device_ip_reinit_early_sriov(adev);
 	if (r)
@@ -4309,7 +4316,6 @@
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	r = amdgpu_ib_ring_tests(adev);
-	amdgpu_amdkfd_post_reset(adev);
 
 error:
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
@@ -4850,6 +4856,9 @@
 
 		/* clear job's guilty and depend on the following step to decide the real one */
 		drm_sched_reset_karma(s_job);
+		/* The real bad job will be resubmitted twice; take an extra
+		 * dma_fence_get here so the fence refcount stays balanced */
+		dma_fence_get(s_job->s_fence->parent);
 		drm_sched_resubmit_jobs_ext(&ring->sched, 1);
 
 		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
@@ -4885,6 +4894,7 @@
 
 		/* got the hw fence, signal finished fence */
 		atomic_dec(ring->sched.score);
+		dma_fence_put(s_job->s_fence->parent);
 		dma_fence_get(&s_job->s_fence->finished);
 		dma_fence_signal(&s_job->s_fence->finished);
 		dma_fence_put(&s_job->s_fence->finished);
@@ -5020,8 +5030,7 @@
 
 		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
 
-		if (!amdgpu_sriov_vf(tmp_adev))
-			amdgpu_amdkfd_pre_reset(tmp_adev);
+		amdgpu_amdkfd_pre_reset(tmp_adev);
 
 		/*
 		 * Mark these ASICs to be reseted as untracked first
@@ -5079,7 +5088,7 @@
 
 	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
 	/* Actual ASIC resets if needed.*/
-	/* TODO Implement XGMI hive reset logic for SRIOV */
+	/* Host driver will handle XGMI hive reset for SRIOV */
 	if (amdgpu_sriov_vf(adev)) {
 		r = amdgpu_device_reset_sriov(adev, job ? false : true);
 		if (r)
@@ -5139,8 +5148,8 @@
 
 skip_sched_resume:
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-		/* unlock kfd: SRIOV would do it separately */
-		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
+		/* unlock kfd */
+		if (!need_emergency_restart)
 			amdgpu_amdkfd_post_reset(tmp_adev);
 
 		/* kfd_post_reset will do nothing if kfd device is not initialized,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index d7c8d9e..503995c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -248,8 +248,8 @@
 
 	offset = offsetof(struct binary_header, binary_checksum) +
 		sizeof(bhdr->binary_checksum);
-	size = bhdr->binary_size - offset;
-	checksum = bhdr->binary_checksum;
+	size = le16_to_cpu(bhdr->binary_size) - offset;
+	checksum = le16_to_cpu(bhdr->binary_checksum);
 
 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
 					      size, checksum)) {
@@ -270,7 +270,7 @@
 	}
 
 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
-					      ihdr->size, checksum)) {
+					      le16_to_cpu(ihdr->size), checksum)) {
 		DRM_ERROR("invalid ip discovery data table checksum\n");
 		r = -EINVAL;
 		goto out;
@@ -282,7 +282,7 @@
 	ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
 
 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
-				              ghdr->size, checksum)) {
+				              le32_to_cpu(ghdr->size), checksum)) {
 		DRM_ERROR("invalid gc data table checksum\n");
 		r = -EINVAL;
 		goto out;
@@ -489,10 +489,10 @@
 			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
 
 	for (i = 0; i < 32; i++) {
-		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
+		if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
 			break;
 
-		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
+		switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
 		case VCN_HWID:
 			vcn_harvest_count++;
 			if (harvest_info->list[i].number_instance == 0)
@@ -587,6 +587,9 @@
 		amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add common ip block(GC_HWIP:0x%x)\n",
+			adev->ip_versions[GC_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -619,6 +622,9 @@
 		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+			adev->ip_versions[GC_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -648,6 +654,9 @@
 		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
+			adev->ip_versions[OSSSYS_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -688,6 +697,9 @@
 		amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add psp ip block(MP0_HWIP:0x%x)\n",
+			adev->ip_versions[MP0_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -726,6 +738,9 @@
 		amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add smu ip block(MP1_HWIP:0x%x)\n",
+			adev->ip_versions[MP1_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -753,6 +768,9 @@
 			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 			break;
 		default:
+			dev_err(adev->dev,
+				"Failed to add dm ip block(DCE_HWIP:0x%x)\n",
+				adev->ip_versions[DCE_HWIP][0]);
 			return -EINVAL;
 		}
 	} else if (adev->ip_versions[DCI_HWIP][0]) {
@@ -763,6 +781,9 @@
 			amdgpu_device_ip_block_add(adev, &dm_ip_block);
 			break;
 		default:
+			dev_err(adev->dev,
+				"Failed to add dm ip block(DCI_HWIP:0x%x)\n",
+				adev->ip_versions[DCI_HWIP][0]);
 			return -EINVAL;
 		}
 #endif
@@ -796,6 +817,9 @@
 		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+			adev->ip_versions[GC_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -829,6 +853,9 @@
 		amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
 		break;
 	default:
+		dev_err(adev->dev,
+			"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
+			adev->ip_versions[SDMA0_HWIP][0]);
 		return -EINVAL;
 	}
 	return 0;
@@ -845,6 +872,9 @@
 				amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
 			break;
 		default:
+			dev_err(adev->dev,
+				"Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
+				adev->ip_versions[UVD_HWIP][0]);
 			return -EINVAL;
 		}
 		switch (adev->ip_versions[VCE_HWIP][0]) {
@@ -855,6 +885,9 @@
 				amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
 			break;
 		default:
+			dev_err(adev->dev,
+				"Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
+				adev->ip_versions[VCE_HWIP][0]);
 			return -EINVAL;
 		}
 	} else {
@@ -867,7 +900,8 @@
 		case IP_VERSION(2, 0, 2):
 		case IP_VERSION(2, 2, 0):
 			amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
-			amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+			if (!amdgpu_sriov_vf(adev))
+				amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 			break;
 		case IP_VERSION(2, 0, 3):
 			break;
@@ -881,6 +915,7 @@
 			break;
 		case IP_VERSION(3, 0, 0):
 		case IP_VERSION(3, 0, 16):
+		case IP_VERSION(3, 0, 64):
 		case IP_VERSION(3, 1, 1):
 		case IP_VERSION(3, 0, 2):
 			amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
@@ -891,6 +926,9 @@
 			amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
 			break;
 		default:
+			dev_err(adev->dev,
+				"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
+				adev->ip_versions[UVD_HWIP][0]);
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a573424..a1e63ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -60,9 +60,10 @@
 			goto unlock;
 		}
 
-		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-					       TTM_BO_VM_NUM_PREFAULT, 1);
-		drm_dev_exit(idx);
+		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+					       TTM_BO_VM_NUM_PREFAULT);
+
+		drm_dev_exit(idx);
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3d62e1..0c7963dfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -223,7 +223,7 @@
  */
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-	unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
+	unsigned int count;
 	u32 wptr;
 
 	if (!ih->enabled || adev->shutdown)
@@ -232,6 +232,7 @@
 	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
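+	/* Refresh the IV budget on every restart so a single loop pass
+	 * never processes more than AMDGPU_IH_MAX_NUM_IVS entries.
+	 */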
+	count = AMDGPU_IH_MAX_NUM_IVS;
 	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index dfe667e..651c7ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1423,6 +1423,8 @@
 	struct drm_amdgpu_info_firmware fw_info;
 	struct drm_amdgpu_query_fw query_fw;
 	struct atom_context *ctx = adev->mode_info.atom_context;
+	uint8_t smu_minor, smu_debug;
+	uint16_t smu_major;
 	int ret, i;
 
 	static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
@@ -1568,8 +1570,11 @@
 	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
 	if (ret)
 		return ret;
-	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
-		   fw_info.feature, fw_info.ver);
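+	/* The SMC firmware version is packed as major.minor.debug in the
+	 * upper 16, middle 8 and low 8 bits respectively.
+	 */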
+	smu_major = (fw_info.ver >> 16) & 0xffff;
+	smu_minor = (fw_info.ver >> 8) & 0xff;
+	smu_debug = (fw_info.ver >> 0) & 0xff;
+	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x (%d.%d.%d)\n",
+		   fw_info.feature, fw_info.ver, smu_major, smu_minor, smu_debug);
 
 	/* SDMA */
 	query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index aeb92e5..4fcfc23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1274,7 +1274,7 @@
 	abo = ttm_to_amdgpu_bo(bo);
 
 	if (abo->kfd_bo)
-		amdgpu_amdkfd_unreserve_memory_limit(abo);
+		amdgpu_amdkfd_release_notify(abo);
 
 	/* We only remove the fence if the resv has individualized. */
 	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 2658414..4f7c708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -134,6 +134,7 @@
 			adev->vcn.indirect_sram = true;
 		break;
 	case IP_VERSION(3, 0, 0):
+	case IP_VERSION(3, 0, 64):
 		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
 			fw_name = FIRMWARE_SIENNA_CICHLID;
 		else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 978ac92..567df2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -386,6 +386,7 @@
 			"%s", "xgmi_hive_info");
 	if (ret) {
 		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
+		kobject_put(&hive->kobj);
 		kfree(hive);
 		hive = NULL;
 		goto pro_end;
@@ -806,9 +807,9 @@
 		for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++)
 			pcs_clear_status(adev,
 					 xgmi23_pcs_err_status_reg_aldebaran[i]);
-		for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++)
+		for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
 			pcs_clear_status(adev,
-					 xgmi23_pcs_err_status_reg_aldebaran[i]);
+					 xgmi3x16_pcs_err_status_reg_aldebaran[i]);
 		for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++)
 			pcs_clear_status(adev,
 					 walf_pcs_err_status_reg_aldebaran[i]);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 90a834d..dbe7442 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7707,8 +7707,19 @@
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(10, 3, 1):
 	case IP_VERSION(10, 3, 3):
-		clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) |
-			((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
+		preempt_disable();
+		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+		/* The SMUIO TSC clock runs at 100MHz, so the low 32 bits carry
+		 * over into the high word roughly every 42 seconds.
+		 */
+		if (hi_check != clock_hi) {
+			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+			clock_hi = hi_check;
+		}
+		preempt_enable();
+		clock = clock_lo | (clock_hi << 32ULL);
 		break;
 	default:
 		preempt_disable();
@@ -8249,6 +8260,9 @@
 static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
 {
 	u32 reg, data;
+
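+	/* RLC_SPM_MC_CNTL is not accessible while GFXOFF is active, so keep
+	 * the GFX block powered up for the duration of the update.
+	 */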
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	/* not for *_SOC15 */
 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
 	if (amdgpu_sriov_is_pp_one_vf(adev))
@@ -8263,6 +8277,8 @@
 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
 	else
 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+
+	amdgpu_gfx_off_ctrl(adev, true);
 }
 
 static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
@@ -8316,11 +8332,8 @@
 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
 		switch (adev->ip_versions[GC_HWIP][0]) {
 		case IP_VERSION(10, 3, 1):
-			data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
-			WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
-			break;
 		case IP_VERSION(10, 3, 3):
-			data = 0x1388 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
+			data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
 			WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
 			break;
 		default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 37b4a3d..d17a6f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3575,12 +3575,16 @@
 {
 	u32 data;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	data = RREG32(mmRLC_SPM_VMID);
 
 	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
 	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
 
 	WREG32(mmRLC_SPM_VMID, data);
+
+	amdgpu_gfx_off_ctrl(adev, true);
 }
 
 static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e0302c2..5f112ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5624,6 +5624,8 @@
 {
 	u32 data;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	if (amdgpu_sriov_is_pp_one_vf(adev))
 		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
 	else
@@ -5636,6 +5638,8 @@
 		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
 	else
 		WREG32(mmRLC_SPM_VMID, data);
+
+	amdgpu_gfx_off_ctrl(adev, true);
 }
 
 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7f944bb..34478bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -140,6 +140,11 @@
 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
 
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
+
 enum ta_ras_gfx_subblock {
 	/*CPC*/
 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -2462,7 +2467,9 @@
 	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v9_0_mec_fini(adev);
-	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+				&adev->gfx.rlc.clear_state_gpu_addr,
+				(void **)&adev->gfx.rlc.cs_ptr);
 	if (adev->flags & AMD_IS_APU) {
 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
 				&adev->gfx.rlc.cp_table_gpu_addr,
@@ -4236,19 +4243,38 @@
 
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
-	uint64_t clock;
+	uint64_t clock, clock_lo, clock_hi, hi_check;
 
-	amdgpu_gfx_off_ctrl(adev, false);
-	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
-		clock = gfx_v9_0_kiq_read_clock(adev);
-	} else {
-		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
-			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+	switch (adev->ip_versions[GC_HWIP][0]) {
+	case IP_VERSION(9, 3, 0):
+		preempt_disable();
+		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+		/* The SMUIO TSC clock runs at 100MHz, so the low 32 bits carry
+		 * over into the high word roughly every 42 seconds.
+		 */
+		if (hi_check != clock_hi) {
+			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+			clock_hi = hi_check;
+		}
+		preempt_enable();
+		clock = clock_lo | (clock_hi << 32ULL);
+		break;
+	default:
+		amdgpu_gfx_off_ctrl(adev, false);
+		mutex_lock(&adev->gfx.gpu_clock_mutex);
+		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
+			clock = gfx_v9_0_kiq_read_clock(adev);
+		} else {
+			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		}
+		mutex_unlock(&adev->gfx.gpu_clock_mutex);
+		amdgpu_gfx_off_ctrl(adev, true);
+		break;
 	}
-	mutex_unlock(&adev->gfx.gpu_clock_mutex);
-	amdgpu_gfx_off_ctrl(adev, true);
 	return clock;
 }
 
@@ -5102,6 +5128,8 @@
 {
 	u32 reg, data;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
 	if (amdgpu_sriov_is_pp_one_vf(adev))
 		data = RREG32_NO_KIQ(reg);
@@ -5115,6 +5143,8 @@
 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
 	else
 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+
+	amdgpu_gfx_off_ctrl(adev, true);
 }
 
 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index bda1542..480e418 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -348,6 +348,10 @@
 		WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL,
 				    i * hub->ctx_distance, 0);
 
+	if (amdgpu_sriov_vf(adev))
+		/* Avoid writing to GMC registers */
+		return;
+
 	/* Setup TLB control */
 	tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL);
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 497b86c..90f0aef 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -54,15 +54,17 @@
 		seg_size = REG_GET_FIELD(
 			RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE_ALDE),
 			MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+		max_region =
+			REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE, PF_MAX_REGION);
 	} else {
 		xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
 		seg_size = REG_GET_FIELD(
 			RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
 			MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+		max_region =
+			REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
 	}
 
-	max_region =
-		REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
 
 
 	switch (adev->asic_type) {
@@ -89,9 +91,15 @@
 		if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
 			return -EINVAL;
 
-		adev->gmc.xgmi.physical_node_id =
-		REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
-			      PF_LFB_REGION);
+		if (adev->asic_type == CHIP_ALDEBARAN) {
+			adev->gmc.xgmi.physical_node_id =
+				REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL_ALDE,
+						PF_LFB_REGION);
+		} else {
+			adev->gmc.xgmi.physical_node_id =
+				REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL,
+						PF_LFB_REGION);
+		}
 
 		if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 1d8414c..38241cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -160,6 +160,7 @@
 
 	tmp = RREG32(ih_regs->ih_rb_cntl);
 	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+	tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
 	/* enable_intr field is only valid in ring0 */
 	if (ih == &adev->irq.ih)
 		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -275,10 +276,8 @@
 	tmp = navi10_ih_rb_cntl(ih, tmp);
 	if (ih == &adev->irq.ih)
 		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
-	if (ih == &adev->irq.ih1) {
-		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+	if (ih == &adev->irq.ih1)
 		tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
-	}
 
 	if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
 		if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
@@ -319,7 +318,6 @@
 {
 	struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
 	u32 ih_chicken;
-	u32 tmp;
 	int ret;
 	int i;
 
@@ -363,15 +361,6 @@
 	adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
 					    ih[0]->doorbell_index);
 
-	tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
-	tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
-			    CLIENT18_IS_STORM_CLIENT, 1);
-	WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp);
-
-	tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL);
-	tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
-	WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp);
-
 	pci_set_master(adev->pdev);
 
 	/* enable interrupts */
@@ -420,12 +409,19 @@
 	u32 wptr, tmp;
 	struct amdgpu_ih_regs *ih_regs;
 
-	wptr = le32_to_cpu(*ih->wptr_cpu);
+	if (ih == &adev->irq.ih) {
+		/* Only ring0 supports writeback. On other rings fall back
+		 * to register-based code with overflow checking below.
+		 */
+		wptr = le32_to_cpu(*ih->wptr_cpu);
+
+		if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+			goto out;
+	}
+
 	ih_regs = &ih->ih_regs;
 
-	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
-		goto out;
-
+	/* Double check that the overflow wasn't already cleared. */
 	wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
 	if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
 		goto out;
@@ -513,15 +509,11 @@
 			      struct amdgpu_irq_src *source,
 			      struct amdgpu_iv_entry *entry)
 {
-	uint32_t wptr = cpu_to_le32(entry->src_data[0]);
-
 	switch (entry->ring_id) {
 	case 1:
-		*adev->irq.ih1.wptr_cpu = wptr;
 		schedule_work(&adev->irq.ih1_work);
 		break;
 	case 2:
-		*adev->irq.ih2.wptr_cpu = wptr;
 		schedule_work(&adev->irq.ih2_work);
 		break;
 	default: break;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 4ecd2b5..ee7cab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -359,6 +359,10 @@
 
 	if (def != data)
 		WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+
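+	/* VFs do not program the HDP remap aperture, so point rmmio_remap
+	 * at the VF copy of the HDP flush register instead.
+	 */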
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+			mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT		0x00000000 // off by default, no gains over L1
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 0d2d629..4bbacf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -276,6 +276,10 @@
 
 	if (def != data)
 		WREG32_PCIE(smnPCIE_CI_CNTL, data);
+
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+			mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 3c00666..37a4039 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -273,7 +273,9 @@
 
 static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
 {
-
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset =
+			SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 8f2a315..3444332 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -371,6 +371,10 @@
 		if (def != data)
 			WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data);
 	}
+
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+			regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index b8bd03d..dc5e937 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -362,7 +362,9 @@
 
 static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
 {
-
+	if (amdgpu_sriov_vf(adev))
+		adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+			mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
@@ -692,6 +694,9 @@
 {
 	uint32_t def, data;
 
+	if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
+		return;
+
 	def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
 	data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
 	data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index febc903..a6659d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -182,6 +182,7 @@
 {
 	switch (adev->ip_versions[UVD_HWIP][0]) {
 	case IP_VERSION(3, 0, 0):
+	case IP_VERSION(3, 0, 64):
 		if (amdgpu_sriov_vf(adev)) {
 			if (encode)
 				*codecs = &sriov_sc_video_codecs_encode;
@@ -730,8 +731,10 @@
 #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+	if (!amdgpu_sriov_vf(adev)) {
+		adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+		adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+	}
 	adev->smc_rreg = NULL;
 	adev->smc_wreg = NULL;
 	adev->pcie_rreg = &nv_pcie_rreg;
@@ -1031,7 +1034,7 @@
 	 * for the purpose of exposing those registers
 	 * to process space
 	 */
-	if (adev->nbio.funcs->remap_hdp_registers)
+	if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
 		adev->nbio.funcs->remap_hdp_registers(adev);
 	/* enable the doorbell aperture */
 	nv_enable_doorbell_aperture(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 0c316a2..de9b553 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -971,8 +971,10 @@
 #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+	if (!amdgpu_sriov_vf(adev)) {
+		adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+		adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+	}
 	adev->smc_rreg = NULL;
 	adev->smc_wreg = NULL;
 	adev->pcie_rreg = &soc15_pcie_rreg;
@@ -1285,7 +1287,7 @@
 	 * for the purpose of exposing those registers
 	 * to process space
 	 */
-	if (adev->nbio.funcs->remap_hdp_registers)
+	if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
 		adev->nbio.funcs->remap_hdp_registers(adev);
 
 	/* enable the doorbell aperture */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index d5d023a..2d558c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -534,6 +534,19 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+	if (RREG32(mmUVD_STATUS) != 0)
+		uvd_v6_0_stop(adev);
+
+	return 0;
+}
+
+static int uvd_v6_0_suspend(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
 	/*
 	 * Proper cleanups before halting the HW engine:
 	 *   - cancel the delayed idle work
@@ -558,17 +571,6 @@
 						       AMD_CG_STATE_GATE);
 	}
 
-	if (RREG32(mmUVD_STATUS) != 0)
-		uvd_v6_0_stop(adev);
-
-	return 0;
-}
-
-static int uvd_v6_0_suspend(void *handle)
-{
-	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
 	r = uvd_v6_0_hw_fini(adev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0fffaf8..3b119db1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -406,7 +406,7 @@
 static const struct kfd_device_info renoir_device_info = {
 	.asic_family = CHIP_RENOIR,
 	.asic_name = "renoir",
-	.gfx_target_version = 90002,
+	.gfx_target_version = 90012,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
 	.doorbell_size  = 8,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 533b27b..93e33dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1226,6 +1226,11 @@
 	bool hanging;
 
 	dqm_lock(dqm);
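+	/* Nothing to do if the scheduler was already stopped, e.g. when
+	 * stop is called a second time during a GPU reset.
+	 */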
+	if (!dqm->sched_running) {
+		dqm_unlock(dqm);
+		return 0;
+	}
+
 	if (!dqm->is_hws_hang)
 		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
 	hanging = dqm->is_hws_hang || dqm->is_resetting;
@@ -1430,7 +1435,7 @@
 
 	if (!dqm->sched_running)
 		return 0;
-	if (dqm->is_hws_hang)
+	if (dqm->is_hws_hang || dqm->is_resetting)
 		return -EIO;
 	if (!dqm->active_runlist)
 		return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 2e86692d..d138889 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -308,7 +308,7 @@
  * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
  * for now).
  */
-#define SVM_USER_BASE 0x1000000ull
+#define SVM_USER_BASE (u64)(KFD_CWSR_TBA_TMA_SIZE + 2*PAGE_SIZE)
 #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
 #define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d43bfd8..9b9c2b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -281,6 +281,19 @@
 	return cpages;
 }
 
+static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+{
+	unsigned long upages = 0;
+	unsigned long i;
+
+	for (i = 0; i < migrate->npages; i++) {
+		if (migrate->src[i] & MIGRATE_PFN_VALID &&
+		    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+			upages++;
+	}
+	return upages;
+}
+
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 			 struct migrate_vma *migrate, struct dma_fence **mfence,
@@ -632,10 +645,11 @@
 		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
 {
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
+	unsigned long upages = npages;
+	unsigned long cpages = 0;
 	struct kfd_process_device *pdd;
 	struct dma_fence *mfence = NULL;
 	struct migrate_vma migrate;
-	unsigned long cpages = 0;
 	dma_addr_t *scratch;
 	size_t size;
 	void *buf;
@@ -669,6 +683,7 @@
 	if (!cpages) {
 		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
 			 prange->start, prange->last);
+		upages = svm_migrate_unsuccessful_pages(&migrate);
 		goto out_free;
 	}
 	if (cpages != npages)
@@ -681,8 +696,9 @@
 				    scratch, npages);
 	migrate_vma_pages(&migrate);
 
-	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-		svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
+	upages = svm_migrate_unsuccessful_pages(&migrate);
+	pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+		 upages, cpages, migrate.npages);
 
 	svm_migrate_copy_done(adev, mfence);
 	migrate_vma_finalize(&migrate);
@@ -696,9 +712,9 @@
 		if (pdd)
 			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
 
-		return cpages;
+		return upages;
 	}
-	return r;
+	return r ? r : upages;
 }
 
 /**
@@ -718,7 +734,7 @@
 	unsigned long addr;
 	unsigned long start;
 	unsigned long end;
-	unsigned long cpages = 0;
+	unsigned long upages = 0;
 	long r = 0;
 
 	if (!prange->actual_loc) {
@@ -754,12 +770,12 @@
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
-			cpages += r;
+			upages += r;
 		}
 		addr = next;
 	}
 
-	if (cpages) {
+	if (!upages) {
 		svm_range_vram_node_free(prange);
 		prange->actual_loc = 0;
 	}
@@ -782,7 +798,7 @@
 svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			 struct mm_struct *mm)
 {
-	int r;
+	int r, retries = 3;
 
 	/*
 	 * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
@@ -791,9 +807,14 @@
 
 	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
 
-	r = svm_migrate_vram_to_ram(prange, mm);
-	if (r)
-		return r;
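+	/* Retry until no pages remain in VRAM (actual_loc is cleared) or
+	 * the retry budget is exhausted; individual pages can fail to
+	 * migrate when they race with concurrent faults.
+	 */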
+	do {
+		r = svm_migrate_vram_to_ram(prange, mm);
+		if (r)
+			return r;
+	} while (prange->actual_loc && --retries);
+
+	if (prange->actual_loc)
+		return -EDEADLK;
 
 	return svm_migrate_ram_to_vram(prange, best_loc, mm);
 }
@@ -838,6 +859,11 @@
 		pr_debug("failed find process at fault address 0x%lx\n", addr);
 		return VM_FAULT_SIGBUS;
 	}
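+	/* If this CPU page fault was raised by the GPU retry-fault handler
+	 * itself (same task), skip the migration back to RAM to avoid
+	 * recursing into locks the caller already holds.
+	 */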
+	if (READ_ONCE(p->svms.faulting_task) == current) {
+		pr_debug("skipping ram migration\n");
+		kfd_unref_process(p);
+		return 0;
+	}
 	addr >>= PAGE_SHIFT;
 	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4104b16..8fd48d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -766,8 +766,10 @@
 	struct list_head		deferred_range_list;
 	spinlock_t			deferred_list_lock;
 	atomic_t			evicted_ranges;
+	atomic_t			drain_pagefaults;
 	struct delayed_work		restore_work;
 	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
+	struct task_struct		*faulting_task;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4578638..b993011 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1715,7 +1715,11 @@
 
 		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
 							    &pdd->qpd);
-		if (r) {
+		/* evict returns -EIO if HWS is hung or the asic is resetting. In
+		 * that case, still mark all the queues as evicted to prevent them
+		 * from being added back, since they are not actually saved right now.
+		 */
+		if (r && r != -EIO) {
 			pr_err("Failed to evict process queues\n");
 			goto fail;
 		}
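
The eviction loop above treats -EIO as a soft failure: when the hardware scheduler is hung or the ASIC is resetting, the queues are still marked evicted so they are not added back later, and only other errors abort. The shape of that single-errno allow-list, with toy helpers:

#include <errno.h>
#include <stdio.h>

static int evict_queues(int dev)
{
	return dev == 1 ? -EIO : 0;	/* toy: device 1 is "hung" */
}

static int evict_all(int ndevs)
{
	for (int dev = 0; dev < ndevs; dev++) {
		int r = evict_queues(dev);

		/* -EIO means HWS hang / reset in progress: keep marking
		 * queues evicted and continue; anything else is fatal. */
		if (r && r != -EIO)
			return r;
	}
	return 0;
}

int main(void)
{
	printf("evict_all -> %d\n", evict_all(3));
	return 0;
}
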
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b691c84..58b89b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1496,9 +1496,11 @@
 
 		next = min(vma->vm_end, end);
 		npages = (next - addr) >> PAGE_SHIFT;
+		WRITE_ONCE(p->svms.faulting_task, current);
 		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
 					       addr, npages, &hmm_range,
 					       readonly, true, owner);
+		WRITE_ONCE(p->svms.faulting_task, NULL);
 		if (r) {
 			pr_debug("failed %d to get svm range pages\n", r);
 			goto unreserve_out;
@@ -1966,10 +1968,16 @@
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *adev;
 	struct kfd_process *p;
+	int drain;
 	uint32_t i;
 
 	p = container_of(svms, struct kfd_process, svms);
 
+restart:
+	drain = atomic_read(&svms->drain_pagefaults);
+	if (!drain)
+		return;
+
 	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
 		pdd = p->pdds[i];
 		if (!pdd)
@@ -1981,6 +1989,8 @@
 		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
 		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
 	}
+	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+		goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -1988,35 +1998,41 @@
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct mm_struct *mm;
+	struct kfd_process *p;
 
 	svms = container_of(work, struct svm_range_list, deferred_list_work);
 	pr_debug("enter svms 0x%p\n", svms);
 
+	p = container_of(svms, struct kfd_process, svms);
+	/* Hold a reference so the mm cannot go away while inserting the mmu notifier */
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("svms 0x%p process mm gone\n", svms);
+		return;
+	}
+retry:
+	mmap_write_lock(mm);
+
+	/* Checking for the need to drain retry faults must be done inside the
+	 * mmap write lock to serialize with munmap notifiers.
+	 */
+	if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+		mmap_write_unlock(mm);
+		svm_range_drain_retry_fault(svms);
+		goto retry;
+	}
+
 	spin_lock(&svms->deferred_list_lock);
 	while (!list_empty(&svms->deferred_range_list)) {
 		prange = list_first_entry(&svms->deferred_range_list,
 					  struct svm_range, deferred_list);
-		spin_unlock(&svms->deferred_list_lock);
-		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
-			 prange->start, prange->last, prange->work_item.op);
-
-		/* Make sure no stale retry fault coming after range is freed */
-		if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
-			svm_range_drain_retry_fault(prange->svms);
-
-		mm = prange->work_item.mm;
-		mmap_write_lock(mm);
-		mutex_lock(&svms->lock);
-
-		/* Remove from deferred_list must be inside mmap write lock,
-		 * otherwise, svm_range_list_lock_and_flush_work may hold mmap
-		 * write lock, and continue because deferred_list is empty, then
-		 * deferred_list handle is blocked by mmap write lock.
-		 */
-		spin_lock(&svms->deferred_list_lock);
 		list_del_init(&prange->deferred_list);
 		spin_unlock(&svms->deferred_list_lock);
 
+		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
+			 prange->start, prange->last, prange->work_item.op);
+
+		mutex_lock(&svms->lock);
 		mutex_lock(&prange->migrate_mutex);
 		while (!list_empty(&prange->child_list)) {
 			struct svm_range *pchild;
@@ -2032,12 +2048,13 @@
 
 		svm_range_handle_list_op(svms, prange);
 		mutex_unlock(&svms->lock);
-		mmap_write_unlock(mm);
 
 		spin_lock(&svms->deferred_list_lock);
 	}
 	spin_unlock(&svms->deferred_list_lock);
 
+	mmap_write_unlock(mm);
+	mmput(mm);
 	pr_debug("exit svms 0x%p\n", svms);
 }
 
@@ -2124,6 +2141,12 @@
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
 		 prange, prange->start, prange->last, start, last);
 
+	/* Make sure pending page faults are drained in the deferred worker
+	 * before the range is freed to avoid straggler interrupts on
+	 * unmapped memory causing "phantom faults".
+	 */
+	atomic_inc(&svms->drain_pagefaults);
+
 	unmap_parent = start <= prange->start && last >= prange->last;
 
 	list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2261,7 +2284,7 @@
  * migration if actual loc is not best location, then update GPU page table
  * mapping to the best location.
  *
- * If vm fault gpu is range preferred loc, the best_loc is preferred loc.
+ * If the preferred loc is accessible by faulting GPU, use preferred loc.
  * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu
  * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then
  *    if range actual loc is cpu, best_loc is cpu
@@ -2278,7 +2301,7 @@
 				struct amdgpu_device *adev,
 				int32_t *gpuidx)
 {
-	struct amdgpu_device *bo_adev;
+	struct amdgpu_device *bo_adev, *preferred_adev;
 	struct kfd_process *p;
 	uint32_t gpuid;
 	int r;
@@ -2291,8 +2314,16 @@
 		return -1;
 	}
 
-	if (prange->preferred_loc == gpuid)
+	if (prange->preferred_loc == gpuid ||
+	    prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
 		return prange->preferred_loc;
+	} else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+		preferred_adev = svm_range_get_adev_by_id(prange,
+							prange->preferred_loc);
+		if (amdgpu_xgmi_same_hive(adev, preferred_adev))
+			return prange->preferred_loc;
+		/* fall through */
+	}
 
 	if (test_bit(*gpuidx, prange->bitmap_access))
 		return gpuid;
@@ -2313,7 +2344,8 @@
 
 static int
 svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
-				unsigned long *start, unsigned long *last)
+			       unsigned long *start, unsigned long *last,
+			       bool *is_heap_stack)
 {
 	struct vm_area_struct *vma;
 	struct interval_tree_node *node;
@@ -2324,6 +2356,12 @@
 		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
 		return -EFAULT;
 	}
+
+	*is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
+			  vma->vm_end >= vma->vm_mm->start_brk) ||
+			 (vma->vm_start <= vma->vm_mm->start_stack &&
+			  vma->vm_end >= vma->vm_mm->start_stack);
+
 	start_limit = max(vma->vm_start >> PAGE_SHIFT,
 		      (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
 	end_limit = min(vma->vm_end >> PAGE_SHIFT,
@@ -2353,9 +2391,9 @@
 	*start = start_limit;
 	*last = end_limit - 1;
 
-	pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n",
-		  vma->vm_start >> PAGE_SHIFT, *start,
-		  vma->vm_end >> PAGE_SHIFT, *last);
+	pr_debug("vma [0x%lx 0x%lx] range [0x%lx 0x%lx] is_heap_stack %d\n",
+		 vma->vm_start >> PAGE_SHIFT, vma->vm_end >> PAGE_SHIFT,
+		 *start, *last, *is_heap_stack);
 
 	return 0;
 }
@@ -2420,11 +2458,13 @@
 	struct svm_range *prange = NULL;
 	unsigned long start, last;
 	uint32_t gpuid, gpuidx;
+	bool is_heap_stack;
 	uint64_t bo_s = 0;
 	uint64_t bo_l = 0;
 	int r;
 
-	if (svm_range_get_range_boundaries(p, addr, &start, &last))
+	if (svm_range_get_range_boundaries(p, addr, &start, &last,
+					   &is_heap_stack))
 		return NULL;
 
 	r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
@@ -2451,6 +2491,9 @@
 		return NULL;
 	}
 
+	if (is_heap_stack)
+		prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
+
 	svm_range_add_to_svms(prange);
 	svm_range_add_notifier_locked(mm, prange);
 
@@ -2523,20 +2566,13 @@
 }
 
 static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 {
 	unsigned long requested = VM_READ;
-	struct vm_area_struct *vma;
 
 	if (write_fault)
 		requested |= VM_WRITE;
 
-	vma = find_vma(mm, addr << PAGE_SHIFT);
-	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
-		pr_debug("address 0x%llx VMA is removed\n", addr);
-		return true;
-	}
-
 	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
 		vma->vm_flags);
 	return (vma->vm_flags & requested) == requested;
@@ -2554,6 +2590,7 @@
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
+	struct vm_area_struct *vma;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2564,7 +2601,7 @@
 	p = kfd_lookup_process_by_pasid(pasid);
 	if (!p) {
 		pr_debug("kfd process not founded pasid 0x%x\n", pasid);
-		return -ESRCH;
+		return 0;
 	}
 	if (!p->xnack_enabled) {
 		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
@@ -2575,10 +2612,17 @@
 
 	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
+	if (atomic_read(&svms->drain_pagefaults)) {
+		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		goto out;
+	}
+
+	/* p->lead_thread is available because kfd_process_wq_release flushes
+	 * this work before releasing the task ref.
+	 */
 	mm = get_task_mm(p->lead_thread);
 	if (!mm) {
 		pr_debug("svms 0x%p failed to get mm\n", svms);
-		r = -ESRCH;
 		goto out;
 	}
 
@@ -2627,7 +2671,17 @@
 		goto out_unlock_range;
 	}
 
-	if (!svm_fault_allowed(mm, addr, write_fault)) {
+	/* __do_munmap removed VMA, return success as we are handling stale
+	 * retry fault.
+	 */
+	vma = find_vma(mm, addr << PAGE_SHIFT);
+	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+		pr_debug("address 0x%llx VMA is removed\n", addr);
+		r = 0;
+		goto out_unlock_range;
+	}
+
+	if (!svm_fault_allowed(vma, write_fault)) {
 		pr_debug("fault addr 0x%llx no %s permission\n", addr,
 			write_fault ? "write" : "read");
 		r = -EPERM;
@@ -2705,6 +2759,14 @@
 	/* Ensure list work is finished before process is destroyed */
 	flush_work(&p->svms.deferred_list_work);
 
+	/*
+	 * Ensure no retry fault comes in afterwards, as the page fault handler
+	 * will not find the kfd process and cannot take the mm lock to recover
+	 * the fault.
+	 */
+	atomic_inc(&p->svms.drain_pagefaults);
+	svm_range_drain_retry_fault(&p->svms);
+
 	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
 		svm_range_unlink(prange);
 		svm_range_remove_notifier(prange);
@@ -2725,6 +2787,7 @@
 	mutex_init(&svms->lock);
 	INIT_LIST_HEAD(&svms->list);
 	atomic_set(&svms->evicted_ranges, 0);
+	atomic_set(&svms->drain_pagefaults, 0);
 	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
 	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
 	INIT_LIST_HEAD(&svms->deferred_range_list);
@@ -3076,6 +3139,8 @@
 		struct svm_range *prange =
 				list_first_entry(&svm_bo->range_list,
 						struct svm_range, svm_bo_list);
+		int retries = 3;
+
 		list_del_init(&prange->svm_bo_list);
 		spin_unlock(&svm_bo->list_lock);
 
@@ -3083,7 +3148,11 @@
 			 prange->start, prange->last);
 
 		mutex_lock(&prange->migrate_mutex);
-		svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+		do {
+			svm_migrate_vram_to_ram(prange,
+						svm_bo->eviction_fence->mm);
+		} while (prange->actual_loc && --retries);
+		WARN(prange->actual_loc, "Migration failed during eviction");
 
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
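
drain_pagefaults acts as a generation counter: unmap paths bump it, and the drain loop snapshots the value, waits out the interrupt rings, then tries to cmpxchg the counter back to zero; if the counter moved during the drain, another unmap raced in and the loop restarts. The same shape with C11 atomics (names illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int drain_pagefaults;

static void drain_hw_queues(void)
{
	/* stand-in for the per-device IH checkpoint wait */
}

static void drain_retry_faults(void)
{
	int drain;

restart:
	drain = atomic_load(&drain_pagefaults);
	if (!drain)
		return;

	drain_hw_queues();

	/* If the counter changed while draining, a new unmap requested
	 * another drain: go around again. */
	if (!atomic_compare_exchange_strong(&drain_pagefaults, &drain, 0))
		goto restart;
}

int main(void)
{
	atomic_fetch_add(&drain_pagefaults, 1);	/* unmap side */
	drain_retry_faults();			/* worker side */
	printf("drained, counter=%d\n", atomic_load(&drain_pagefaults));
	return 0;
}

The kernel's atomic_cmpxchg() returns the old value and the patch compares it against the snapshot; C11's compare-exchange folds that comparison into the call, but the race handling is the same.
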
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 43e983e..1cd6b9f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -51,6 +51,7 @@
 #include <drm/drm_hdcp.h>
 #endif
 #include "amdgpu_pm.h"
+#include "amdgpu_atombios.h"
 
 #include "amd_shared.h"
 #include "amdgpu_dm_irq.h"
@@ -217,6 +218,7 @@
 amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
 
 static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector);
+static void handle_hpd_rx_irq(void *param);
 
 static bool
 is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
@@ -619,7 +621,7 @@
 
 	amdgpu_dm_crtc_handle_crc_window_irq(&acrtc->base);
 }
-#endif
+#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
 
 /**
  * dmub_aux_setconfig_reply_callback - Callback for AUX or SET_CONFIG command.
@@ -669,10 +671,7 @@
 		return;
 	}
 
-	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-
 	link_index = notify->link_index;
-
 	link = adev->dm.dc->links[link_index];
 
 	drm_connector_list_iter_begin(dev, &iter);
@@ -685,10 +684,13 @@
 		}
 	}
 	drm_connector_list_iter_end(&iter);
-	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 
-	if (hpd_aconnector)
-		handle_hpd_irq_helper(hpd_aconnector);
+	if (hpd_aconnector) {
+		if (notify->type == DMUB_NOTIFICATION_HPD)
+			handle_hpd_irq_helper(hpd_aconnector);
+		else if (notify->type == DMUB_NOTIFICATION_HPD_IRQ)
+			handle_hpd_rx_irq(hpd_aconnector);
+	}
 }
 
 /**
@@ -764,6 +766,10 @@
 				DRM_ERROR("DM: notify type %d invalid!", notify.type);
 				continue;
 			}
+			if (!dm->dmub_callback[notify.type]) {
+				DRM_DEBUG_DRIVER("DMUB notification skipped, no handler: type=%d\n", notify.type);
+				continue;
+			}
 			if (dm->dmub_thread_offload[notify.type] == true) {
 				dmub_hpd_wrk = kzalloc(sizeof(*dmub_hpd_wrk), GFP_ATOMIC);
 				if (!dmub_hpd_wrk) {
@@ -813,7 +819,7 @@
 	if (count > DMUB_TRACE_MAX_READ)
 		DRM_DEBUG_DRIVER("Warning : count > DMUB_TRACE_MAX_READ");
 }
-#endif
+#endif /* CONFIG_DRM_AMD_DC_DCN */
 
 static int dm_set_clockgating_state(void *handle,
 		  enum amd_clockgating_state state)
@@ -1410,7 +1416,15 @@
 		switch (adev->ip_versions[DCE_HWIP][0]) {
 		case IP_VERSION(2, 1, 0):
 			init_data.flags.gpu_vm_support = true;
-			init_data.flags.disable_dmcu = true;
+			switch (adev->dm.dmcub_fw_version) {
+			case 0: /* development */
+			case 0x1: /* linux-firmware.git hash 6d9f399 */
+			case 0x01000000: /* linux-firmware.git hash 9a0b0f4 */
+				init_data.flags.disable_dmcu = false;
+				break;
+			default:
+				init_data.flags.disable_dmcu = true;
+			}
 			break;
 		case IP_VERSION(1, 0, 0):
 		case IP_VERSION(1, 0, 1):
@@ -1556,7 +1570,11 @@
 			DRM_ERROR("amdgpu: fail to register dmub hpd callback");
 			goto error;
 		}
-#endif
+		if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
+			DRM_ERROR("amdgpu: fail to register dmub hpd irq callback");
+			goto error;
+		}
+#endif /* CONFIG_DRM_AMD_DC_DCN */
 	}
 
 	if (amdgpu_dm_initialize_drm_device(adev)) {
@@ -2544,6 +2562,22 @@
 	if (amdgpu_in_reset(adev)) {
 		dc_state = dm->cached_dc_state;
 
+		/*
+		 * The dc->current_state is backed up into dm->cached_dc_state
+		 * before we commit 0 streams.
+		 *
+		 * DC will clear link encoder assignments on the real state
+		 * but the changes won't propagate over to the copy we made
+		 * before the 0 streams commit.
+		 *
+		 * DC expects that link encoder assignments are *not* valid
+		 * when committing a state, so as a workaround it needs to be
+		 * cleared here.
+		 */
+		link_enc_cfg_init(dm->dc, dc_state);
+
+		amdgpu_dm_outbox_init(adev);
+
 		r = dm_dmub_hw_init(adev);
 		if (r)
 			DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -2555,8 +2589,8 @@
 
 		for (i = 0; i < dc_state->stream_count; i++) {
 			dc_state->streams[i]->mode_changed = true;
-			for (j = 0; j < dc_state->stream_status->plane_count; j++) {
-				dc_state->stream_status->plane_states[j]->update_flags.raw
+			for (j = 0; j < dc_state->stream_status[i].plane_count; j++) {
+				dc_state->stream_status[i].plane_states[j]->update_flags.raw
 					= 0xffffffff;
 			}
 		}
@@ -3892,6 +3926,9 @@
 	caps = dm->backlight_caps[bl_idx];
 
 	dm->brightness[bl_idx] = user_brightness;
+	/* update scratch register */
+	if (bl_idx == 0)
+		amdgpu_atombios_scratch_regs_set_backlight_level(dm->adev, dm->brightness[bl_idx]);
 	brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]);
 	link = (struct dc_link *)dm->backlight_link[bl_idx];
 
@@ -4225,7 +4262,8 @@
 		} else if (dc_link_detect(link, DETECT_REASON_BOOT)) {
 			amdgpu_dm_update_connector_after_detect(aconnector);
 			register_backlight_device(dm, link);
-
+			if (dm->num_of_edps)
+				update_connector_ext_caps(aconnector);
 			if (psr_feature_enabled)
 				amdgpu_dm_set_psr_caps(link);
 		}
@@ -4565,7 +4603,8 @@
 }
 
 
-static int fill_dc_scaling_info(const struct drm_plane_state *state,
+static int fill_dc_scaling_info(struct amdgpu_device *adev,
+				const struct drm_plane_state *state,
 				struct dc_scaling_info *scaling_info)
 {
 	int scale_w, scale_h, min_downscale, max_upscale;
@@ -4579,7 +4618,8 @@
 	/*
 	 * For reasons we don't (yet) fully understand a non-zero
 	 * src_y coordinate into an NV12 buffer can cause a
-	 * system hang. To avoid hangs (and maybe be overly cautious)
+	 * system hang on DCN1x.
+	 * To avoid hangs (and maybe be overly cautious)
 	 * let's reject both non-zero src_x and src_y.
 	 *
 	 * We currently know of only one use-case to reproduce a
@@ -4587,10 +4627,10 @@
 	 * is to gesture the YouTube Android app into full screen
 	 * on ChromeOS.
 	 */
-	if (state->fb &&
-	    state->fb->format->format == DRM_FORMAT_NV12 &&
-	    (scaling_info->src_rect.x != 0 ||
-	     scaling_info->src_rect.y != 0))
+	if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
+	    (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) &&
+	    (state->fb && state->fb->format->format == DRM_FORMAT_NV12 &&
+	    (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0)))
 		return -EINVAL;
 
 	scaling_info->src_rect.width = state->src_w >> 16;
@@ -5496,7 +5536,7 @@
 	int ret;
 	bool force_disable_dcc = false;
 
-	ret = fill_dc_scaling_info(plane_state, &scaling_info);
+	ret = fill_dc_scaling_info(adev, plane_state, &scaling_info);
 	if (ret)
 		return ret;
 
@@ -6070,7 +6110,7 @@
 	if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel)
 		stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel;
 }
-#endif
+#endif /* CONFIG_DRM_AMD_DC_DCN */
 
 /**
  * DOC: FreeSync Video
@@ -7241,8 +7281,8 @@
 	struct drm_connector_state *new_con_state;
 	struct amdgpu_dm_connector *aconnector;
 	struct dm_connector_state *dm_conn_state;
-	int i, j, clock;
-	int vcpi, pbn_div, pbn = 0;
+	int i, j;
+	int vcpi, pbn_div, pbn, slot_num = 0;
 
 	for_each_new_connector_in_state(state, connector, new_con_state, i) {
 
@@ -7270,17 +7310,7 @@
 		if (!stream)
 			continue;
 
-		if (stream->timing.flags.DSC != 1) {
-			drm_dp_mst_atomic_enable_dsc(state,
-						     aconnector->port,
-						     dm_conn_state->pbn,
-						     0,
-						     false);
-			continue;
-		}
-
 		pbn_div = dm_mst_get_pbn_divider(stream->link);
-		clock = stream->timing.pix_clk_100hz / 10;
 		/* pbn is calculated by compute_mst_dsc_configs_for_state*/
 		for (j = 0; j < dc_state->stream_count; j++) {
 			if (vars[j].aconnector == aconnector) {
@@ -7289,6 +7319,23 @@
 			}
 		}
 
+		if (j == dc_state->stream_count)
+			continue;
+
+		slot_num = DIV_ROUND_UP(pbn, pbn_div);
+
+		if (stream->timing.flags.DSC != 1) {
+			dm_conn_state->pbn = pbn;
+			dm_conn_state->vcpi_slots = slot_num;
+
+			drm_dp_mst_atomic_enable_dsc(state,
+						     aconnector->port,
+						     dm_conn_state->pbn,
+						     0,
+						     false);
+			continue;
+		}
+
 		vcpi = drm_dp_mst_atomic_enable_dsc(state,
 						    aconnector->port,
 						    pbn, pbn_div,
@@ -7552,7 +7599,7 @@
 	if (ret)
 		return ret;
 
-	ret = fill_dc_scaling_info(new_plane_state, &scaling_info);
+	ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info);
 	if (ret)
 		return ret;
 
@@ -9000,7 +9047,7 @@
 			bundle->surface_updates[planes_count].gamut_remap_matrix = &dc_plane->gamut_remap_matrix;
 		}
 
-		fill_dc_scaling_info(new_plane_state,
+		fill_dc_scaling_info(dm->adev, new_plane_state,
 				     &bundle->scaling_infos[planes_count]);
 
 		bundle->surface_updates[planes_count].scaling_info =
@@ -10787,7 +10834,7 @@
 
 		ret = drm_atomic_add_affected_connectors(state, crtc);
 		if (ret)
-			return ret;
+			goto fail;
 
 		ret = drm_atomic_add_affected_planes(state, crtc);
 		if (ret)
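
Among the amdgpu_dm.c fixes above, the resume-path change from dc_state->stream_status->plane_count to dc_state->stream_status[i].plane_count is worth calling out: the arrow form always dereferences element 0, so every stream was reading stream 0's plane list. The bug class in miniature (illustrative types only):

#include <stdio.h>

struct status { int plane_count; };

int main(void)
{
	struct status stream_status[3] = { {1}, {2}, {3} };

	/* 'stream_status->plane_count' is shorthand for
	 * 'stream_status[0].plane_count' -- the pre-fix code read
	 * stream 0's plane count for every stream i. */
	for (int i = 0; i < 3; i++)
		printf("i=%d buggy=%d fixed=%d\n", i,
		       stream_status->plane_count,	/* always 1 */
		       stream_status[i].plane_count);	/* 1, 2, 3 */
	return 0;
}
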
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 3655663..9d43ecb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -78,12 +78,10 @@
 
 	wr_buf_ptr = wr_buf;
 
-	r = copy_from_user(wr_buf_ptr, buf, wr_buf_size);
-
-		/* r is bytes not be copied */
-	if (r >= wr_buf_size) {
-		DRM_DEBUG_DRIVER("user data not be read\n");
-		return -EINVAL;
+	/* copy_from_user returns the number of bytes that could not be copied */
+	if (copy_from_user(wr_buf_ptr, buf, wr_buf_size)) {
+		DRM_DEBUG_DRIVER("user data could not be read successfully\n");
+		return -EFAULT;
 	}
 
 	/* check number of parameters. isspace could not differ space and \n */
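
The debugfs fix relies on copy_from_user()'s convention of returning the number of bytes it could not copy: the old `r >= wr_buf_size` test only caught total failure and then returned the wrong errno; any nonzero return is a fault and -EFAULT is the right result. A toy copier with the same return convention:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in: returns the number of bytes NOT copied, like
 * copy_from_user(); 'fail_at' simulates a faulting user pointer. */
static unsigned long toy_copy(char *dst, const char *src,
			      unsigned long n, unsigned long fail_at)
{
	unsigned long ok = n < fail_at ? n : fail_at;

	memcpy(dst, src, ok);
	return n - ok;		/* 0 means everything was copied */
}

int main(void)
{
	char buf[8];

	if (toy_copy(buf, "hello", 5, 3))	/* partial copy */
		printf("fault -> %d\n", -EFAULT);
	if (!toy_copy(buf, "hi", 2, 8))		/* full copy */
		printf("ok\n");
	return 0;
}
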
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 874a49b..32a5ce0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -534,13 +534,14 @@
 
 static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *params,
 		struct dsc_mst_fairness_vars *vars,
-		int count)
+		int count,
+		int k)
 {
 	int i;
 
 	for (i = 0; i < count; i++) {
 		memset(&params[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg));
-		if (vars[i].dsc_enabled && dc_dsc_compute_config(
+		if (vars[i + k].dsc_enabled && dc_dsc_compute_config(
 					params[i].sink->ctx->dc->res_pool->dscs[0],
 					&params[i].sink->dsc_caps.dsc_dec_caps,
 					params[i].sink->ctx->dc->debug.dsc_min_slice_height_override,
@@ -553,7 +554,7 @@
 			if (params[i].bpp_overwrite)
 				params[i].timing->dsc_cfg.bits_per_pixel = params[i].bpp_overwrite;
 			else
-				params[i].timing->dsc_cfg.bits_per_pixel = vars[i].bpp_x16;
+				params[i].timing->dsc_cfg.bits_per_pixel = vars[i + k].bpp_x16;
 
 			if (params[i].num_slices_h)
 				params[i].timing->dsc_cfg.num_slices_h = params[i].num_slices_h;
@@ -586,7 +587,8 @@
 			     struct dc_link *dc_link,
 			     struct dsc_mst_fairness_params *params,
 			     struct dsc_mst_fairness_vars *vars,
-			     int count)
+			     int count,
+			     int k)
 {
 	int i;
 	bool bpp_increased[MAX_PIPES];
@@ -601,8 +603,9 @@
 	pbn_per_timeslot = dm_mst_get_pbn_divider(dc_link);
 
 	for (i = 0; i < count; i++) {
-		if (vars[i].dsc_enabled) {
-			initial_slack[i] = kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i].pbn;
+		if (vars[i + k].dsc_enabled) {
+			initial_slack[i] =
+			kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn;
 			bpp_increased[i] = false;
 			remaining_to_increase += 1;
 		} else {
@@ -629,7 +632,7 @@
 		link_timeslots_used = 0;
 
 		for (i = 0; i < count; i++)
-			link_timeslots_used += DIV_ROUND_UP(vars[i].pbn, pbn_per_timeslot);
+			link_timeslots_used += DIV_ROUND_UP(vars[i + k].pbn, pbn_per_timeslot);
 
 		fair_pbn_alloc = (63 - link_timeslots_used) / remaining_to_increase * pbn_per_timeslot;
 
@@ -682,7 +685,8 @@
 			    struct dc_link *dc_link,
 			    struct dsc_mst_fairness_params *params,
 			    struct dsc_mst_fairness_vars *vars,
-			    int count)
+			    int count,
+			    int k)
 {
 	int i;
 	bool tried[MAX_PIPES];
@@ -692,8 +696,8 @@
 	int remaining_to_try = 0;
 
 	for (i = 0; i < count; i++) {
-		if (vars[i].dsc_enabled
-				&& vars[i].bpp_x16 == params[i].bw_range.max_target_bpp_x16
+		if (vars[i + k].dsc_enabled
+				&& vars[i + k].bpp_x16 == params[i].bw_range.max_target_bpp_x16
 				&& params[i].clock_force_enable == DSC_CLK_FORCE_DEFAULT) {
 			kbps_increase[i] = params[i].bw_range.stream_kbps - params[i].bw_range.max_kbps;
 			tried[i] = false;
@@ -748,9 +752,10 @@
 static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
 					     struct dc_state *dc_state,
 					     struct dc_link *dc_link,
-					     struct dsc_mst_fairness_vars *vars)
+					     struct dsc_mst_fairness_vars *vars,
+					     int *link_vars_start_index)
 {
-	int i;
+	int i, k;
 	struct dc_stream_state *stream;
 	struct dsc_mst_fairness_params params[MAX_PIPES];
 	struct amdgpu_dm_connector *aconnector;
@@ -768,11 +773,17 @@
 		if (stream->link != dc_link)
 			continue;
 
+		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+		if (!aconnector)
+			continue;
+
+		if (!aconnector->port)
+			continue;
+
 		stream->timing.flags.DSC = 0;
 
 		params[count].timing = &stream->timing;
 		params[count].sink = stream->sink;
-		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 		params[count].aconnector = aconnector;
 		params[count].port = aconnector->port;
 		params[count].clock_force_enable = aconnector->dsc_settings.dsc_force_enable;
@@ -794,44 +805,55 @@
 
 		count++;
 	}
+
+	if (count == 0) {
+		ASSERT(0);
+		return true;
+	}
+
+	/* k is the start index into vars[] for the phy link of the current mst hub */
+	k = *link_vars_start_index;
+	/* advance the vars start index for the next mst hub's phy link */
+	*link_vars_start_index += count;
+
 	/* Try no compression */
 	for (i = 0; i < count; i++) {
-		vars[i].aconnector = params[i].aconnector;
-		vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
-		vars[i].dsc_enabled = false;
-		vars[i].bpp_x16 = 0;
+		vars[i + k].aconnector = params[i].aconnector;
+		vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+		vars[i + k].dsc_enabled = false;
+		vars[i + k].bpp_x16 = 0;
 		if (drm_dp_atomic_find_vcpi_slots(state,
 						 params[i].port->mgr,
 						 params[i].port,
-						 vars[i].pbn,
+						 vars[i + k].pbn,
 						 dm_mst_get_pbn_divider(dc_link)) < 0)
 			return false;
 	}
 	if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
-		set_dsc_configs_from_fairness_vars(params, vars, count);
+		set_dsc_configs_from_fairness_vars(params, vars, count, k);
 		return true;
 	}
 
 	/* Try max compression */
 	for (i = 0; i < count; i++) {
 		if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) {
-			vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
-			vars[i].dsc_enabled = true;
-			vars[i].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
+			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
+			vars[i + k].dsc_enabled = true;
+			vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
 			if (drm_dp_atomic_find_vcpi_slots(state,
 							  params[i].port->mgr,
 							  params[i].port,
-							  vars[i].pbn,
+							  vars[i + k].pbn,
 							  dm_mst_get_pbn_divider(dc_link)) < 0)
 				return false;
 		} else {
-			vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
-			vars[i].dsc_enabled = false;
-			vars[i].bpp_x16 = 0;
+			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
+			vars[i + k].dsc_enabled = false;
+			vars[i + k].bpp_x16 = 0;
 			if (drm_dp_atomic_find_vcpi_slots(state,
 							  params[i].port->mgr,
 							  params[i].port,
-							  vars[i].pbn,
+							  vars[i + k].pbn,
 							  dm_mst_get_pbn_divider(dc_link)) < 0)
 				return false;
 		}
@@ -840,15 +862,76 @@
 		return false;
 
 	/* Optimize degree of compression */
-	increase_dsc_bpp(state, dc_link, params, vars, count);
+	increase_dsc_bpp(state, dc_link, params, vars, count, k);
 
-	try_disable_dsc(state, dc_link, params, vars, count);
+	try_disable_dsc(state, dc_link, params, vars, count, k);
 
-	set_dsc_configs_from_fairness_vars(params, vars, count);
+	set_dsc_configs_from_fairness_vars(params, vars, count, k);
 
 	return true;
 }
 
+static bool is_dsc_need_re_compute(
+	struct drm_atomic_state *state,
+	struct dc_state *dc_state,
+	struct dc_link *dc_link)
+{
+	int i;
+	bool is_dsc_need_re_compute = false;
+
+	/* only check phy used by mst branch */
+	if (dc_link->type != dc_connection_mst_branch)
+		return false;
+
+	/* check if there is mode change in new request */
+	for (i = 0; i < dc_state->stream_count; i++) {
+		struct amdgpu_dm_connector *aconnector;
+		struct dc_stream_state *stream;
+		struct drm_crtc_state *new_crtc_state;
+		struct drm_connector_state *new_conn_state;
+
+		stream = dc_state->streams[i];
+
+		if (!stream)
+			continue;
+
+		/* check if stream using the same link for mst */
+		if (stream->link != dc_link)
+			continue;
+
+		aconnector = (struct amdgpu_dm_connector *) stream->dm_stream_context;
+		if (!aconnector)
+			continue;
+
+		new_conn_state = drm_atomic_get_new_connector_state(state, &aconnector->base);
+
+		if (!new_conn_state)
+			continue;
+
+		if (IS_ERR(new_conn_state))
+			continue;
+
+		if (!new_conn_state->crtc)
+			continue;
+
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
+
+		if (!new_crtc_state)
+			continue;
+
+		if (IS_ERR(new_crtc_state))
+			continue;
+
+		if (new_crtc_state->enable && new_crtc_state->active) {
+			if (new_crtc_state->mode_changed || new_crtc_state->active_changed ||
+				new_crtc_state->connectors_changed)
+				is_dsc_need_re_compute = true;
+		}
+	}
+
+	return is_dsc_need_re_compute;
+}
+
 bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
 				       struct dc_state *dc_state,
 				       struct dsc_mst_fairness_vars *vars)
@@ -857,6 +940,7 @@
 	struct dc_stream_state *stream;
 	bool computed_streams[MAX_PIPES];
 	struct amdgpu_dm_connector *aconnector;
+	int link_vars_start_index = 0;
 
 	for (i = 0; i < dc_state->stream_count; i++)
 		computed_streams[i] = false;
@@ -881,8 +965,12 @@
 		if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
 			return false;
 
+		if (!is_dsc_need_re_compute(state, dc_state, stream->link))
+			continue;
+
 		mutex_lock(&aconnector->mst_mgr.lock);
-		if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars)) {
+		if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link,
+			vars, &link_vars_start_index)) {
 			mutex_unlock(&aconnector->mst_mgr.lock);
 			return false;
 		}
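
With several MST links now sharing one vars[] array, each link's entries start at a running offset k carried in link_vars_start_index, and stream i of the current link lives at vars[i + k]. The bookkeeping in isolation (illustrative struct; the real entries hold the fairness parameters):

#include <stdio.h>

#define MAX_PIPES 6

struct var { int pbn; };

/* Assign this link's 'count' streams into the shared array starting at
 * *start_index, then advance the index for the next link. */
static void fill_link_vars(struct var *vars, int count, int *start_index)
{
	int k = *start_index;

	*start_index += count;
	for (int i = 0; i < count; i++)
		vars[i + k].pbn = 100 + i;
}

int main(void)
{
	struct var vars[MAX_PIPES] = { {0} };
	int link_vars_start_index = 0;

	fill_link_vars(vars, 2, &link_vars_start_index);  /* link A: slots 0-1 */
	fill_link_vars(vars, 3, &link_vars_start_index);  /* link B: slots 2-4 */

	for (int i = 0; i < MAX_PIPES; i++)
		printf("vars[%d].pbn = %d\n", i, vars[i].pbn);
	return 0;
}
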
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 12e5470..0ded4de 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1085,6 +1085,8 @@
 		struct dc_stream_state *old_stream =
 				dc->current_state->res_ctx.pipe_ctx[i].stream;
 		bool should_disable = true;
+		bool pipe_split_change =
+			context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe;
 
 		for (j = 0; j < context->stream_count; j++) {
 			if (old_stream == context->streams[j]) {
@@ -1092,6 +1094,9 @@
 				break;
 			}
 		}
+		if (!should_disable && pipe_split_change)
+			should_disable = true;
+
 		if (should_disable && old_stream) {
 			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
@@ -1887,6 +1892,7 @@
 	return false;
 }
 
+#ifdef CONFIG_DRM_AMD_DC_DCN
 /* Perform updates here which need to be deferred until next vupdate
  *
  * i.e. blnd lut, 3dlut, and shaper lut bypass regs are double buffered
@@ -1896,7 +1902,6 @@
  */
 static void process_deferred_updates(struct dc *dc)
 {
-#ifdef CONFIG_DRM_AMD_DC_DCN
 	int i = 0;
 
 	if (dc->debug.enable_mem_low_power.bits.cm) {
@@ -1905,8 +1910,8 @@
 			if (dc->res_pool->dpps[i]->funcs->dpp_deferred_update)
 				dc->res_pool->dpps[i]->funcs->dpp_deferred_update(dc->res_pool->dpps[i]);
 	}
-#endif
 }
+#endif /* CONFIG_DRM_AMD_DC_DCN */
 
 void dc_post_update_surfaces_to_stream(struct dc *dc)
 {
@@ -1933,7 +1938,9 @@
 			dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]);
 		}
 
+#ifdef CONFIG_DRM_AMD_DC_DCN
 	process_deferred_updates(dc);
+#endif
 
 	dc->hwss.optimize_bandwidth(dc, context);
 
@@ -3603,7 +3610,8 @@
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	/* YELLOW_CARP B0 USB4 DPIA needs dmub notifications for interrupts */
 	if (dc->ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
-	    dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0)
+	    dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0 &&
+	    !dc->debug.dpia_debug.bits.disable_dpia)
 		return true;
 #endif
 	/* dmub aux needs dmub notifications to be enabled */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 2796bdd..6054478 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -4279,6 +4279,8 @@
 			 */
 			if (status != DC_FAIL_DP_LINK_TRAINING ||
 					pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+				if (!stream->link->link_status.link_active)
+					disable_link(stream->link, pipe_ctx->stream->signal);
 				BREAK_TO_DEBUGGER();
 				return;
 			}
@@ -4768,7 +4770,7 @@
 				timing->dsc_cfg.bits_per_pixel,
 				timing->dsc_cfg.num_slices_h,
 				timing->dsc_cfg.is_dp);
-#endif
+#endif /* CONFIG_DRM_AMD_DC_DCN */
 
 	switch (timing->display_color_depth) {
 	case COLOR_DEPTH_666:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index cc25ba0..cb7bf91 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -5329,6 +5329,14 @@
 			return false;
 
 		if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) {
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+			if (test_pattern == DP_TEST_PATTERN_SQUARE_PULSE)
+				core_link_write_dpcd(link,
+						DP_LINK_SQUARE_PATTERN,
+						p_custom_pattern,
+						1);
+
+#endif
 			/* tell receiver that we are sending qualification
 			 * pattern DP 1.2 or later - DP receiver's link quality
 			 * pattern is set using DPCD LINK_QUAL_LANEx_SET
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index 72b0f85..25e48a8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -236,6 +236,23 @@
 
 	return link_enc;
 }
+/* Clear all link encoder assignments. */
+static void clear_enc_assignments(struct dc_state *state)
+{
+	int i;
+	enum engine_id eng_id;
+	struct dc_stream_state *stream;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].valid = false;
+		eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id;
+		stream = state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream;
+		if (eng_id != ENGINE_ID_UNKNOWN)
+			state->res_ctx.link_enc_cfg_ctx.link_enc_avail[eng_id - ENGINE_ID_DIGA] = eng_id;
+		if (stream)
+			stream->link_enc = NULL;
+	}
+}
 
 void link_enc_cfg_init(
 		struct dc *dc,
@@ -250,6 +267,8 @@
 			state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i] = ENGINE_ID_UNKNOWN;
 	}
 
+	clear_enc_assignments(state);
+
 	state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
 }
 
@@ -265,6 +284,9 @@
 
 	ASSERT(state->stream_count == stream_count);
 
+	if (stream_count == 0)
+		clear_enc_assignments(state);
+
 	/* Release DIG link encoder resources before running assignment algorithm. */
 	for (i = 0; i < stream_count; i++)
 		dc->res_pool->funcs->link_enc_unassign(state, streams[i]);
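
clear_enc_assignments() resets the fixed-size assignment table: every slot is invalidated, a claimed engine goes back into the link_enc_avail pool, and the stream's cached encoder pointer is dropped. A compact model of the table reset (hypothetical types; the stream-pointer detach is omitted for brevity):

#include <stdbool.h>
#include <stdio.h>

#define MAX_PIPES 6
#define ENGINE_UNKNOWN (-1)

struct assignment {
	bool valid;
	int eng_id;	/* ENGINE_UNKNOWN when no engine is claimed */
};

/* Invalidate every slot and hand claimed engines back to the pool. */
static void clear_assignments(struct assignment *a, int *avail)
{
	for (int i = 0; i < MAX_PIPES; i++) {
		a[i].valid = false;
		if (a[i].eng_id != ENGINE_UNKNOWN) {
			avail[a[i].eng_id] = a[i].eng_id;
			a[i].eng_id = ENGINE_UNKNOWN;
		}
	}
}

int main(void)
{
	struct assignment table[MAX_PIPES];
	int avail[MAX_PIPES];

	for (int i = 0; i < MAX_PIPES; i++) {
		table[i] = (struct assignment){ false, ENGINE_UNKNOWN };
		avail[i] = ENGINE_UNKNOWN;
	}
	table[0] = (struct assignment){ true, 2 };	/* stream holds DIG 2 */

	clear_assignments(table, avail);
	printf("slot0 valid=%d avail[2]=%d\n", table[0].valid, avail[2]);
	return 0;
}
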
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index a533979..3aac3f4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.159"
+#define DC_VER "3.2.160"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -675,6 +675,7 @@
 #endif
 	union mem_low_power_enable_options enable_mem_low_power;
 	union root_clock_optimization_options root_clock_optimization;
+	bool hpo_optimization;
 	bool force_vblank_alignment;
 
 	/* Enable dmub aux for legacy ddc */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index bc87ea0..e68e9a8 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -898,6 +898,9 @@
 #ifndef DP_DFP_CAPABILITY_EXTENSION_SUPPORT
 #define DP_DFP_CAPABILITY_EXTENSION_SUPPORT		0x0A3
 #endif
+#ifndef DP_LINK_SQUARE_PATTERN
+#define DP_LINK_SQUARE_PATTERN				0x10F
+#endif
 #ifndef DP_DSC_CONFIGURATION
 #define DP_DSC_CONFIGURATION				0x161
 #endif
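
The new DPCD address follows the file's convention of wrapping each define in #ifndef, so the local fallback yields quietly if drm_dp_helper.h (or any earlier header) already provides the name:

#include <stdio.h>

/* Pretend an upstream header already defined this one. */
#define DP_DSC_CONFIGURATION 0x161

/* Local fallbacks: only take effect while the name is still undefined. */
#ifndef DP_LINK_SQUARE_PATTERN
#define DP_LINK_SQUARE_PATTERN 0x10F
#endif
#ifndef DP_DSC_CONFIGURATION
#define DP_DSC_CONFIGURATION 0xFFFF	/* never used: upstream wins */
#endif

int main(void)
{
	printf("0x%X 0x%X\n", DP_LINK_SQUARE_PATTERN, DP_DSC_CONFIGURATION);
	return 0;
}
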
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
index 989f5b6..a3fee92 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
@@ -671,6 +671,7 @@
 	uint32_t MC_VM_FB_LOCATION_BASE;
 	uint32_t MC_VM_FB_LOCATION_TOP;
 	uint32_t MC_VM_FB_OFFSET;
+	uint32_t HPO_TOP_HW_CONTROL;
 };
  /* set field name */
 #define HWS_SF(blk_name, reg_name, field_name, post_fix)\
@@ -1152,7 +1153,8 @@
 	type DOMAIN_PGFSM_PWR_STATUS;\
 	type HPO_HDMISTREAMCLK_G_GATE_DIS;\
 	type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE;\
-	type I2C_LIGHT_SLEEP_FORCE;
+	type I2C_LIGHT_SLEEP_FORCE;\
+	type HPO_IO_EN;
 
 struct dce_hwseq_shift {
 	HWSEQ_REG_FIELD_LIST(uint8_t)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index af3e68d..24e47df 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1244,6 +1244,12 @@
 #endif
 	if (dc_is_dp_signal(pipe_ctx->stream->signal))
 		dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DISCONNECT_DIG_FE_BE);
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+	if (dc->hwseq->funcs.setup_hpo_hw_control && is_dp_128b_132b_signal(pipe_ctx))
+		dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, false);
+#endif
+
 }
 
 void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index a25732d..04d7bdd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -231,7 +231,7 @@
 
 		if (!s->blank_en)
 			DTN_INFO("[%2d]:  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh"
-				"%  8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh"
+				"  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh"
 				"  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh\n",
 				pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start,
 				dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler,
@@ -1637,7 +1637,7 @@
 
 			dcn10_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
 			if (hws->funcs.enable_stream_gating)
-				hws->funcs.enable_stream_gating(dc, pipe_ctx);
+				hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
 			if (old_clk)
 				old_clk->funcs->cs_power_down(old_clk);
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index cfee456..e6af99a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2270,7 +2270,7 @@
 
 			dcn20_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
 			if (hws->funcs.enable_stream_gating)
-				hws->funcs.enable_stream_gating(dc, pipe_ctx);
+				hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
 			if (old_clk)
 				old_clk->funcs->cs_power_down(old_clk);
 		}
@@ -2397,6 +2397,9 @@
 	 * BY this, it is logic clean to separate stream and link
 	 */
 	if (is_dp_128b_132b_signal(pipe_ctx)) {
+		if (pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control)
+			pipe_ctx->stream->ctx->dc->hwseq->funcs.setup_hpo_hw_control(
+				pipe_ctx->stream->ctx->dc->hwseq, true);
 		setup_dp_hpo_stream(pipe_ctx, true);
 		pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->enable_stream(
 				pipe_ctx->stream_res.hpo_dp_stream_enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index a82319f..9514973 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -1381,13 +1381,11 @@
 
 }
 
-static void mpc3_mpc_init(struct mpc *mpc)
+static void mpc3_set_mpc_mem_lp_mode(struct mpc *mpc)
 {
 	struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
 	int mpcc_id;
 
-	mpc1_mpc_init(mpc);
-
 	if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) {
 		if (mpc30->mpc_mask->MPC_RMU0_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPC_RMU1_MEM_LOW_PWR_MODE) {
 			REG_UPDATE(MPC_RMU_MEM_PWR_CTRL, MPC_RMU0_MEM_LOW_PWR_MODE, 3);
@@ -1405,7 +1403,7 @@
 	.read_mpcc_state = mpc1_read_mpcc_state,
 	.insert_plane = mpc1_insert_plane,
 	.remove_mpcc = mpc1_remove_mpcc,
-	.mpc_init = mpc3_mpc_init,
+	.mpc_init = mpc1_mpc_init,
 	.mpc_init_single_inst = mpc1_mpc_init_single_inst,
 	.update_blending = mpc2_update_blending,
 	.cursor_lock = mpc1_cursor_lock,
@@ -1432,6 +1430,7 @@
 	.power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut,
 	.get_mpc_out_mux = mpc1_get_mpc_out_mux,
 	.set_bg_color = mpc1_set_bg_color,
+	.set_mpc_mem_lp_mode = mpc3_set_mpc_mem_lp_mode,
 };
 
 void dcn30_mpc_construct(struct dcn30_mpc *mpc30,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index e50c695..79a66e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -2128,10 +2128,10 @@
 		int pipe_cnt,
 		int vlevel)
 {
+	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	int i, pipe_idx;
-	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
-	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
-			dm_dram_clock_change_unsupported;
+	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][maxMpcComb];
+	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported;
 
 	if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk)
 		dcfclk = context->bw_ctx.dml.soc.min_dcfclk;
@@ -2207,6 +2207,7 @@
 		context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us;
 		context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us;
 	}
+
 	context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
 	context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
index d24ad77..4d4ac4ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
@@ -66,6 +66,45 @@
 #define FN(reg_name, field_name) \
 	hws->shifts->field_name, hws->masks->field_name
 
+static void enable_memory_low_power(struct dc *dc)
+{
+	struct dce_hwseq *hws = dc->hwseq;
+	int i;
+
+	if (dc->debug.enable_mem_low_power.bits.dmcu) {
+		// Force ERAM to shutdown if DMCU is not enabled
+		if (dc->debug.disable_dmcu || dc->config.disable_dmcu) {
+			REG_UPDATE(DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, 3);
+		}
+	}
+
+	// Set default OPTC memory power states
+	if (dc->debug.enable_mem_low_power.bits.optc) {
+		// Shutdown when unassigned and light sleep in VBLANK
+		REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
+	}
+
+	if (dc->debug.enable_mem_low_power.bits.vga) {
+		// Power down VGA memory
+		REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
+	}
+
+	if (dc->debug.enable_mem_low_power.bits.mpc)
+		dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode(dc->res_pool->mpc);
+
+
+	if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
+		// Power down VPGs
+		for (i = 0; i < dc->res_pool->stream_enc_count; i++)
+			dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+		for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
+			dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
+#endif
+	}
+
+}
+
 void dcn31_init_hw(struct dc *dc)
 {
 	struct abm **abms = dc->res_pool->multiple_abms;
@@ -108,35 +147,7 @@
 	if (res_pool->dccg->funcs->dccg_init)
 		res_pool->dccg->funcs->dccg_init(res_pool->dccg);
 
-	if (dc->debug.enable_mem_low_power.bits.dmcu) {
-		// Force ERAM to shutdown if DMCU is not enabled
-		if (dc->debug.disable_dmcu || dc->config.disable_dmcu) {
-			REG_UPDATE(DMU_MEM_PWR_CNTL, DMCU_ERAM_MEM_PWR_FORCE, 3);
-		}
-	}
-
-	// Set default OPTC memory power states
-	if (dc->debug.enable_mem_low_power.bits.optc) {
-		// Shutdown when unassigned and light sleep in VBLANK
-		REG_SET_2(ODM_MEM_PWR_CTRL3, 0, ODM_MEM_UNASSIGNED_PWR_MODE, 3, ODM_MEM_VBLANK_PWR_MODE, 1);
-	}
-
-	if (dc->debug.enable_mem_low_power.bits.vga) {
-		// Power down VGA memory
-		REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
-	}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-	if (dc->debug.enable_mem_low_power.bits.vpg && dc->res_pool->stream_enc[0]->vpg->funcs->vpg_powerdown) {
-		// Power down VPGs
-		for (i = 0; i < dc->res_pool->stream_enc_count; i++)
-			dc->res_pool->stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->stream_enc[i]->vpg);
-#if defined(CONFIG_DRM_AMD_DC_DP2_0)
-		for (i = 0; i < dc->res_pool->hpo_dp_stream_enc_count; i++)
-			dc->res_pool->hpo_dp_stream_enc[i]->vpg->funcs->vpg_powerdown(dc->res_pool->hpo_dp_stream_enc[i]->vpg);
-#endif
-	}
-#endif
+	enable_memory_low_power(dc);
 
 	if (dc->ctx->dc_bios->fw_info_valid) {
 		res_pool->ref_clocks.xtalin_clock_inKhz =
@@ -264,6 +275,9 @@
 	if (dc->debug.enable_mem_low_power.bits.i2c)
 		REG_UPDATE(DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, 1);
 
+	if (hws->funcs.setup_hpo_hw_control)
+		hws->funcs.setup_hpo_hw_control(hws, false);
+
 	if (!dc->debug.disable_clock_gate) {
 		/* enable all DCN clock gating */
 		REG_WRITE(DCCG_GATE_DISABLE_CNTL, 0);
@@ -588,7 +602,7 @@
 
 			dcn31_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
 			if (hws->funcs.enable_stream_gating)
-				hws->funcs.enable_stream_gating(dc, pipe_ctx);
+				hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
 			if (old_clk)
 				old_clk->funcs->cs_power_down(old_clk);
 		}
@@ -597,3 +611,9 @@
 	/* New dc_state in the process of being applied to hardware. */
 	dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_TRANSIENT;
 }
+
+void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
+{
+	if (hws->ctx->dc->debug.hpo_optimization)
+		REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
+}
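
dcn31_setup_hpo_hw_control() is exposed through the hwseq private function table, and every call site added in this series checks the pointer first, which is how DC keeps per-generation hooks optional. The guard-then-call convention as a sketch (struct and names illustrative):

#include <stdbool.h>
#include <stdio.h>

struct hwseq_funcs {
	void (*setup_hpo_hw_control)(bool enable);	/* may be NULL */
};

static void dcn31_hook(bool enable)
{
	printf("HPO_IO_EN=%d\n", enable);
}

static void blank_stream(const struct hwseq_funcs *funcs)
{
	/* Older generations leave the hook NULL; only call when set. */
	if (funcs->setup_hpo_hw_control)
		funcs->setup_hpo_hw_control(false);
}

int main(void)
{
	struct hwseq_funcs dcn31 = { .setup_hpo_hw_control = dcn31_hook };
	struct hwseq_funcs dcn10 = { 0 };

	blank_stream(&dcn31);	/* prints */
	blank_stream(&dcn10);	/* safely does nothing */
	return 0;
}
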
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
index 7ae45dd..edfc01d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
@@ -54,5 +54,6 @@
 bool dcn31_is_abm_supported(struct dc *dc,
 		struct dc_state *context, struct dc_stream_state *stream);
 void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
+void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
 
 #endif /* __DC_HWSS_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index c6a73778..05335a8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -137,6 +137,7 @@
 	.dccg_init = dcn20_dccg_init,
 	.set_blend_lut = dcn30_set_blend_lut,
 	.set_shaper_3dlut = dcn20_set_shaper_3dlut,
+	.setup_hpo_hw_control = dcn31_setup_hpo_hw_control,
 };
 
 void dcn31_hw_sequencer_construct(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index 87b2c24..1889629 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -860,7 +860,8 @@
 	SR(D6VGA_CONTROL), \
 	SR(DC_IP_REQUEST_CNTL), \
 	SR(AZALIA_AUDIO_DTO), \
-	SR(AZALIA_CONTROLLER_CLOCK_GATING)
+	SR(AZALIA_CONTROLLER_CLOCK_GATING), \
+	SR(HPO_TOP_HW_CONTROL)
 
 static const struct dce_hwseq_registers hwseq_reg = {
 		HWSEQ_DCN31_REG_LIST()
@@ -898,7 +899,8 @@
 	HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_UNASSIGNED_PWR_MODE, mask_sh), \
 	HWS_SF(, ODM_MEM_PWR_CTRL3, ODM_MEM_VBLANK_PWR_MODE, mask_sh), \
 	HWS_SF(, MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, mask_sh), \
-	HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh)
+	HWS_SF(, DIO_MEM_PWR_CTRL, I2C_LIGHT_SLEEP_FORCE, mask_sh), \
+	HWS_SF(, HPO_TOP_HW_CONTROL, HPO_IO_EN, mask_sh)
 
 static const struct dce_hwseq_shift hwseq_shift = {
 		HWSEQ_DCN31_MASK_SH_LIST(__SHIFT)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index e3d9f1dec..f47d82d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -3576,16 +3576,9 @@
 		MinDSCBPP = 8;
 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
 	} else {
-		if (Output == dm_hdmi) {
-			NonDSCBPP0 = 24;
-			NonDSCBPP1 = 24;
-			NonDSCBPP2 = 24;
-		}
-		else {
-			NonDSCBPP0 = 16;
-			NonDSCBPP1 = 20;
-			NonDSCBPP2 = 24;
-		}
+		NonDSCBPP0 = 16;
+		NonDSCBPP1 = 20;
+		NonDSCBPP2 = 24;
 
 		if (Format == dm_n422) {
 			MinDSCBPP = 7;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index d58925c..7e937bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -3892,15 +3892,11 @@
 		MinDSCBPP = 8;
 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
 	} else {
-		if (Output == dm_hdmi) {
-			NonDSCBPP0 = 24;
-			NonDSCBPP1 = 24;
-			NonDSCBPP2 = 24;
-		} else {
-			NonDSCBPP0 = 16;
-			NonDSCBPP1 = 20;
-			NonDSCBPP2 = 24;
-		}
+
+		NonDSCBPP0 = 16;
+		NonDSCBPP1 = 20;
+		NonDSCBPP2 = 24;
+
 		if (Format == dm_n422) {
 			MinDSCBPP = 7;
 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 04d6ec3..f5fd2a0 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -367,6 +367,7 @@
 	void (*set_bg_color)(struct mpc *mpc,
 			struct tg_color *bg_color,
 			int mpcc_id);
+	void (*set_mpc_mem_lp_mode)(struct mpc *mpc);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
index f324285..c200825 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
@@ -143,6 +143,7 @@
 			const struct dc_plane_state *plane_state);
 	void (*PLAT_58856_wa)(struct dc_state *context,
 			struct pipe_ctx *pipe_ctx);
+	void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable);
 };
 
 struct dce_hwseq {
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 717c0e5..cd204ee 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -238,6 +238,7 @@
 	bool load_inst_const;
 	bool skip_panel_power_sequence;
 	bool disable_z10;
+	bool power_optimization;
 	bool dpia_supported;
 	bool disable_dpia;
 };
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 0293c58..c29a67c 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -46,10 +46,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x9525efb5
+#define DMUB_FW_VERSION_GIT_HASH 0x1d82d23e
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 90
+#define DMUB_FW_VERSION_REVISION 91
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index 10ebf20..fa05691 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -340,6 +340,7 @@
 	boot_options.bits.z10_disable = params->disable_z10;
 	boot_options.bits.dpia_supported = params->dpia_supported;
 	boot_options.bits.enable_dpia = params->disable_dpia ? 0 : 1;
+	boot_options.bits.power_optimization = params->power_optimization;
 
 	boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 1 : 0;
 
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index f1a46d1..4b9e68a 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -98,7 +98,8 @@
 	AMD_IP_BLOCK_TYPE_ACP,
 	AMD_IP_BLOCK_TYPE_VCN,
 	AMD_IP_BLOCK_TYPE_MES,
-	AMD_IP_BLOCK_TYPE_JPEG
+	AMD_IP_BLOCK_TYPE_JPEG,
+	AMD_IP_BLOCK_TYPE_NUM,
 };
 
 enum amd_clockgating_state {
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 03581d5..08362d5 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -927,6 +927,13 @@
 {
 	int ret = 0;
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+	enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON;
+
+	if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state) {
+		dev_dbg(adev->dev, "IP block %d already in the target %s state!",
+				block_type, gate ? "gate" : "ungate");
+		return 0;
+	}
 
 	switch (block_type) {
 	case AMD_IP_BLOCK_TYPE_UVD:
@@ -979,6 +986,9 @@
 		break;
 	}
 
+	if (!ret)
+		atomic_set(&adev->pm.pwr_state[block_type], pwr_state);
+
 	return ret;
 }
 
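The power-gating helper above now caches a per-IP-block power state in an atomic_t: a request that matches the cached state returns early, and the cache is committed only after the transition succeeds, so a failed SMU call leaves the recorded state unchanged. The same check/transition/commit ordering as a compilable sketch (toy transition function):

#include <stdatomic.h>
#include <stdio.h>

enum { PWR_UNKNOWN, PWR_ON, PWR_OFF };

static atomic_int pwr_state[4];	/* one slot per IP block (toy sizing) */

static int do_transition(int block, int gate)
{
	printf("smu call: block %d %s\n", block, gate ? "gate" : "ungate");
	return 0;	/* 0 on success, negative errno on failure */
}

static int set_powergating(int block, int gate)
{
	int target = gate ? PWR_OFF : PWR_ON;
	int ret;

	if (atomic_load(&pwr_state[block]) == target)
		return 0;		/* already there: skip the SMU call */

	ret = do_transition(block, gate);
	if (!ret)			/* commit only on success */
		atomic_store(&pwr_state[block], target);
	return ret;
}

int main(void)
{
	set_powergating(1, 1);	/* issues the SMU call */
	set_powergating(1, 1);	/* cached: prints nothing */
	return 0;
}
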
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 49fe415..41472ed 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2094,6 +2094,10 @@
 	} else if (DEVICE_ATTR_IS(pp_dpm_dclk)) {
 		if (!(asic_type == CHIP_VANGOGH || asic_type == CHIP_SIENNA_CICHLID))
 			*states = ATTR_STATE_UNSUPPORTED;
+	} else if (DEVICE_ATTR_IS(pp_power_profile_mode)) {
+		if (!adev->powerplay.pp_funcs->get_power_profile_mode ||
+		    amdgpu_dpm_get_power_profile_mode(adev, NULL) == -EOPNOTSUPP)
+			*states = ATTR_STATE_UNSUPPORTED;
 	}
 
 	switch (asic_type) {
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 98f1b3d..16e3f72 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -417,6 +417,12 @@
 	enum amd_dpm_forced_level forced_level;
 };
 
+enum ip_power_state {
+	POWER_STATE_UNKNOWN,
+	POWER_STATE_ON,
+	POWER_STATE_OFF,
+};
+
 struct amdgpu_pm {
 	struct mutex		mutex;
 	u32                     current_sclk;
@@ -452,6 +458,8 @@
 	struct i2c_adapter smu_i2c;
 	struct mutex		smu_i2c_mutex;
 	struct list_head	pm_attr_list;
+
+	atomic_t		pwr_state[AMD_IP_BLOCK_TYPE_NUM];
 };
 
 #define R600_SSTU_DFLT                               0
diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_ppsmc.h
index 1d34479..fc91988 100644
--- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_ppsmc.h
@@ -51,7 +51,7 @@
 #define PPSMC_MSG_PowerUpVcn                    0x07 ///< Power up VCN; VCN is power gated by default
 #define PPSMC_MSG_SetHardMinVcn                 0x08 ///< For wireless display
 #define PPSMC_MSG_SetSoftMinGfxclk              0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_ActiveProcessNotify           0x0A ///< Set active work load type
+#define PPSMC_MSG_ActiveProcessNotify           0x0A ///< Deprecated (Not to be used)
 #define PPSMC_MSG_ForcePowerDownGfx             0x0B ///< Force power down GFX, i.e. enter GFXOFF
 #define PPSMC_MSG_PrepareMp1ForUnload           0x0C ///< Prepare PMFW for GFX driver unload
 #define PPSMC_MSG_SetDriverDramAddrHigh         0x0D ///< Set high 32 bits of DRAM address for Driver table transfer
@@ -63,7 +63,7 @@
 #define PPSMC_MSG_SetHardMinSocclkByFreq        0x13 ///< Set hard min for SOC CLK
 #define PPSMC_MSG_SetSoftMinFclk                0x14 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinVcn                 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK)
-#define PPSMC_MSG_SPARE0                        0x16 ///< Spared
+#define PPSMC_MSG_SPARE                         0x16 ///< Spare
 #define PPSMC_MSG_GetGfxclkFrequency            0x17 ///< Get GFX clock frequency
 #define PPSMC_MSG_GetFclkFrequency              0x18 ///< Get FCLK frequency
 #define PPSMC_MSG_AllowGfxOff                   0x19 ///< Inform PMFW of allowing GFXOFF entry
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 3212150..8d796ed 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -875,34 +875,30 @@
 static int pp_get_power_profile_mode(void *handle, char *buf)
 {
 	struct pp_hwmgr *hwmgr = handle;
+	int ret;
 
-	if (!hwmgr || !hwmgr->pm_en || !buf)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->hwmgr_func->get_power_profile_mode)
+		return -EOPNOTSUPP;
+	if (!buf)
 		return -EINVAL;
 
-	if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) {
-		pr_info_ratelimited("%s was not implemented.\n", __func__);
-		return snprintf(buf, PAGE_SIZE, "\n");
-	}
-
-	return hwmgr->hwmgr_func->get_power_profile_mode(hwmgr, buf);
+	mutex_lock(&hwmgr->smu_lock);
+	ret = hwmgr->hwmgr_func->get_power_profile_mode(hwmgr, buf);
+	mutex_unlock(&hwmgr->smu_lock);
+	return ret;
 }
 
 static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = -EINVAL;
+	int ret = -EOPNOTSUPP;
 
-	if (!hwmgr || !hwmgr->pm_en)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->hwmgr_func->set_power_profile_mode)
 		return ret;
 
-	if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) {
-		pr_info_ratelimited("%s was not implemented.\n", __func__);
-		return ret;
-	}
-
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
 		pr_debug("power profile setting is for manual dpm mode only.\n");
-		return ret;
+		return -EINVAL;
 	}
 
 	mutex_lock(&hwmgr->smu_lock);
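
These hunks tighten the error-code contract of the powerplay wrappers:
-EOPNOTSUPP now consistently means "the backend has no such callback", while
-EINVAL is reserved for malformed requests (NULL buffer, wrong dpm level).
That split is what lets the sysfs visibility check in amdgpu_pm.c above probe
for support with a NULL buffer. A sketch of the resulting convention (this
restates the hunks, it is not new driver logic):

	/* -EOPNOTSUPP: callback missing, hide the sysfs attribute.
	 * Anything else (including -EINVAL for the NULL buf) means the
	 * power-profile interface is present. */
	if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == -EOPNOTSUPP)
		*states = ATTR_STATE_UNSUPPORTED;
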
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
index 1de3ae7..1f406f2 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
@@ -1036,13 +1036,13 @@
 		else
 			i = 1;
 
-		size += sysfs_emit_at(buf, size, "0: %uMhz %s\n",
+		size += sprintf(buf + size, "0: %uMhz %s\n",
 					data->gfx_min_freq_limit/100,
 					i == 0 ? "*" : "");
-		size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+		size += sprintf(buf + size, "1: %uMhz %s\n",
 					i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK,
 					i == 1 ? "*" : "");
-		size += sysfs_emit_at(buf, size, "2: %uMhz %s\n",
+		size += sprintf(buf + size, "2: %uMhz %s\n",
 					data->gfx_max_freq_limit/100,
 					i == 2 ? "*" : "");
 		break;
@@ -1050,7 +1050,7 @@
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now);
 
 		for (i = 0; i < mclk_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i,
 					mclk_table->entries[i].clk / 100,
 					((mclk_table->entries[i].clk / 100)
@@ -1065,10 +1065,10 @@
 			if (ret)
 				return ret;
 
-			size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
-			size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+			size += sprintf(buf + size, "0: %10uMhz\n",
 			(data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq);
-			size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+			size += sprintf(buf + size, "1: %10uMhz\n",
 			(data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq);
 		}
 		break;
@@ -1081,8 +1081,8 @@
 			if (ret)
 				return ret;
 
-			size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
-			size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
 				min_freq, max_freq);
 		}
 		break;
@@ -1456,6 +1456,8 @@
 	if (!buf)
 		return -EINVAL;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0],
 			title[1], title[2], title[3], title[4], title[5]);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index e7803ce..611969b 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -4926,7 +4926,7 @@
 		now = i;
 
 		for (i = 0; i < sclk_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, sclk_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -4941,7 +4941,7 @@
 		now = i;
 
 		for (i = 0; i < mclk_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, mclk_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -4955,7 +4955,7 @@
 		now = i;
 
 		for (i = 0; i < pcie_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %s %s\n", i,
+			size += sprintf(buf + size, "%d: %s %s\n", i,
 					(pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x8" :
 					(pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" :
 					(pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "",
@@ -4963,32 +4963,32 @@
 		break;
 	case OD_SCLK:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
+			size += sprintf(buf + size, "%s:\n", "OD_SCLK");
 			for (i = 0; i < odn_sclk_table->num_of_pl; i++)
-				size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
 					i, odn_sclk_table->entries[i].clock/100,
 					odn_sclk_table->entries[i].vddc);
 		}
 		break;
 	case OD_MCLK:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
+			size += sprintf(buf + size, "%s:\n", "OD_MCLK");
 			for (i = 0; i < odn_mclk_table->num_of_pl; i++)
-				size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
 					i, odn_mclk_table->entries[i].clock/100,
 					odn_mclk_table->entries[i].vddc);
 		}
 		break;
 	case OD_RANGE:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
-			size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
 				data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
 				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
-			size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
+			size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
 				data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
 				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
-			size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
+			size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
 				data->odn_dpm_table.min_vddc,
 				data->odn_dpm_table.max_vddc);
 		}
@@ -5518,6 +5518,8 @@
 	if (!buf)
 		return -EINVAL;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	size += sysfs_emit_at(buf, size, "%s %16s %16s %16s %16s %16s %16s %16s\n",
 			title[0], title[1], title[2], title[3],
 			title[4], title[5], title[6], title[7]);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
index b94a77e..03bf8f0 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
@@ -1559,7 +1559,7 @@
 				CURR_SCLK_INDEX);
 
 		for (i = 0; i < sclk_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, sclk_table->entries[i].clk / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -1571,7 +1571,7 @@
 				CURR_MCLK_INDEX);
 
 		for (i = SMU8_NUM_NBPMEMORYCLOCK; i > 0; i--)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					SMU8_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100,
 					(SMU8_NUM_NBPMEMORYCLOCK-i == now) ? "*" : "");
 		break;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
index ad33983..2a75da1 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h
@@ -109,6 +109,19 @@
 			   struct amdgpu_irq_src *source,
 			   struct amdgpu_iv_entry *entry);
 
+/*
+ * Helper function to make sysfs_emit_at() happy. Align buf to
+ * the current page boundary and record the offset.
+ */
+static inline void phm_get_sysfs_buf(char **buf, int *offset)
+{
+	if (!*buf || !offset)
+		return;
+
+	*offset = offset_in_page(*buf);
+	*buf -= *offset;
+}
+
 int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr);
 
 void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
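
Context for the sysfs_emit_at() -> sprintf() conversions in the hwmgr files
above and below: sysfs_emit_at() refuses any buffer that is not page aligned,
roughly along these lines (paraphrased from fs/sysfs/file.c in 5.16):

	int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
	{
		if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE,
			 "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at))
			return 0;
		/* ... vscnprintf into buf + at ... */
	}

The powerplay print paths can be handed buf at an arbitrary offset, so the
inner helpers fall back to sprintf() while the entry points that still use
sysfs_emit_at() first rewind to the page boundary via phm_get_sysfs_buf()
and carry the recorded offset in size.
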
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
index c152a61d..e633665 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
@@ -4548,6 +4548,8 @@
 	int ret = 0;
 	int size = 0;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	ret = vega10_get_enabled_smc_features(hwmgr, &features_enabled);
 	PP_ASSERT_WITH_CODE(!ret,
 			"[EnableAllSmuFeatures] Failed to get enabled smc features!",
@@ -4650,7 +4652,7 @@
 		else
 			count = sclk_table->count;
 		for (i = 0; i < count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, sclk_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -4661,7 +4663,7 @@
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now);
 
 		for (i = 0; i < mclk_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, mclk_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -4672,7 +4674,7 @@
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now);
 
 		for (i = 0; i < soc_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, soc_table->dpm_levels[i].value / 100,
 					(i == now) ? "*" : "");
 		break;
@@ -4684,7 +4686,7 @@
 				PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now);
 
 		for (i = 0; i < dcef_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 					i, dcef_table->dpm_levels[i].value / 100,
 					(dcef_table->dpm_levels[i].value / 100 == now) ?
 					"*" : "");
@@ -4698,7 +4700,7 @@
 			gen_speed = pptable->PcieGenSpeed[i];
 			lane_width = pptable->PcieLaneCount[i];
 
-			size += sysfs_emit_at(buf, size, "%d: %s %s %s\n", i,
+			size += sprintf(buf + size, "%d: %s %s %s\n", i,
 					(gen_speed == 0) ? "2.5GT/s," :
 					(gen_speed == 1) ? "5.0GT/s," :
 					(gen_speed == 2) ? "8.0GT/s," :
@@ -4717,34 +4719,34 @@
 
 	case OD_SCLK:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
+			size += sprintf(buf + size, "%s:\n", "OD_SCLK");
 			podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
 			for (i = 0; i < podn_vdd_dep->count; i++)
-				size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
+				size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
 					i, podn_vdd_dep->entries[i].clk / 100,
 						podn_vdd_dep->entries[i].vddc);
 		}
 		break;
 	case OD_MCLK:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
+			size += sprintf(buf + size, "%s:\n", "OD_MCLK");
 			podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
 			for (i = 0; i < podn_vdd_dep->count; i++)
-				size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
+				size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
 					i, podn_vdd_dep->entries[i].clk/100,
 						podn_vdd_dep->entries[i].vddc);
 		}
 		break;
 	case OD_RANGE:
 		if (hwmgr->od_enabled) {
-			size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
-			size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
 				data->golden_dpm_table.gfx_table.dpm_levels[0].value/100,
 				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
-			size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
+			size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
 				data->golden_dpm_table.mem_table.dpm_levels[0].value/100,
 				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
-			size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
+			size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
 				data->odn_dpm_table.min_vddc,
 				data->odn_dpm_table.max_vddc);
 		}
@@ -5112,6 +5114,8 @@
 	if (!buf)
 		return -EINVAL;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0],
 			title[1], title[2], title[3], title[4], title[5]);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
index 8558718..a2f4d67 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
@@ -2141,6 +2141,8 @@
 	int ret = 0;
 	int size = 0;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	ret = vega12_get_enabled_smc_features(hwmgr, &features_enabled);
 	PP_ASSERT_WITH_CODE(!ret,
 		"[EnableAllSmuFeatures] Failed to get enabled smc features!",
@@ -2256,7 +2258,7 @@
 				"Attempt to get gfx clk levels Failed!",
 				return -1);
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
 		break;
@@ -2272,7 +2274,7 @@
 				"Attempt to get memory clk levels Failed!",
 				return -1);
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
 		break;
@@ -2290,7 +2292,7 @@
 				"Attempt to get soc clk levels Failed!",
 				return -1);
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
 		break;
@@ -2308,7 +2310,7 @@
 				"Attempt to get dcef clk levels Failed!",
 				return -1);
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
 		break;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 0cf39c1..85d55ab 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -3238,6 +3238,8 @@
 	int ret = 0;
 	int size = 0;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	ret = vega20_get_enabled_smc_features(hwmgr, &features_enabled);
 	PP_ASSERT_WITH_CODE(!ret,
 			"[EnableAllSmuFeatures] Failed to get enabled smc features!",
@@ -3372,13 +3374,13 @@
 				return ret);
 
 		if (vega20_get_sclks(hwmgr, &clocks)) {
-			size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+			size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
 				now / 100);
 			break;
 		}
 
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
 		break;
@@ -3390,13 +3392,13 @@
 				return ret);
 
 		if (vega20_get_memclocks(hwmgr, &clocks)) {
-			size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+			size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
 				now / 100);
 			break;
 		}
 
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
 		break;
@@ -3408,13 +3410,13 @@
 				return ret);
 
 		if (vega20_get_socclocks(hwmgr, &clocks)) {
-			size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+			size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
 				now / 100);
 			break;
 		}
 
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
 		break;
@@ -3426,7 +3428,7 @@
 				return ret);
 
 		for (i = 0; i < fclk_dpm_table->count; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, fclk_dpm_table->dpm_levels[i].value,
 				fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : "");
 		break;
@@ -3438,13 +3440,13 @@
 				return ret);
 
 		if (vega20_get_dcefclocks(hwmgr, &clocks)) {
-			size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+			size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
 				now / 100);
 			break;
 		}
 
 		for (i = 0; i < clocks.num_levels; i++)
-			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
 				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
 		break;
@@ -3458,7 +3460,7 @@
 			gen_speed = pptable->PcieGenSpeed[i];
 			lane_width = pptable->PcieLaneCount[i];
 
-			size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i,
+			size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
 					(gen_speed == 0) ? "2.5GT/s," :
 					(gen_speed == 1) ? "5.0GT/s," :
 					(gen_speed == 2) ? "8.0GT/s," :
@@ -3479,18 +3481,18 @@
 	case OD_SCLK:
 		if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
-			size = sysfs_emit(buf, "%s:\n", "OD_SCLK");
-			size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+			size += sprintf(buf + size, "0: %10uMhz\n",
 				od_table->GfxclkFmin);
-			size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+			size += sprintf(buf + size, "1: %10uMhz\n",
 				od_table->GfxclkFmax);
 		}
 		break;
 
 	case OD_MCLK:
 		if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
-			size = sysfs_emit(buf, "%s:\n", "OD_MCLK");
-			size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+			size += sprintf(buf + size, "%s:\n", "OD_MCLK");
+			size += sprintf(buf + size, "1: %10uMhz\n",
 				od_table->UclkFmax);
 		}
 
@@ -3503,14 +3505,14 @@
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
-			size = sysfs_emit(buf, "%s:\n", "OD_VDDC_CURVE");
-			size += sysfs_emit_at(buf, size, "0: %10uMhz %10dmV\n",
+			size += sprintf(buf + size, "%s:\n", "OD_VDDC_CURVE");
+			size += sprintf(buf + size, "0: %10uMhz %10dmV\n",
 				od_table->GfxclkFreq1,
 				od_table->GfxclkVolt1 / VOLTAGE_SCALE);
-			size += sysfs_emit_at(buf, size, "1: %10uMhz %10dmV\n",
+			size += sprintf(buf + size, "1: %10uMhz %10dmV\n",
 				od_table->GfxclkFreq2,
 				od_table->GfxclkVolt2 / VOLTAGE_SCALE);
-			size += sysfs_emit_at(buf, size, "2: %10uMhz %10dmV\n",
+			size += sprintf(buf + size, "2: %10uMhz %10dmV\n",
 				od_table->GfxclkFreq3,
 				od_table->GfxclkVolt3 / VOLTAGE_SCALE);
 		}
@@ -3518,17 +3520,17 @@
 		break;
 
 	case OD_RANGE:
-		size = sysfs_emit(buf, "%s:\n", "OD_RANGE");
+		size += sprintf(buf + size, "%s:\n", "OD_RANGE");
 
 		if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
-			size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n",
+			size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n",
 				od8_settings[OD8_SETTING_GFXCLK_FMIN].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_FMAX].max_value);
 		}
 
 		if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
-			size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n",
+			size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
 				od8_settings[OD8_SETTING_UCLK_FMAX].min_value,
 				od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
 		}
@@ -3539,22 +3541,22 @@
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
 		    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
+			size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
 				od8_settings[OD8_SETTING_GFXCLK_FREQ1].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_FREQ1].max_value);
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
+			size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].max_value);
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
+			size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
 				od8_settings[OD8_SETTING_GFXCLK_FREQ2].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_FREQ2].max_value);
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
+			size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].max_value);
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
+			size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
 				od8_settings[OD8_SETTING_GFXCLK_FREQ3].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_FREQ3].max_value);
-			size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
+			size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value,
 				od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value);
 		}
@@ -4003,6 +4005,8 @@
 	if (!buf)
 		return -EINVAL;
 
+	phm_get_sysfs_buf(&buf, &size);
+
 	size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
 			title[0], title[1], title[2], title[3], title[4], title[5],
 			title[6], title[7], title[8], title[9], title[10]);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b06c59d..01168b8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1468,7 +1468,7 @@
 			dev_err(adev->dev, "Failed to disable smu features.\n");
 	}
 
-	if (adev->ip_versions[MP1_HWIP][0] >= IP_VERSION(11, 0, 0) &&
+	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0) &&
 	    adev->gfx.rlc.funcs->stop)
 		adev->gfx.rlc.funcs->stop(adev);
 
@@ -2534,13 +2534,15 @@
 	struct smu_context *smu = handle;
 	int ret = 0;
 
-	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
+	    !smu->ppt_funcs->get_power_profile_mode)
 		return -EOPNOTSUPP;
+	if (!buf)
+		return -EINVAL;
 
 	mutex_lock(&smu->mutex);
 
-	if (smu->ppt_funcs->get_power_profile_mode)
-		ret = smu->ppt_funcs->get_power_profile_mode(smu, buf);
+	ret = smu->ppt_funcs->get_power_profile_mode(smu, buf);
 
 	mutex_unlock(&smu->mutex);
 
@@ -2554,7 +2556,8 @@
 	struct smu_context *smu = handle;
 	int ret = 0;
 
-	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+	if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
+	    !smu->ppt_funcs->set_power_profile_mode)
 		return -EOPNOTSUPP;
 
 	mutex_lock(&smu->mutex);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
index cbc3f99..2238ee1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
@@ -309,6 +309,7 @@
 {
 	int ret = 0, size = 0;
 	uint32_t cur_value = 0;
+	int i;
 
 	smu_cmn_get_sysfs_buf(&buf, &size);
 
@@ -334,8 +335,6 @@
 		size += sysfs_emit_at(buf, size, "VDDC: %7umV  %10umV\n",
 						CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX);
 		break;
-	case SMU_GFXCLK:
-	case SMU_SCLK:
 	case SMU_FCLK:
 	case SMU_MCLK:
 	case SMU_SOCCLK:
@@ -346,6 +345,25 @@
 			return ret;
 		size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value);
 		break;
+	case SMU_SCLK:
+	case SMU_GFXCLK:
+		ret = cyan_skillfish_get_current_clk_freq(smu, clk_type, &cur_value);
+		if (ret)
+			return ret;
+		if (cur_value == CYAN_SKILLFISH_SCLK_MAX)
+			i = 2;
+		else if (cur_value == CYAN_SKILLFISH_SCLK_MIN)
+			i = 0;
+		else
+			i = 1;
+		size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", CYAN_SKILLFISH_SCLK_MIN,
+				i == 0 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+				i == 1 ? cur_value : cyan_skillfish_sclk_default,
+				i == 1 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", CYAN_SKILLFISH_SCLK_MAX,
+				i == 2 ? "*" : "");
+		break;
 	default:
 		dev_warn(smu->adev->dev, "Unsupported clock type\n");
 		return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 71161f6..60a5570 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1265,7 +1265,7 @@
 			enum smu_clk_type clk_type, char *buf)
 {
 	uint16_t *curve_settings;
-	int i, size = 0, ret = 0;
+	int i, levels, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0;
 	uint32_t freq_values[3] = {0};
 	uint32_t mark_index = 0;
@@ -1319,14 +1319,17 @@
 			freq_values[1] = cur_value;
 			mark_index = cur_value == freq_values[0] ? 0 :
 				     cur_value == freq_values[2] ? 2 : 1;
-			if (mark_index != 1)
-				freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
 
-			for (i = 0; i < 3; i++) {
+			levels = 3;
+			if (mark_index != 1) {
+				levels = 2;
+				freq_values[1] = freq_values[2];
+			}
+
+			for (i = 0; i < levels; i++) {
 				size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i],
 						i == mark_index ? "*" : "");
 			}
-
 		}
 		break;
 	case SMU_PCIE:
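
Behavioral note on the gfxclk hunk above: the old code always printed three
levels, fabricating a midpoint of (min + max) / 2 whenever the current clock
sat at either end of the range; now the synthetic entry is dropped and only
real levels are listed. Illustrative pp_dpm_sclk output with made-up
frequencies, current clock pinned at the minimum:

	before:			after:
	0: 300Mhz *		0: 300Mhz *
	1: 1050Mhz		1: 1800Mhz
	2: 1800Mhz
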
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 421f38e..c02ed65 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -683,6 +683,7 @@
 	int i, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0;
 	bool cur_value_match_level = false;
+	uint32_t min, max;
 
 	memset(&metrics, 0, sizeof(metrics));
 
@@ -743,6 +744,12 @@
 		if (ret)
 			return ret;
 		break;
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetGfxclkFrequency, 0, &cur_value);
+		if (ret)
+			return ret;
+		break;
 	default:
 		break;
 	}
@@ -768,6 +776,24 @@
 		if (!cur_value_match_level)
 			size += sysfs_emit_at(buf, size, "   %uMhz *\n", cur_value);
 		break;
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq;
+		max = (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq;
+		if (cur_value == max)
+			i = 2;
+		else if (cur_value == min)
+			i = 0;
+		else
+			i = 1;
+		size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", min,
+				i == 0 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+				i == 1 ? cur_value : VANGOGH_UMD_PSTATE_STANDARD_GFXCLK,
+				i == 1 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", max,
+				i == 2 ? "*" : "");
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index a403657..caf1775 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -64,7 +64,6 @@
 	MSG_MAP(PowerDownVcn,                   PPSMC_MSG_PowerDownVcn,			1),
 	MSG_MAP(PowerUpVcn,                     PPSMC_MSG_PowerUpVcn,			1),
 	MSG_MAP(SetHardMinVcn,                  PPSMC_MSG_SetHardMinVcn,		1),
-	MSG_MAP(ActiveProcessNotify,            PPSMC_MSG_ActiveProcessNotify,		1),
 	MSG_MAP(PrepareMp1ForUnload,            PPSMC_MSG_PrepareMp1ForUnload,      1),
 	MSG_MAP(SetDriverDramAddrHigh,          PPSMC_MSG_SetDriverDramAddrHigh,	1),
 	MSG_MAP(SetDriverDramAddrLow,           PPSMC_MSG_SetDriverDramAddrLow,		1),
@@ -135,14 +134,6 @@
 	TAB_MAP_VALID(CUSTOM_DPM),
 	TAB_MAP_VALID(DPMCLOCKS),
 };
-
-static struct cmn2asic_mapping yellow_carp_workload_map[PP_SMC_POWER_PROFILE_COUNT] = {
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_FULLSCREEN3D,		WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO,		WORKLOAD_PPLIB_VIDEO_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR,			WORKLOAD_PPLIB_VR_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,		WORKLOAD_PPLIB_COMPUTE_BIT),
-	WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,		WORKLOAD_PPLIB_CUSTOM_BIT),
-};
 	
 static int yellow_carp_init_smc_tables(struct smu_context *smu)
 {
@@ -543,81 +534,6 @@
 	return 0;
 }
 
-static int yellow_carp_get_power_profile_mode(struct smu_context *smu,
-						char *buf)
-{
-	static const char *profile_name[] = {
-					"BOOTUP_DEFAULT",
-					"3D_FULL_SCREEN",
-					"POWER_SAVING",
-					"VIDEO",
-					"VR",
-					"COMPUTE",
-					"CUSTOM"};
-	uint32_t i, size = 0;
-	int16_t workload_type = 0;
-
-	if (!buf)
-		return -EINVAL;
-
-	for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
-		/*
-		 * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT.
-		 * Not all profile modes are supported on yellow carp.
-		 */
-		workload_type = smu_cmn_to_asic_specific_index(smu,
-							       CMN2ASIC_MAPPING_WORKLOAD,
-							       i);
-
-		if (workload_type < 0)
-			continue;
-
-		size += sysfs_emit_at(buf, size, "%2d %14s%s\n",
-			i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " ");
-	}
-
-	return size;
-}
-
-static int yellow_carp_set_power_profile_mode(struct smu_context *smu,
-						long *input, uint32_t size)
-{
-	int workload_type, ret;
-	uint32_t profile_mode = input[size];
-
-	if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
-		dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
-		return -EINVAL;
-	}
-
-	if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
-			profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
-		return 0;
-
-	/* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
-	workload_type = smu_cmn_to_asic_specific_index(smu,
-						       CMN2ASIC_MAPPING_WORKLOAD,
-						       profile_mode);
-	if (workload_type < 0) {
-		dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on YELLOWCARP\n",
-					profile_mode);
-		return -EINVAL;
-	}
-
-	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
-				    1 << workload_type,
-				    NULL);
-	if (ret) {
-		dev_err_once(smu->adev->dev, "Fail to set workload type %d\n",
-					workload_type);
-		return ret;
-	}
-
-	smu->power_profile_mode = profile_mode;
-
-	return 0;
-}
-
 static ssize_t yellow_carp_get_gpu_metrics(struct smu_context *smu,
 						void **table)
 {
@@ -781,6 +697,10 @@
 	case SMU_FCLK:
 		return smu_cmn_send_smc_msg_with_param(smu,
 				SMU_MSG_GetFclkFrequency, 0, value);
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		return smu_cmn_send_smc_msg_with_param(smu,
+				SMU_MSG_GetGfxclkFrequency, 0, value);
 	default:
 		return -EINVAL;
 	}
@@ -1051,6 +972,7 @@
 {
 	int i, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0;
+	uint32_t min, max;
 
 	smu_cmn_get_sysfs_buf(&buf, &size);
 
@@ -1089,6 +1011,27 @@
 					cur_value == value ? "*" : "");
 		}
 		break;
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		ret = yellow_carp_get_current_clk_freq(smu, clk_type, &cur_value);
+		if (ret)
+			goto print_clk_out;
+		min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq;
+		max = (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq;
+		if (cur_value == max)
+			i = 2;
+		else if (cur_value == min)
+			i = 0;
+		else
+			i = 1;
+		size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", min,
+				i == 0 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+				i == 1 ? cur_value : YELLOW_CARP_UMD_PSTATE_GFXCLK,
+				i == 1 ? "*" : "");
+		size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", max,
+				i == 2 ? "*" : "");
+		break;
 	default:
 		break;
 	}
@@ -1238,8 +1181,6 @@
 	.read_sensor = yellow_carp_read_sensor,
 	.is_dpm_running = yellow_carp_is_dpm_running,
 	.set_watermarks_table = yellow_carp_set_watermarks_table,
-	.get_power_profile_mode = yellow_carp_get_power_profile_mode,
-	.set_power_profile_mode = yellow_carp_set_power_profile_mode,
 	.get_gpu_metrics = yellow_carp_get_gpu_metrics,
 	.get_enabled_mask = smu_cmn_get_enabled_32_bits_mask,
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
@@ -1261,6 +1202,5 @@
 	smu->message_map = yellow_carp_message_map;
 	smu->feature_map = yellow_carp_feature_mask_map;
 	smu->table_map = yellow_carp_table_map;
-	smu->workload_map = yellow_carp_workload_map;
 	smu->is_apu = true;
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h
index b3ad835..a9205a8 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h
@@ -24,5 +24,6 @@
 #define __YELLOW_CARP_PPT_H__
 
 extern void yellow_carp_set_ppt_funcs(struct smu_context *smu);
+#define YELLOW_CARP_UMD_PSTATE_GFXCLK       1100
 
 #endif
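
YELLOW_CARP_UMD_PSTATE_GFXCLK is the placeholder shown as the middle entry of
pp_dpm_sclk whenever the current gfxclk is pinned at the min or max level
(see the print hunk in yellow_carp_ppt.c above); when the clock sits strictly
in between, the live frequency replaces it and takes the "*" marker.
Illustrative output with made-up min/max values:

	$ cat /sys/class/drm/card0/device/pp_dpm_sclk
	0: 200Mhz
	1: 1100Mhz
	2: 1600Mhz *
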
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 843d2cb..ea6f50c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -139,9 +139,13 @@
 	const char *message = smu_get_message_name(smu, msg);
 
 	switch (reg_c2pmsg_90) {
-	case SMU_RESP_NONE:
+	case SMU_RESP_NONE: {
+		u32 msg_idx = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66);
+		u32 prm     = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
 		dev_err_ratelimited(adev->dev,
-				    "SMU: I'm not done with your previous command!");
+				    "SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x%08X SMN_C2PMSG_82:0x%08X",
+				    msg_idx, prm);
+	}
 		break;
 	case SMU_RESP_OK:
 		/* The SMU executed the command. It completed with a
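
For readers unfamiliar with the mailbox this log refers to: on these SMU
generations the driver writes the argument to MP1_SMN_C2PMSG_82 and the
message index to MP1_SMN_C2PMSG_66, then polls MP1_SMN_C2PMSG_90 for the
response, where SMU_RESP_NONE (0) means the firmware has not yet acknowledged
the previous message. Dumping registers 66 and 82 therefore identifies which
command is stuck. A hedged sketch of the send side (simplified from
smu_cmn.c, not part of this hunk):

	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);		/* clear response */
	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param);	/* argument */
	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);	/* fires the message */
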
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
index b53fee6..65f1728 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
@@ -291,7 +291,7 @@
 	if (rc)
 		return rc;
 
-	return sprintf(buf, "%u\n", reg & 1);
+	return sprintf(buf, "%u\n", reg);
 }
 static DEVICE_ATTR_RO(vga_pw);
 
diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
index 3cac16d..010657e 100644
--- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
+++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c
@@ -167,9 +167,10 @@
 	struct lt9611uxc *lt9611uxc = container_of(work, struct lt9611uxc, work);
 	bool connected;
 
-	if (lt9611uxc->connector.dev)
-		drm_kms_helper_hotplug_event(lt9611uxc->connector.dev);
-	else {
+	if (lt9611uxc->connector.dev) {
+		if (lt9611uxc->connector.dev->mode_config.funcs)
+			drm_kms_helper_hotplug_event(lt9611uxc->connector.dev);
+	} else {
 
 		mutex_lock(&lt9611uxc->ocm_lock);
 		connected = lt9611uxc->hdmi_connected;
@@ -339,6 +340,8 @@
 		return -ENODEV;
 	}
 
+	lt9611uxc->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
 	drm_connector_helper_add(&lt9611uxc->connector,
 				 &lt9611uxc_bridge_connector_helper_funcs);
 	ret = drm_connector_init(bridge->dev, &lt9611uxc->connector,
diff --git a/drivers/gpu/drm/bridge/lvds-codec.c b/drivers/gpu/drm/bridge/lvds-codec.c
index dcf579a..ad460b9 100644
--- a/drivers/gpu/drm/bridge/lvds-codec.c
+++ b/drivers/gpu/drm/bridge/lvds-codec.c
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_panel.h>
 
@@ -22,6 +23,7 @@
 	struct regulator *vcc;
 	struct gpio_desc *powerdown_gpio;
 	u32 connector_type;
+	unsigned int bus_format;
 };
 
 static inline struct lvds_codec *to_lvds_codec(struct drm_bridge *bridge)
@@ -74,12 +76,50 @@
 	.disable = lvds_codec_disable,
 };
 
+#define MAX_INPUT_SEL_FORMATS 1
+static u32 *
+lvds_codec_atomic_get_input_bus_fmts(struct drm_bridge *bridge,
+				     struct drm_bridge_state *bridge_state,
+				     struct drm_crtc_state *crtc_state,
+				     struct drm_connector_state *conn_state,
+				     u32 output_fmt,
+				     unsigned int *num_input_fmts)
+{
+	struct lvds_codec *lvds_codec = to_lvds_codec(bridge);
+	u32 *input_fmts;
+
+	*num_input_fmts = 0;
+
+	input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts),
+			     GFP_KERNEL);
+	if (!input_fmts)
+		return NULL;
+
+	input_fmts[0] = lvds_codec->bus_format;
+	*num_input_fmts = MAX_INPUT_SEL_FORMATS;
+
+	return input_fmts;
+}
+
+static const struct drm_bridge_funcs funcs_decoder = {
+	.attach = lvds_codec_attach,
+	.enable = lvds_codec_enable,
+	.disable = lvds_codec_disable,
+	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
+	.atomic_reset = drm_atomic_helper_bridge_reset,
+	.atomic_get_input_bus_fmts = lvds_codec_atomic_get_input_bus_fmts,
+};
+
 static int lvds_codec_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *panel_node;
+	struct device_node *bus_node;
 	struct drm_panel *panel;
 	struct lvds_codec *lvds_codec;
+	const char *mapping;
+	int ret;
 
 	lvds_codec = devm_kzalloc(dev, sizeof(*lvds_codec), GFP_KERNEL);
 	if (!lvds_codec)
@@ -119,13 +159,47 @@
 	if (IS_ERR(lvds_codec->panel_bridge))
 		return PTR_ERR(lvds_codec->panel_bridge);
 
+	lvds_codec->bridge.funcs = &funcs;
+
+	/*
+	 * Decoder input LVDS format is a property of the decoder chip or even
+	 * its strapping. Handle data-mapping the same way lvds-panel does. In
+	 * case data-mapping is not present, do nothing, since there are still
+	 * legacy bindings which do not specify this property.
+	 */
+	if (lvds_codec->connector_type != DRM_MODE_CONNECTOR_LVDS) {
+		bus_node = of_graph_get_endpoint_by_regs(dev->of_node, 0, 0);
+		if (!bus_node) {
+			dev_dbg(dev, "bus DT node not found\n");
+			return -ENXIO;
+		}
+
+		ret = of_property_read_string(bus_node, "data-mapping",
+					      &mapping);
+		of_node_put(bus_node);
+		if (ret < 0) {
+			dev_warn(dev, "missing 'data-mapping' DT property\n");
+		} else {
+			if (!strcmp(mapping, "jeida-18")) {
+				lvds_codec->bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG;
+			} else if (!strcmp(mapping, "jeida-24")) {
+				lvds_codec->bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA;
+			} else if (!strcmp(mapping, "vesa-24")) {
+				lvds_codec->bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG;
+			} else {
+				dev_err(dev, "invalid 'data-mapping' DT property\n");
+				return -EINVAL;
+			}
+			lvds_codec->bridge.funcs = &funcs_decoder;
+		}
+	}
+
 	/*
 	 * The panel_bridge bridge is attached to the panel's of_node,
 	 * but we need a bridge attached to our of_node for our user
 	 * to look up.
 	 */
 	lvds_codec->bridge.of_node = dev->of_node;
-	lvds_codec->bridge.funcs = &funcs;
 	drm_bridge_add(&lvds_codec->bridge);
 
 	platform_set_drvdata(pdev, lvds_codec);
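
The new atomic_get_input_bus_fmts hook plugs the decoder into the bridge
chain's format negotiation: the core walks the chain from the connector back
toward the encoder, asking each bridge which input formats can produce a
given output format, and here the answer is always the single format implied
by the data-mapping strapping. A rough sketch of how the core consumes the
hook (simplified; the real recursion lives in drm_bridge.c):

	unsigned int num = 0;
	u32 *fmts;

	fmts = bridge->funcs->atomic_get_input_bus_fmts(bridge, bridge_state,
							crtc_state, conn_state,
							output_fmt, &num);
	if (!fmts)
		return -ENOTSUPP;	/* no input format yields this output */
	/* ... pick one of fmts[0..num - 1] for the upstream bridge ... */
	kfree(fmts);			/* the caller owns the returned array */
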
diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c
index ed8ac50..a7389a0 100644
--- a/drivers/gpu/drm/bridge/nwl-dsi.c
+++ b/drivers/gpu/drm/bridge/nwl-dsi.c
@@ -939,6 +939,40 @@
 	drm_of_panel_bridge_remove(dsi->dev->of_node, 1, 0);
 }
 
+static u32 *nwl_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge,
+						 struct drm_bridge_state *bridge_state,
+						 struct drm_crtc_state *crtc_state,
+						 struct drm_connector_state *conn_state,
+						 u32 output_fmt,
+						 unsigned int *num_input_fmts)
+{
+	u32 *input_fmts, input_fmt;
+
+	*num_input_fmts = 0;
+
+	switch (output_fmt) {
+	/* If MEDIA_BUS_FMT_FIXED is tested, return default bus format */
+	case MEDIA_BUS_FMT_FIXED:
+		input_fmt = MEDIA_BUS_FMT_RGB888_1X24;
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_RGB666_1X18:
+	case MEDIA_BUS_FMT_RGB565_1X16:
+		input_fmt = output_fmt;
+		break;
+	default:
+		return NULL;
+	}
+
+	input_fmts = kcalloc(1, sizeof(*input_fmts), GFP_KERNEL);
+	if (!input_fmts)
+		return NULL;
+	input_fmts[0] = input_fmt;
+	*num_input_fmts = 1;
+
+	return input_fmts;
+}
+
 static const struct drm_bridge_funcs nwl_dsi_bridge_funcs = {
 	.atomic_duplicate_state	= drm_atomic_helper_bridge_duplicate_state,
 	.atomic_destroy_state	= drm_atomic_helper_bridge_destroy_state,
@@ -946,6 +980,7 @@
 	.atomic_check		= nwl_dsi_bridge_atomic_check,
 	.atomic_enable		= nwl_dsi_bridge_atomic_enable,
 	.atomic_disable		= nwl_dsi_bridge_atomic_disable,
+	.atomic_get_input_bus_fmts = nwl_bridge_atomic_get_input_bus_fmts,
 	.mode_set		= nwl_dsi_bridge_mode_set,
 	.mode_valid		= nwl_dsi_bridge_mode_valid,
 	.attach			= nwl_dsi_bridge_attach,
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
index a32f70b..ba1160e 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -288,6 +288,19 @@
 	return ret;
 }
 
+static void sn65dsi83_detach(struct drm_bridge *bridge)
+{
+	struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge);
+
+	if (!ctx->dsi)
+		return;
+
+	mipi_dsi_detach(ctx->dsi);
+	mipi_dsi_device_unregister(ctx->dsi);
+	drm_bridge_remove(&ctx->bridge);
+	ctx->dsi = NULL;
+}
+
 static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge,
 					struct drm_bridge_state *old_bridge_state)
 {
@@ -583,6 +596,7 @@
 
 static const struct drm_bridge_funcs sn65dsi83_funcs = {
 	.attach			= sn65dsi83_attach,
+	.detach			= sn65dsi83_detach,
 	.atomic_pre_enable	= sn65dsi83_atomic_pre_enable,
 	.atomic_enable		= sn65dsi83_atomic_enable,
 	.atomic_disable		= sn65dsi83_atomic_disable,
@@ -697,9 +711,6 @@
 {
 	struct sn65dsi83 *ctx = i2c_get_clientdata(client);
 
-	mipi_dsi_detach(ctx->dsi);
-	mipi_dsi_device_unregister(ctx->dsi);
-	drm_bridge_remove(&ctx->bridge);
 	of_node_put(ctx->host_node);
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 3bc782b..52e20c6 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -625,6 +625,8 @@
  *
  * In contrast to the other drm_get_*_name functions this one here returns a
  * const pointer and hence is threadsafe.
+ *
+ * Returns: connector status string
  */
 const char *drm_get_connector_status_name(enum drm_connector_status status)
 {
@@ -707,7 +709,7 @@
  * drm_connector_list_iter_next - return next connector
  * @iter: connector_list iterator
  *
- * Returns the next connector for @iter, or NULL when the list walk has
+ * Returns: the next connector for @iter, or NULL when the list walk has
  * completed.
  */
 struct drm_connector *
@@ -780,6 +782,8 @@
  *
  * Note you could abuse this and return something out of bounds, but that
  * would be a caller error.  No unscrubbed user data should make it here.
+ *
+ * Returns: string describing an enumerated subpixel property
  */
 const char *drm_get_subpixel_order_name(enum subpixel_order order)
 {
@@ -809,6 +813,9 @@
  * Store the supported bus formats in display info structure.
  * See MEDIA_BUS_FMT_* definitions in include/uapi/linux/media-bus-format.h for
  * a full list of available formats.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
  */
 int drm_display_info_set_bus_formats(struct drm_display_info *info,
 				     const u32 *formats,
@@ -1326,6 +1333,8 @@
  * @dev: DRM device
  *
  * Called by a driver the first time a DVI-I connector is made.
+ *
+ * Returns: %0
  */
 int drm_mode_create_dvi_i_properties(struct drm_device *dev)
 {
@@ -1397,6 +1406,8 @@
  *	Game:
  *		Content type is game
  *
+ *	The meaning of each content type is defined in CTA-861-G table 15.
+ *
  *	Drivers can set up this property by calling
  *	drm_connector_attach_content_type_property(). Decoding to
  *	infoframe values is done through drm_hdmi_avi_infoframe_content_type().
@@ -1407,6 +1418,8 @@
  * @connector: connector to attach content type property on.
  *
  * Called by a driver the first time a HDMI connector is made.
+ *
+ * Returns: %0
  */
 int drm_connector_attach_content_type_property(struct drm_connector *connector)
 {
@@ -1487,6 +1500,9 @@
  * creates the TV margin properties for a given device. No need to call this
  * function for an SDTV connector, it's already called from
  * drm_mode_create_tv_properties().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
  */
 int drm_mode_create_tv_margin_properties(struct drm_device *dev)
 {
@@ -1527,6 +1543,9 @@
  * the TV specific connector properties for a given device.  Caller is
  * responsible for allocating a list of format names and passing them to
  * this routine.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
  */
 int drm_mode_create_tv_properties(struct drm_device *dev,
 				  unsigned int num_modes,
@@ -1622,6 +1641,8 @@
  * Atomic drivers should use drm_connector_attach_scaling_mode_property()
  * instead to correctly assign &drm_connector_state.scaling_mode
  * in the atomic state.
+ *
+ * Returns: %0
  */
 int drm_mode_create_scaling_mode_property(struct drm_device *dev)
 {
@@ -1939,6 +1960,9 @@
  * @dev: DRM device
  *
  * Create the suggested x/y offset property for connectors.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
  */
 int drm_mode_create_suggested_offset_properties(struct drm_device *dev)
 {
@@ -2312,8 +2336,8 @@
 EXPORT_SYMBOL(drm_connector_set_panel_orientation);
 
 /**
- * drm_connector_set_panel_orientation_with_quirk -
- *	set the connector's panel_orientation after checking for quirks
+ * drm_connector_set_panel_orientation_with_quirk - set the
+ *	connector's panel_orientation after checking for quirks
  * @connector: connector for which to init the panel-orientation property.
  * @panel_orientation: drm_panel_orientation value to set
  * @width: width in pixels of the panel, used for panel quirk detection
@@ -2597,7 +2621,7 @@
 
 /**
  * drm_connector_oob_hotplug_event - Report out-of-band hotplug event to connector
- * @connector: connector to report the event on
+ * @connector_fwnode: fwnode_handle to report the event on
  *
  * On some hardware a hotplug event notification may come from outside the display
  * driver / device. An example of this is some USB Type-C setups where the hardware
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 09c8200..4dcdec6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1340,31 +1340,15 @@
 				     struct drm_gem_object *obj,
 				     bool write)
 {
-	int ret;
-	struct dma_fence **fences;
-	unsigned int i, fence_count;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	int ret = 0;
 
-	if (!write) {
-		struct dma_fence *fence =
-			dma_resv_get_excl_unlocked(obj->resv);
-
-		return drm_gem_fence_array_add(fence_array, fence);
-	}
-
-	ret = dma_resv_get_fences(obj->resv, NULL,
-						&fence_count, &fences);
-	if (ret || !fence_count)
-		return ret;
-
-	for (i = 0; i < fence_count; i++) {
-		ret = drm_gem_fence_array_add(fence_array, fences[i]);
+	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
+		ret = drm_gem_fence_array_add(fence_array, fence);
 		if (ret)
 			break;
 	}
-
-	for (; i < fence_count; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
 	return ret;
 }
 EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
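
This conversion replaces two hand-rolled paths (exclusive fence only for
reads, a snapshot array from dma_resv_get_fences() for writes) with the
dma_resv_iter introduced in 5.16, so the write flag now simply selects how
much of the reservation object the iterator walks. A hedged usage sketch of
the locked iterator, with handle_fence() as a placeholder:

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	/* Locked variant: the caller must hold resv's lock.
	 * all_fences = true visits the exclusive fence and then the shared
	 * fences; false visits the exclusive fence only. */
	dma_resv_for_each_fence(&cursor, resv, all_fences, fence) {
		if (handle_fence(fence))
			break;
	}
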
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index d533881..9d05674 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -210,8 +210,13 @@
 			dma_buf_vunmap(gem_obj->import_attach->dmabuf, &map);
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
 	} else if (cma_obj->vaddr) {
-		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
-			    cma_obj->vaddr, cma_obj->paddr);
+		if (cma_obj->map_noncoherent)
+			dma_free_noncoherent(gem_obj->dev->dev, cma_obj->base.size,
+					     cma_obj->vaddr, cma_obj->paddr,
+					     DMA_TO_DEVICE);
+		else
+			dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+				    cma_obj->vaddr, cma_obj->paddr);
 	}
 
 	drm_gem_object_release(gem_obj);
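
The free path now mirrors the allocation path: buffers created for
noncoherent mappings must be released with dma_free_noncoherent(), including
the same DMA direction, rather than dma_free_wc(). For reference, the
allocation side in the 5.16 CMA helper presumably looks like this (sketch,
not part of this hunk):

	if (cma_obj->map_noncoherent)
		cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size,
						       &cma_obj->paddr,
						       DMA_TO_DEVICE,
						       GFP_KERNEL | __GFP_NOWARN);
	else
		cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
					      GFP_KERNEL | __GFP_NOWARN);
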
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index bf8a6e8..c973233 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -25,6 +25,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_device.h>
 #include <drm/drm_modeset_lock.h>
+#include <drm/drm_print.h>
 
 /**
  * DOC: kms locking
@@ -77,6 +78,45 @@
 
 static DEFINE_WW_CLASS(crtc_ww_class);
 
+#if IS_ENABLED(CONFIG_DRM_DEBUG_MODESET_LOCK)
+static noinline depot_stack_handle_t __drm_stack_depot_save(void)
+{
+	unsigned long entries[8];
+	unsigned int n;
+
+	n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+	return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+}
+
+static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
+{
+	struct drm_printer p = drm_debug_printer("drm_modeset_lock");
+	unsigned long *entries;
+	unsigned int nr_entries;
+	char *buf;
+
+	buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+	if (!buf)
+		return;
+
+	nr_entries = stack_depot_fetch(stack_depot, &entries);
+	stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 2);
+
+	drm_printf(&p, "attempting to lock a contended lock without backoff:\n%s", buf);
+
+	kfree(buf);
+}
+#else /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+static depot_stack_handle_t __drm_stack_depot_save(void)
+{
+	return 0;
+}
+static void __drm_stack_depot_print(depot_stack_handle_t stack_depot)
+{
+}
+#endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */
+
 /**
  * drm_modeset_lock_all - take all modeset locks
  * @dev: DRM device
@@ -225,7 +265,9 @@
  */
 void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx)
 {
-	WARN_ON(ctx->contended);
+	if (WARN_ON(ctx->contended))
+		__drm_stack_depot_print(ctx->stack_depot);
+
 	while (!list_empty(&ctx->locked)) {
 		struct drm_modeset_lock *lock;
 
@@ -243,7 +285,8 @@
 {
 	int ret;
 
-	WARN_ON(ctx->contended);
+	if (WARN_ON(ctx->contended))
+		__drm_stack_depot_print(ctx->stack_depot);
 
 	if (ctx->trylock_only) {
 		lockdep_assert_held(&ctx->ww_ctx);
@@ -274,6 +317,7 @@
 		ret = 0;
 	} else if (ret == -EDEADLK) {
 		ctx->contended = lock;
+		ctx->stack_depot = __drm_stack_depot_save();
 	}
 
 	return ret;
@@ -296,6 +340,7 @@
 	struct drm_modeset_lock *contended = ctx->contended;
 
 	ctx->contended = NULL;
+	ctx->stack_depot = 0;
 
 	if (WARN_ON(!contended))
 		return 0;
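
The depot costs nothing unless CONFIG_DRM_DEBUG_MODESET_LOCK is enabled: a
stack is captured only at the moment a lock attempt returns -EDEADLK, and it
is printed only if the driver then drops or re-takes locks without backing
off. The protocol the splat enforces, as a hedged sketch:

	retry:
		ret = drm_modeset_lock(&crtc->mutex, &ctx);
		if (ret == -EDEADLK) {
			/* mandatory: release everything in the ctx and retry
			 * from the top; skipping this step is what the saved
			 * stack now points at */
			ret = drm_modeset_backoff(&ctx);
			if (!ret)
				goto retry;
		}
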
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index 5b2d0ca..838b32b 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -123,7 +123,6 @@
 		.crtc_w = drm_rect_width(dst),
 		.crtc_h = drm_rect_height(dst),
 		.rotation = rotation,
-		.visible = *visible,
 	};
 	struct drm_crtc_state crtc_state = {
 		.crtc = crtc,
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index d8ba957..c773d3d 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -722,11 +722,13 @@
 	if (obj->funcs && obj->funcs->mmap) {
 		vma->vm_ops = obj->funcs->vm_ops;
 
-		ret = obj->funcs->mmap(obj, vma);
-		if (ret)
-			return ret;
-		vma->vm_private_data = obj;
 		drm_gem_object_get(obj);
+		ret = obj->funcs->mmap(obj, vma);
+		if (ret) {
+			drm_gem_object_put(obj);
+			return ret;
+		}
+		vma->vm_private_data = obj;
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
index cd818a6..00e53de 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
@@ -225,12 +225,29 @@
 {
 	struct drm_device *dev = hv_get_drvdata(hdev);
 	struct hyperv_drm_device *hv = to_hv(dev);
+	struct pci_dev *pdev;
 
 	drm_dev_unplug(dev);
 	drm_atomic_helper_shutdown(dev);
 	vmbus_close(hdev->channel);
 	hv_set_drvdata(hdev, NULL);
-	vmbus_free_mmio(hv->mem->start, hv->fb_size);
+
+	/*
+	 * Free allocated MMIO memory only on Gen2 VMs.
+	 * On Gen1 VMs, release the PCI device
+	 */
+	if (efi_enabled(EFI_BOOT)) {
+		vmbus_free_mmio(hv->mem->start, hv->fb_size);
+	} else {
+		pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
+				      PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
+		if (!pdev) {
+			drm_err(dev, "Unable to find PCI Hyper-V video\n");
+			return -ENODEV;
+		}
+		pci_release_region(pdev, 0);
+		pci_dev_put(pdev);
+	}
 
 	return 0;
 }
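
efi_enabled(EFI_BOOT) serves as the Gen2-versus-Gen1 discriminator here:
Generation 2 VMs boot via EFI and take their framebuffer from VMBus-assigned
MMIO, while Generation 1 VMs expose an emulated PCI video device whose BAR 0
backs the framebuffer. The release path above has to undo whichever claim the
probe path made; the Gen1 probe side presumably pairs with it roughly like
this (a sketch; DRIVER_NAME is assumed, not shown in this diff):

	pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
			      PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
	if (!pdev)
		return -ENODEV;

	ret = pci_request_region(pdev, 0, DRIVER_NAME);
	if (ret) {
		pci_dev_put(pdev);
		return ret;
	}
	/* ... map BAR 0 as the framebuffer, then drop the temporary ref ... */
	pci_dev_put(pdev);
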
diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index 88c427f..f5b4dd5 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -584,6 +584,7 @@
 		else
 			intel_encoder->enable = g4x_enable_hdmi;
 	}
+	intel_encoder->shutdown = intel_hdmi_encoder_shutdown;
 
 	intel_encoder->type = INTEL_OUTPUT_HDMI;
 	intel_encoder->power_domain = intel_port_to_power_domain(port);
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 168c84a..71fbdcd 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -696,10 +696,7 @@
 	intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
 
 	for_each_dsi_phy(phy, intel_dsi->phys) {
-		if (DISPLAY_VER(dev_priv) >= 12)
-			val |= ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
-		else
-			val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
+		val &= ~ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy);
 	}
 	intel_de_write(dev_priv, ICL_DPCLKA_CFGCR0, val);
 
@@ -1135,8 +1132,6 @@
 gen11_dsi_enable_port_and_phy(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-
 	/* step 4a: power up all lanes of the DDI used by DSI */
 	gen11_dsi_power_up_lanes(encoder);
 
@@ -1162,8 +1157,7 @@
 	gen11_dsi_configure_transcoder(encoder, crtc_state);
 
 	/* Step 4l: Gate DDI clocks */
-	if (DISPLAY_VER(dev_priv) == 11)
-		gen11_dsi_gate_clocks(encoder);
+	gen11_dsi_gate_clocks(encoder);
 }
 
 static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
@@ -1271,7 +1265,8 @@
 	if (DISPLAY_VER(i915) == 13) {
 		for_each_dsi_port(port, intel_dsi->ports)
 			intel_de_rmw(i915, TGL_DSI_CHKN_REG(port),
-				     TGL_DSI_CHKN_LSHS_GB, 0x4);
+				     TGL_DSI_CHKN_LSHS_GB_MASK,
+				     TGL_DSI_CHKN_LSHS_GB(4));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index b99907c6..2b1423a 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1707,6 +1707,39 @@
 	child->aux_channel = 0;
 }
 
+static u8 dvo_port_type(u8 dvo_port)
+{
+	switch (dvo_port) {
+	case DVO_PORT_HDMIA:
+	case DVO_PORT_HDMIB:
+	case DVO_PORT_HDMIC:
+	case DVO_PORT_HDMID:
+	case DVO_PORT_HDMIE:
+	case DVO_PORT_HDMIF:
+	case DVO_PORT_HDMIG:
+	case DVO_PORT_HDMIH:
+	case DVO_PORT_HDMII:
+		return DVO_PORT_HDMIA;
+	case DVO_PORT_DPA:
+	case DVO_PORT_DPB:
+	case DVO_PORT_DPC:
+	case DVO_PORT_DPD:
+	case DVO_PORT_DPE:
+	case DVO_PORT_DPF:
+	case DVO_PORT_DPG:
+	case DVO_PORT_DPH:
+	case DVO_PORT_DPI:
+		return DVO_PORT_DPA;
+	case DVO_PORT_MIPIA:
+	case DVO_PORT_MIPIB:
+	case DVO_PORT_MIPIC:
+	case DVO_PORT_MIPID:
+		return DVO_PORT_MIPIA;
+	default:
+		return dvo_port;
+	}
+}
+
 static enum port __dvo_port_to_port(int n_ports, int n_dvo,
 				    const int port_mapping[][3], u8 dvo_port)
 {
@@ -1930,50 +1963,6 @@
 	}
 }
 
-static enum port get_edp_port(struct drm_i915_private *i915)
-{
-	const struct intel_bios_encoder_data *devdata;
-	enum port port;
-
-	for_each_port(port) {
-		devdata = i915->vbt.ports[port];
-
-		if (devdata && intel_bios_encoder_supports_edp(devdata))
-			return port;
-	}
-
-	return PORT_NONE;
-}
-
-/*
- * FIXME: The power sequencer and backlight code currently do not support more
- * than one set registers, at least not on anything other than VLV/CHV. It will
- * clobber the registers. As a temporary workaround, gracefully prevent more
- * than one eDP from being registered.
- */
-static void sanitize_dual_edp(struct intel_bios_encoder_data *devdata,
-			      enum port port)
-{
-	struct drm_i915_private *i915 = devdata->i915;
-	struct child_device_config *child = &devdata->child;
-	enum port p;
-
-	/* CHV might not clobber PPS registers. */
-	if (IS_CHERRYVIEW(i915))
-		return;
-
-	p = get_edp_port(i915);
-	if (p == PORT_NONE)
-		return;
-
-	drm_dbg_kms(&i915->drm, "both ports %c and %c configured as eDP, "
-		    "disabling port %c eDP\n", port_name(p), port_name(port),
-		    port_name(port));
-
-	child->device_type &= ~DEVICE_TYPE_DISPLAYPORT_OUTPUT;
-	child->device_type &= ~DEVICE_TYPE_INTERNAL_CONNECTOR;
-}
-
 static bool is_port_valid(struct drm_i915_private *i915, enum port port)
 {
 	/*
@@ -2031,9 +2020,6 @@
 		    supports_typec_usb, supports_tbt,
 		    devdata->dsc != NULL);
 
-	if (is_edp)
-		sanitize_dual_edp(devdata, port);
-
 	if (is_dvi)
 		sanitize_ddc_pin(devdata, port);
 
@@ -2670,8 +2656,25 @@
 	return false;
 }
 
-static bool child_dev_is_dp_dual_mode(const struct child_device_config *child,
-				      enum port port)
+static bool child_dev_is_dp_dual_mode(const struct child_device_config *child)
+{
+	if ((child->device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) !=
+	    (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS))
+		return false;
+
+	if (dvo_port_type(child->dvo_port) == DVO_PORT_DPA)
+		return true;
+
+	/* Only accept an HDMI dvo_port as DP++ if it has an AUX channel */
+	if (dvo_port_type(child->dvo_port) == DVO_PORT_HDMIA &&
+	    child->aux_channel != 0)
+		return true;
+
+	return false;
+}
+
+bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *i915,
+				     enum port port)
 {
 	static const struct {
 		u16 dp, hdmi;
@@ -2686,32 +2689,23 @@
 		[PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
 		[PORT_F] = { DVO_PORT_DPF, DVO_PORT_HDMIF, },
 	};
+	const struct intel_bios_encoder_data *devdata;
+
+	if (HAS_DDI(i915)) {
+		const struct intel_bios_encoder_data *devdata;
+
+		devdata = intel_bios_encoder_data_lookup(i915, port);
+
+		return devdata && child_dev_is_dp_dual_mode(&devdata->child);
+	}
 
 	if (port == PORT_A || port >= ARRAY_SIZE(port_mapping))
 		return false;
 
-	if ((child->device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) !=
-	    (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS))
-		return false;
-
-	if (child->dvo_port == port_mapping[port].dp)
-		return true;
-
-	/* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */
-	if (child->dvo_port == port_mapping[port].hdmi &&
-	    child->aux_channel != 0)
-		return true;
-
-	return false;
-}
-
-bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *i915,
-				     enum port port)
-{
-	const struct intel_bios_encoder_data *devdata;
-
 	list_for_each_entry(devdata, &i915->vbt.display_devices, node) {
-		if (child_dev_is_dp_dual_mode(&devdata->child, port))
+		if ((devdata->child.dvo_port == port_mapping[port].dp ||
+		     devdata->child.dvo_port == port_mapping[port].hdmi) &&
+		    child_dev_is_dp_dual_mode(&devdata->child))
 			return true;
 	}
 
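The new dvo_port_type() helper collapses per-instance DVO port IDs
(HDMIA..HDMII, DPA..DPI, MIPIA..MIPID) onto their first member, so on DDI
platforms the DP++ test no longer needs the port-indexed dp/hdmi mapping.
Stripped of the device_type mask check, the resulting predicate is
effectively this (names as in the hunk; sketch only):

  /* DP++ if the child device is a DP port, or an HDMI port that the
   * VBT also gives an AUX channel (a TMDS-only port has none). */
  static bool is_dp_dual_mode(const struct child_device_config *child)
  {
  	u8 base = dvo_port_type(child->dvo_port);

  	return base == DVO_PORT_DPA ||
  	       (base == DVO_PORT_HDMIA && child->aux_channel != 0);
  }

On pre-DDI platforms the old dvo_port-to-port mapping is still consulted
first, as the list_for_each_entry() branch shows.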
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 9e466d8..868dd43 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2885,7 +2885,7 @@
 	return freq;
 }
 
-static struct intel_cdclk_funcs tgl_cdclk_funcs = {
+static const struct intel_cdclk_funcs tgl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.bw_calc_min_cdclk = skl_bw_calc_min_cdclk,
@@ -2893,7 +2893,7 @@
 	.calc_voltage_level = tgl_calc_voltage_level,
 };
 
-static struct intel_cdclk_funcs ehl_cdclk_funcs = {
+static const struct intel_cdclk_funcs ehl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.bw_calc_min_cdclk = skl_bw_calc_min_cdclk,
@@ -2901,7 +2901,7 @@
 	.calc_voltage_level = ehl_calc_voltage_level,
 };
 
-static struct intel_cdclk_funcs icl_cdclk_funcs = {
+static const struct intel_cdclk_funcs icl_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.bw_calc_min_cdclk = skl_bw_calc_min_cdclk,
@@ -2909,7 +2909,7 @@
 	.calc_voltage_level = icl_calc_voltage_level,
 };
 
-static struct intel_cdclk_funcs bxt_cdclk_funcs = {
+static const struct intel_cdclk_funcs bxt_cdclk_funcs = {
 	.get_cdclk = bxt_get_cdclk,
 	.set_cdclk = bxt_set_cdclk,
 	.bw_calc_min_cdclk = skl_bw_calc_min_cdclk,
@@ -2917,54 +2917,54 @@
 	.calc_voltage_level = bxt_calc_voltage_level,
 };
 
-static struct intel_cdclk_funcs skl_cdclk_funcs = {
+static const struct intel_cdclk_funcs skl_cdclk_funcs = {
 	.get_cdclk = skl_get_cdclk,
 	.set_cdclk = skl_set_cdclk,
 	.bw_calc_min_cdclk = skl_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = skl_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs bdw_cdclk_funcs = {
+static const struct intel_cdclk_funcs bdw_cdclk_funcs = {
 	.get_cdclk = bdw_get_cdclk,
 	.set_cdclk = bdw_set_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = bdw_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs chv_cdclk_funcs = {
+static const struct intel_cdclk_funcs chv_cdclk_funcs = {
 	.get_cdclk = vlv_get_cdclk,
 	.set_cdclk = chv_set_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = vlv_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs vlv_cdclk_funcs = {
+static const struct intel_cdclk_funcs vlv_cdclk_funcs = {
 	.get_cdclk = vlv_get_cdclk,
 	.set_cdclk = vlv_set_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = vlv_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs hsw_cdclk_funcs = {
+static const struct intel_cdclk_funcs hsw_cdclk_funcs = {
 	.get_cdclk = hsw_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
 /* SNB, IVB, 965G, 945G */
-static struct intel_cdclk_funcs fixed_400mhz_cdclk_funcs = {
+static const struct intel_cdclk_funcs fixed_400mhz_cdclk_funcs = {
 	.get_cdclk = fixed_400mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs ilk_cdclk_funcs = {
+static const struct intel_cdclk_funcs ilk_cdclk_funcs = {
 	.get_cdclk = fixed_450mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs gm45_cdclk_funcs = {
+static const struct intel_cdclk_funcs gm45_cdclk_funcs = {
 	.get_cdclk = gm45_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
@@ -2972,7 +2972,7 @@
 
 /* G45 uses G33 */
 
-static struct intel_cdclk_funcs i965gm_cdclk_funcs = {
+static const struct intel_cdclk_funcs i965gm_cdclk_funcs = {
 	.get_cdclk = i965gm_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
@@ -2980,19 +2980,19 @@
 
 /* i965G uses fixed 400 */
 
-static struct intel_cdclk_funcs pnv_cdclk_funcs = {
+static const struct intel_cdclk_funcs pnv_cdclk_funcs = {
 	.get_cdclk = pnv_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs g33_cdclk_funcs = {
+static const struct intel_cdclk_funcs g33_cdclk_funcs = {
 	.get_cdclk = g33_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i945gm_cdclk_funcs = {
+static const struct intel_cdclk_funcs i945gm_cdclk_funcs = {
 	.get_cdclk = i945gm_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
@@ -3000,37 +3000,37 @@
 
 /* i945G uses fixed 400 */
 
-static struct intel_cdclk_funcs i915gm_cdclk_funcs = {
+static const struct intel_cdclk_funcs i915gm_cdclk_funcs = {
 	.get_cdclk = i915gm_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i915g_cdclk_funcs = {
+static const struct intel_cdclk_funcs i915g_cdclk_funcs = {
 	.get_cdclk = fixed_333mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i865g_cdclk_funcs = {
+static const struct intel_cdclk_funcs i865g_cdclk_funcs = {
 	.get_cdclk = fixed_266mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i85x_cdclk_funcs = {
+static const struct intel_cdclk_funcs i85x_cdclk_funcs = {
 	.get_cdclk = i85x_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i845g_cdclk_funcs = {
+static const struct intel_cdclk_funcs i845g_cdclk_funcs = {
 	.get_cdclk = fixed_200mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
 };
 
-static struct intel_cdclk_funcs i830_cdclk_funcs = {
+static const struct intel_cdclk_funcs i830_cdclk_funcs = {
 	.get_cdclk = fixed_133mhz_get_cdclk,
 	.bw_calc_min_cdclk = intel_bw_calc_min_cdclk,
 	.modeset_calc_cdclk = fixed_modeset_calc_cdclk,
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 1dcfe31..cfb567d 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4361,6 +4361,7 @@
 	enum phy phy = intel_port_to_phy(i915, encoder->port);
 
 	intel_dp_encoder_shutdown(encoder);
+	intel_hdmi_encoder_shutdown(encoder);
 
 	if (!intel_phy_is_tc(i915, phy))
 		return;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ff598b6..ec403e4 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -848,9 +848,16 @@
 	int i;
 
 	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+		unsigned int plane_size;
+
+		plane_size = rem_info->plane[i].dst_stride * rem_info->plane[i].height;
+		if (plane_size == 0)
+			continue;
+
 		if (rem_info->plane_alignment)
 			size = ALIGN(size, rem_info->plane_alignment);
-		size += rem_info->plane[i].dst_stride * rem_info->plane[i].height;
+
+		size += plane_size;
 	}
 
 	return size;
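
The guard above keeps empty remap entries from inflating the surface size:
previously a zero-sized plane still triggered the ALIGN() step, so unused
trailing planes could pad the total. A toy run with assumed numbers:

  /* plane_alignment = 4096; plane sizes 5000 and 0 (unused):
   *   old loop: 5000, then ALIGN(5000, 4096) + 0 == 8192  <- phantom pad
   *   new loop: 5000, the empty plane is skipped == 5000
   */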
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 23de500..be88346 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -120,6 +120,12 @@
 	return crtc_state->port_clock >= 1000000;
 }
 
+static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp)
+{
+	intel_dp->sink_rates[0] = 162000;
+	intel_dp->num_sink_rates = 1;
+}
+
 /* update sink rates from dpcd */
 static void intel_dp_set_sink_rates(struct intel_dp *intel_dp)
 {
@@ -281,7 +287,7 @@
 		 */
 		int max_link_rate_kbps = max_link_rate * 10;
 
-		max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(max_link_rate_kbps * 9671, 10000);
+		max_link_rate_kbps = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(max_link_rate_kbps, 9671), 10000);
 		max_link_rate = max_link_rate_kbps / 8;
 	}
 
@@ -1858,6 +1864,12 @@
 	intel_dp->lane_count = lane_count;
 }
 
+static void intel_dp_reset_max_link_params(struct intel_dp *intel_dp)
+{
+	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
+	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+}
+
 /* Enable backlight PWM and backlight PP control. */
 void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state,
 			    const struct drm_connector_state *conn_state)
@@ -2017,8 +2029,7 @@
 	if (intel_dp->dpcd[DP_DPCD_REV] == 0)
 		intel_dp_get_dpcd(intel_dp);
 
-	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
-	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+	intel_dp_reset_max_link_params(intel_dp);
 }
 
 bool intel_dp_initial_fastset_check(struct intel_encoder *encoder,
@@ -2556,6 +2567,9 @@
 	 */
 	intel_psr_init_dpcd(intel_dp);
 
+	/* Clear the default sink rates */
+	intel_dp->num_sink_rates = 0;
+
 	/* Read the eDP 1.4+ supported link rates. */
 	if (intel_dp->edp_dpcd[0] >= DP_EDP_14) {
 		__le16 sink_rates[DP_MAX_SUPPORTED_RATES];
@@ -2591,6 +2605,7 @@
 		intel_dp_set_sink_rates(intel_dp);
 
 	intel_dp_set_common_rates(intel_dp);
+	intel_dp_reset_max_link_params(intel_dp);
 
 	/* Read the eDP DSC DPCD registers */
 	if (DISPLAY_VER(dev_priv) >= 10)
@@ -4332,12 +4347,7 @@
 	 * supports link training fallback params.
 	 */
 	if (intel_dp->reset_link_params || intel_dp->is_mst) {
-		/* Initial max link lane count */
-		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
-
-		/* Initial max link rate */
-		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
-
+		intel_dp_reset_max_link_params(intel_dp);
 		intel_dp->reset_link_params = false;
 	}
 
@@ -5003,6 +5013,9 @@
 	}
 
 	intel_dp_set_source_rates(intel_dp);
+	intel_dp_set_default_sink_rates(intel_dp);
+	intel_dp_set_common_rates(intel_dp);
+	intel_dp_reset_max_link_params(intel_dp);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		intel_dp->pps.active_pipe = vlv_active_pipe(intel_dp);
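
Taken together, the intel_dp changes make the link parameters valid from
encoder init onward rather than only after the first DPCD read. The ordering
the hunks establish, condensed (all functions as named above; DPCD parsing
elided):

  /* encoder init: conservative defaults before any sink is seen */
  intel_dp_set_source_rates(intel_dp);
  intel_dp_set_default_sink_rates(intel_dp);	/* RBR (162000) only */
  intel_dp_set_common_rates(intel_dp);
  intel_dp_reset_max_link_params(intel_dp);

  /* eDP connect: real sink rates replace the placeholder */
  intel_dp->num_sink_rates = 0;
  /* ... parse DP_SUPPORTED_LINK_RATES or fall back to DPCD max ... */
  intel_dp_set_common_rates(intel_dp);
  intel_dp_reset_max_link_params(intel_dp);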
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c
index fa1f375..cb511b2 100644
--- a/drivers/gpu/drm/i915/display/intel_fb.c
+++ b/drivers/gpu/drm/i915/display/intel_fb.c
@@ -378,8 +378,8 @@
 	intel_fb_plane_get_subsampling(&main_hsub, &main_vsub, &fb->base, main_plane);
 	intel_fb_plane_get_subsampling(&hsub, &vsub, &fb->base, color_plane);
 
-	*w = main_width / main_hsub / hsub;
-	*h = main_height / main_vsub / vsub;
+	*w = DIV_ROUND_UP(main_width, main_hsub * hsub);
+	*h = DIV_ROUND_UP(main_height, main_vsub * vsub);
 }
 
 static u32 intel_adjust_tile_offset(int *x, int *y,
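
The DIV_ROUND_UP() switch matters for odd main-surface sizes: truncating
division under-sizes the subsampled plane by a row or column. A spot check
with hypothetical numbers:

  /* 4:2:0 chroma, main_width = 99, main_hsub = 1, hsub = 2:
   *   old: 99 / 1 / 2              == 49  (last luma column uncovered)
   *   new: DIV_ROUND_UP(99, 1 * 2) == 50  (full coverage)
   */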
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index d2e61f6c..371736b 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1246,12 +1246,13 @@
 void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable)
 {
 	struct drm_i915_private *dev_priv = intel_hdmi_to_i915(hdmi);
-	struct i2c_adapter *adapter =
-		intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
+	struct i2c_adapter *adapter;
 
 	if (hdmi->dp_dual_mode.type < DRM_DP_DUAL_MODE_TYPE2_DVI)
 		return;
 
+	adapter = intel_gmbus_get_adapter(dev_priv, hdmi->ddc_bus);
+
 	drm_dbg_kms(&dev_priv->drm, "%s DP dual mode adaptor TMDS output\n",
 		    enable ? "Enabling" : "Disabling");
 
@@ -2258,6 +2259,17 @@
 	return 0;
 }
 
+void intel_hdmi_encoder_shutdown(struct intel_encoder *encoder)
+{
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+
+	/*
+	 * Give a hand to buggy BIOSen which forget to turn
+	 * the TMDS output buffers back on after a reboot.
+	 */
+	intel_dp_dual_mode_set_tmds_output(intel_hdmi, true);
+}
+
 static void
 intel_hdmi_unset_edid(struct drm_connector *connector)
 {
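
Both registration sites earlier in this pull (g4x_hdmi.c directly,
intel_ddi.c from its combined shutdown hook) funnel into this helper, so
adaptors whose TMDS buffers were switched off get them re-enabled before
reboot, for the benefit of the BIOS/GOP. The vfunc is invoked from i915's
shutdown path roughly like this (simplified sketch):

  static void intel_shutdown_encoders(struct drm_i915_private *i915)
  {
  	struct intel_encoder *encoder;

  	for_each_intel_encoder(&i915->drm, encoder)
  		if (encoder->shutdown)
  			encoder->shutdown(encoder);
  }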
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index b43a180..2bf440e 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
@@ -28,6 +28,7 @@
 int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state);
+void intel_hdmi_encoder_shutdown(struct intel_encoder *encoder);
 bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
 				       struct drm_connector *connector,
 				       bool high_tmds_clock_ratio,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index eae09d3..e8a58c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -9,6 +9,8 @@
 #include <linux/dma-resv.h>
 #include <linux/module.h>
 
+#include <asm/smp.h>
+
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index f17383e..57c9755 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1396,6 +1396,9 @@
 {
 	unsigned int row;
 
+	if (!width || !height)
+		return sg;
+
 	if (alignment_pad) {
 		st->nents++;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 524eaf6..795689e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -301,7 +301,7 @@
 	user_forcewake(gt, true);
 	wait_for_suspend(gt);
 
-	intel_pxp_suspend(&gt->pxp, false);
+	intel_pxp_suspend_prepare(&gt->pxp);
 }
 
 static suspend_state_t pm_suspend_target(void)
@@ -326,6 +326,7 @@
 	GEM_BUG_ON(gt->awake);
 
 	intel_uc_suspend(&gt->uc);
+	intel_pxp_suspend(&gt->pxp);
 
 	/*
 	 * On disabling the device, we want to turn off HW access to memory
@@ -353,7 +354,7 @@
 
 void intel_gt_runtime_suspend(struct intel_gt *gt)
 {
-	intel_pxp_suspend(&gt->pxp, true);
+	intel_pxp_runtime_suspend(&gt->pxp);
 	intel_uc_runtime_suspend(&gt->uc);
 
 	GT_TRACE(gt, "\n");
@@ -371,7 +372,7 @@
 	if (ret)
 		return ret;
 
-	intel_pxp_resume(&gt->pxp);
+	intel_pxp_runtime_resume(&gt->pxp);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index d7710de..c48557d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2373,6 +2373,7 @@
 	unsigned long flags;
 	bool disabled;
 
+	lockdep_assert_held(&guc->submission_state.lock);
 	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
 	GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id));
 	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id));
@@ -2388,7 +2389,7 @@
 	}
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 	if (unlikely(disabled)) {
-		release_guc_id(guc, ce);
+		__release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
 		return;
 	}
@@ -3079,8 +3080,8 @@
 
 		ce = intel_engine_create_virtual(siblings, num_siblings,
 						 FORCE_VIRTUAL);
-		if (!ce) {
-			err = ERR_PTR(-ENOMEM);
+		if (IS_ERR(ce)) {
+			err = ERR_CAST(ce);
 			goto unwind;
 		}
 
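Two separate idioms in this GuC fix: the deregister path runs with
submission_state.lock already held, so it must use the unlocked
__release_guc_id() and now states that contract via lockdep; and
intel_engine_create_virtual() reports failure through ERR_PTR(), so the old
NULL check could never fire and clobbered the real errno with -ENOMEM. In
miniature (illustrative helper name):

  static void __release_id_locked(struct intel_guc *guc, struct intel_context *ce)
  {
  	lockdep_assert_held(&guc->submission_state.lock);	/* caller locks */
  	/* ... */
  }

  ce = intel_engine_create_virtual(siblings, num_siblings, FORCE_VIRTUAL);
  if (IS_ERR(ce))
  	return ERR_CAST(ce);	/* keep the errno, change the pointer type */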
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index da9055c..bcee121 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -11717,7 +11717,9 @@
 #define TGL_DSI_CHKN_REG(port)		_MMIO_PORT(port,	\
 						    _TGL_DSI_CHKN_REG_0, \
 						    _TGL_DSI_CHKN_REG_1)
-#define TGL_DSI_CHKN_LSHS_GB			REG_GENMASK(15, 12)
+#define TGL_DSI_CHKN_LSHS_GB_MASK		REG_GENMASK(15, 12)
+#define TGL_DSI_CHKN_LSHS_GB(byte_clocks)	REG_FIELD_PREP(TGL_DSI_CHKN_LSHS_GB_MASK, \
+							       (byte_clocks))
 
 /* Display Stream Splitter Control */
 #define DSS_CTL1				_MMIO(0x67400)
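
The register fix is the classic mask-versus-value mix-up: intel_de_rmw(reg,
clear, set) takes raw register bits, so passing 0x4 as the set value
programmed bit 2 rather than putting 4 into the [15:12] guard-band field.
With the helpers the value lands in the right place:

  /* REG_GENMASK(15, 12)           == 0x0000f000
   * REG_FIELD_PREP(0x0000f000, 4) == 0x00004000  (4 << 12)
   * old call: set = 0x4           == bit 2, outside the field */
  intel_de_rmw(i915, TGL_DSI_CHKN_REG(port),
  	     TGL_DSI_CHKN_LSHS_GB_MASK, TGL_DSI_CHKN_LSHS_GB(4));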
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2c3cd6e6..820a1f3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1537,38 +1537,14 @@
 			  struct drm_i915_gem_object *obj,
 			  bool write)
 {
-	struct dma_fence *excl;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret = 0;
 
-	if (write) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-
-		ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
-					  &shared);
+	dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+		ret = i915_request_await_dma_fence(to, fence);
 		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			ret = i915_request_await_dma_fence(to, shared[i]);
-			if (ret)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-	} else {
-		excl = dma_resv_get_excl_unlocked(obj->base.resv);
-	}
-
-	if (excl) {
-		if (ret == 0)
-			ret = i915_request_await_dma_fence(to, excl);
-
-		dma_fence_put(excl);
+			break;
 	}
 
 	return ret;
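
This is one of several conversions in this pull to the dma_resv_iter cursor
API, which replaces the old two-step dance (dma_resv_get_excl_unlocked()
plus dma_resv_get_fences() with manual puts and a kfree). The general shape
of the locked variant used here (sketch; handle_fence() is a stand-in):

  struct dma_resv_iter cursor;
  struct dma_fence *fence;
  int ret = 0;

  /* 'write' true: all shared + exclusive fences; false: exclusive only.
   * The iterator manages references: a fence is valid inside the body
   * and needs dma_fence_get() only to outlive the loop. */
  dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
  	ret = handle_fence(fence);
  	if (ret)
  		break;
  }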
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
index 23fd86d..6a7d4e2 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
@@ -7,26 +7,29 @@
 #include "intel_pxp_irq.h"
 #include "intel_pxp_pm.h"
 #include "intel_pxp_session.h"
+#include "i915_drv.h"
 
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
 {
 	if (!intel_pxp_is_enabled(pxp))
 		return;
 
 	pxp->arb_is_valid = false;
 
-	/*
-	 * Contexts using protected objects keep a runtime PM reference, so we
-	 * can only runtime suspend when all of them have been either closed
-	 * or banned. Therefore, there is no need to invalidate in that
-	 * scenario.
-	 */
-	if (!runtime)
-		intel_pxp_invalidate(pxp);
+	intel_pxp_invalidate(pxp);
+}
 
-	intel_pxp_fini_hw(pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp)
+{
+	intel_wakeref_t wakeref;
 
-	pxp->hw_state_invalidated = false;
+	if (!intel_pxp_is_enabled(pxp))
+		return;
+
+	with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) {
+		intel_pxp_fini_hw(pxp);
+		pxp->hw_state_invalidated = false;
+	}
 }
 
 void intel_pxp_resume(struct intel_pxp *pxp)
@@ -44,3 +47,15 @@
 
 	intel_pxp_init_hw(pxp);
 }
+
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+	if (!intel_pxp_is_enabled(pxp))
+		return;
+
+	pxp->arb_is_valid = false;
+
+	intel_pxp_fini_hw(pxp);
+
+	pxp->hw_state_invalidated = false;
+}
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
index c89e97a..16990a3 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
@@ -9,16 +9,29 @@
 #include "intel_pxp_types.h"
 
 #ifdef CONFIG_DRM_I915_PXP
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime);
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp);
 void intel_pxp_resume(struct intel_pxp *pxp);
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp);
 #else
-static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+static inline void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
+{
+}
+
+static inline void intel_pxp_suspend(struct intel_pxp *pxp)
 {
 }
 
 static inline void intel_pxp_resume(struct intel_pxp *pxp)
 {
 }
-#endif
 
+static inline void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+}
+#endif
+static inline void intel_pxp_runtime_resume(struct intel_pxp *pxp)
+{
+	intel_pxp_resume(pxp);
+}
 #endif /* __INTEL_PXP_PM_H__ */
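
The PXP rework replaces the single intel_pxp_suspend(pxp, runtime) with one
entry point per PM phase, which also lets the system-suspend teardown take
its own runtime-PM wakeref. As wired up in intel_gt_pm.c above, the call
graph becomes:

  /*
   * system suspend:
   *   intel_gt_suspend_prepare() -> intel_pxp_suspend_prepare()
   *       invalidate sessions/objects while users may still run
   *   intel_gt_suspend_late()    -> intel_pxp_suspend()
   *       fini the HW inside with_intel_runtime_pm()
   *
   * runtime PM:
   *   intel_gt_runtime_suspend() -> intel_pxp_runtime_suspend()
   *       no invalidation: protected contexts hold a wakeref, so
   *       reaching here means nothing is using PXP
   *   intel_gt_runtime_resume()  -> intel_pxp_runtime_resume()
   *       thin wrapper around intel_pxp_resume()
   */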
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 9558e9e..cb685fe 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -81,7 +81,6 @@
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	bool plane_disabling = false;
 	int i;
-	bool fence_cookie = dma_fence_begin_signalling();
 
 	drm_atomic_helper_commit_modeset_disables(dev, state);
 
@@ -112,7 +111,6 @@
 	}
 
 	drm_atomic_helper_commit_hw_done(state);
-	dma_fence_end_signalling(fence_cookie);
 }
 
 static const struct drm_mode_config_helper_funcs imx_drm_mode_config_helpers = {
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
index 89dd618..0655582 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c
@@ -88,7 +88,7 @@
 		ctrl |= CTRL_BUS_WIDTH_24;
 		break;
 	default:
-		dev_err(drm->dev, "Unknown media bus format %d\n", bus_format);
+		dev_err(drm->dev, "Unknown media bus format 0x%x\n", bus_format);
 		break;
 	}
 
@@ -362,6 +362,12 @@
 			drm_atomic_get_new_bridge_state(state,
 							mxsfb->bridge);
 		bus_format = bridge_state->input_bus_cfg.format;
+		if (bus_format == MEDIA_BUS_FMT_FIXED) {
+			dev_warn_once(drm->dev,
+				      "Bridge does not provide bus format, assuming MEDIA_BUS_FMT_RGB888_1X24.\n"
+				      "Please fix bridge driver by handling atomic_get_input_bus_fmts.\n");
+			bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+		}
 	}
 
 	/* If there is no bridge, use bus format from connector */
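
The dev_warn_once() tells bridge authors what to implement; the
MEDIA_BUS_FMT_RGB888_1X24 fallback only papers over bridges that never
negotiate a format. For a hypothetical bridge whose input is hard-wired to
RGB888, the missing callback is a few lines (sketch; the my_bridge_* name is
made up):

  static u32 *my_bridge_get_input_bus_fmts(struct drm_bridge *bridge,
  					 struct drm_bridge_state *bridge_state,
  					 struct drm_crtc_state *crtc_state,
  					 struct drm_connector_state *conn_state,
  					 u32 output_fmt,
  					 unsigned int *num_input_fmts)
  {
  	u32 *fmts = kmalloc(sizeof(*fmts), GFP_KERNEL);	/* caller kfree()s */

  	if (!fmts)
  		return NULL;

  	fmts[0] = MEDIA_BUS_FMT_RGB888_1X24;
  	*num_input_fmts = 1;
  	return fmts;
  }

  /* hooked up via .atomic_get_input_bus_fmts = my_bridge_get_input_bus_fmts */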
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 12b107a..fa73fe5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1249,7 +1249,6 @@
 {
 	struct ttm_tt *ttm_dma = (void *)ttm;
 	struct nouveau_drm *drm;
-	struct device *dev;
 	bool slave = !!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL);
 
 	if (ttm_tt_is_populated(ttm))
@@ -1262,7 +1261,6 @@
 	}
 
 	drm = nouveau_bdev(bdev);
-	dev = drm->dev->dev;
 
 	return ttm_pool_alloc(&drm->ttm.bdev.pool, ttm, ctx);
 }
@@ -1272,7 +1270,6 @@
 			  struct ttm_tt *ttm)
 {
 	struct nouveau_drm *drm;
-	struct device *dev;
 	bool slave = !!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL);
 
 	if (slave)
@@ -1281,7 +1278,6 @@
 	nouveau_ttm_tt_unbind(bdev, ttm);
 
 	drm = nouveau_bdev(bdev);
-	dev = drm->dev->dev;
 
 	return ttm_pool_free(&drm->ttm.bdev.pool, ttm);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 6109cd9..e7efd9e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -562,6 +562,7 @@
 		nvkm_dbgopt(nouveau_debug, "DRM");
 
 	INIT_LIST_HEAD(&drm->clients);
+	mutex_init(&drm->clients_lock);
 	spin_lock_init(&drm->tile.lock);
 
 	/* workaround an odd issue on nvc1 by disabling the device's
@@ -632,6 +633,7 @@
 static void
 nouveau_drm_device_fini(struct drm_device *dev)
 {
+	struct nouveau_cli *cli, *temp_cli;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	if (nouveau_pmops_runtime()) {
@@ -656,9 +658,28 @@
 	nouveau_ttm_fini(drm);
 	nouveau_vga_fini(drm);
 
+	/*
+	 * There may be existing clients from as-yet unclosed files. For now,
+	 * clean them up here rather than deferring until the file is closed,
+	 * but this is likely not correct if we want to support hot-unplugging
+	 * properly.
+	 */
+	mutex_lock(&drm->clients_lock);
+	list_for_each_entry_safe(cli, temp_cli, &drm->clients, head) {
+		list_del(&cli->head);
+		mutex_lock(&cli->mutex);
+		if (cli->abi16)
+			nouveau_abi16_fini(cli->abi16);
+		mutex_unlock(&cli->mutex);
+		nouveau_cli_fini(cli);
+		kfree(cli);
+	}
+	mutex_unlock(&drm->clients_lock);
+
 	nouveau_cli_fini(&drm->client);
 	nouveau_cli_fini(&drm->master);
 	nvif_parent_dtor(&drm->parent);
+	mutex_destroy(&drm->clients_lock);
 	kfree(drm);
 }
 
@@ -796,7 +817,7 @@
 	struct nvkm_client *client;
 	struct nvkm_device *device;
 
-	drm_dev_unregister(dev);
+	drm_dev_unplug(dev);
 
 	client = nvxx_client(&drm->client.base);
 	device = nvkm_device_find(client->device);
@@ -1090,9 +1111,9 @@
 
 	fpriv->driver_priv = cli;
 
-	mutex_lock(&drm->client.mutex);
+	mutex_lock(&drm->clients_lock);
 	list_add(&cli->head, &drm->clients);
-	mutex_unlock(&drm->client.mutex);
+	mutex_unlock(&drm->clients_lock);
 
 done:
 	if (ret && cli) {
@@ -1110,6 +1131,16 @@
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	int dev_index;
+
+	/*
+	 * The device is gone, and as it currently stands all clients are
+	 * cleaned up in the removal codepath. In the future this may change
+	 * so that we can support hot-unplugging, but for now we immediately
+	 * return to avoid a double-free situation.
+	 */
+	if (!drm_dev_enter(dev, &dev_index))
+		return;
 
 	pm_runtime_get_sync(dev->dev);
 
@@ -1118,14 +1149,15 @@
 		nouveau_abi16_fini(cli->abi16);
 	mutex_unlock(&cli->mutex);
 
-	mutex_lock(&drm->client.mutex);
+	mutex_lock(&drm->clients_lock);
 	list_del(&cli->head);
-	mutex_unlock(&drm->client.mutex);
+	mutex_unlock(&drm->clients_lock);
 
 	nouveau_cli_fini(cli);
 	kfree(cli);
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
+	drm_dev_exit(dev_index);
 }
 
 static const struct drm_ioctl_desc
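
The postclose early-return depends on the drm_dev_unplug()/drm_dev_enter()
pairing in the hunks above: after unplug, late file-release paths bail out
instead of re-freeing clients that nouveau_drm_device_fini() already tore
down. The generic pattern:

  int idx;

  if (!drm_dev_enter(dev, &idx))
  	return;		/* device unplugged; its state is already gone */

  /* ... safe to touch device state; unplug waits for us ... */
  drm_dev_exit(idx);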
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index ba65f13..b2a970a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -139,6 +139,11 @@
 
 	struct list_head clients;
 
+	/**
+	 * @clients_lock: Protects access to the @clients list of &struct nouveau_cli.
+	 */
+	struct mutex clients_lock;
+
 	u8 old_pm_cap;
 
 	struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 8c2ecc2..9416bee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -56,7 +56,7 @@
 
 	nouveau_bo_del_io_reserve_lru(bo);
 	prot = vm_get_page_prot(vma->vm_flags);
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
 	nouveau_bo_add_io_reserve_lru(bo);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
@@ -337,7 +337,7 @@
 	struct ttm_buffer_object *bo = &nvbo->bo;
 	uint32_t domains = valid_domains & nvbo->valid_domains &
 		(write_domains ? write_domains : read_domains);
-	uint32_t pref_domains = 0;;
+	uint32_t pref_domains = 0;
 
 	if (!domains)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 1a896a2..266809e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -162,10 +162,14 @@
 	 */
 
 	mm = get_task_mm(current);
+	if (!mm)
+		return -EINVAL;
 	mmap_read_lock(mm);
 
 	if (!cli->svm.svmm) {
 		mmap_read_unlock(mm);
+		mmput(mm);
 		return -EINVAL;
 	}
 
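The SVM fix plugs two holes around the same call: get_task_mm() returns NULL
for tasks without a user address space, and every successful call must be
paired with mmput() on every exit path, not just the success one. The
canonical shape:

  struct mm_struct *mm = get_task_mm(current);

  if (!mm)
  	return -EINVAL;

  mmap_read_lock(mm);
  /* ... operate on the address space ... */
  mmap_read_unlock(mm);
  mmput(mm);	/* balances get_task_mm() */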
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
index 704df0f..09a112a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
@@ -78,6 +78,6 @@
 gt215_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	     struct nvkm_engine **pengine)
 {
-	return nvkm_falcon_new_(&gt215_ce, device, type, inst,
+	return nvkm_falcon_new_(&gt215_ce, device, type, -1,
 				(device->chipset != 0xaf), 0x104000, pengine);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index ca75c5f..88d262b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2627,6 +2627,27 @@
 };
 
 static const struct nvkm_device_chip
+nv176_chipset = {
+	.name = "GA106",
+	.bar      = { 0x00000001, tu102_bar_new },
+	.bios     = { 0x00000001, nvkm_bios_new },
+	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fb       = { 0x00000001, ga102_fb_new },
+	.gpio     = { 0x00000001, ga102_gpio_new },
+	.i2c      = { 0x00000001, gm200_i2c_new },
+	.imem     = { 0x00000001, nv50_instmem_new },
+	.mc       = { 0x00000001, ga100_mc_new },
+	.mmu      = { 0x00000001, tu102_mmu_new },
+	.pci      = { 0x00000001, gp100_pci_new },
+	.privring = { 0x00000001, gm200_privring_new },
+	.timer    = { 0x00000001, gk20a_timer_new },
+	.top      = { 0x00000001, ga100_top_new },
+	.disp     = { 0x00000001, ga102_disp_new },
+	.dma      = { 0x00000001, gv100_dma_new },
+	.fifo     = { 0x00000001, ga102_fifo_new },
+};
+
+static const struct nvkm_device_chip
 nv177_chipset = {
 	.name = "GA107",
 	.bar      = { 0x00000001, tu102_bar_new },
@@ -3072,6 +3093,7 @@
 		case 0x168: device->chip = &nv168_chipset; break;
 		case 0x172: device->chip = &nv172_chipset; break;
 		case 0x174: device->chip = &nv174_chipset; break;
+		case 0x176: device->chip = &nv176_chipset; break;
 		case 0x177: device->chip = &nv177_chipset; break;
 		default:
 			if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
@@ -3147,8 +3169,7 @@
 	WARN_ON(device->chip->ptr.inst & ~((1 << ARRAY_SIZE(device->ptr)) - 1));             \
 	for (j = 0; device->chip->ptr.inst && j < ARRAY_SIZE(device->ptr); j++) {            \
 		if ((device->chip->ptr.inst & BIT(j)) && (subdev_mask & BIT_ULL(type))) {    \
-			int inst = (device->chip->ptr.inst == 1) ? -1 : (j);                 \
-			ret = device->chip->ptr.ctor(device, (type), inst, &device->ptr[j]); \
+			ret = device->chip->ptr.ctor(device, (type), (j), &device->ptr[j]);  \
 			subdev = nvkm_device_subdev(device, (type), (j));                    \
 			if (ret) {                                                           \
 				nvkm_subdev_del(&subdev);                                    \
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
index 6e3c450..3ff4934 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
@@ -62,7 +62,6 @@
 		nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header);
 		nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low);
 		nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000);
 		nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000);
 		nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000);
 		nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c
index c39e797..cf5dcfd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c
@@ -21,7 +21,6 @@
  */
 #include "priv.h"
 
-#include "priv.h"
 #include <core/firmware.h>
 
 static void *
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
index cdb1ead..82b4c8e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
@@ -207,11 +207,13 @@
 gm200_acr_wpr_parse(struct nvkm_acr *acr)
 {
 	const struct wpr_header *hdr = (void *)acr->wpr_fw->data;
+	struct nvkm_acr_lsfw *lsfw;
 
 	while (hdr->falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID) {
 		wpr_header_dump(&acr->subdev, hdr);
-		if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
-			return -ENOMEM;
+		lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
+		if (IS_ERR(lsfw))
+			return PTR_ERR(lsfw);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
index fb9132a..fd97a93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
@@ -161,11 +161,13 @@
 gp102_acr_wpr_parse(struct nvkm_acr *acr)
 {
 	const struct wpr_header_v1 *hdr = (void *)acr->wpr_fw->data;
+	struct nvkm_acr_lsfw *lsfw;
 
 	while (hdr->falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) {
 		wpr_header_v1_dump(&acr->subdev, hdr);
-		if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
-			return -ENOMEM;
+		lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
+		if (IS_ERR(lsfw))
+			return PTR_ERR(lsfw);
 	}
 
 	return 0;
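
gm200 and gp102 had the same bug shape as the GuC virtual-engine fix earlier
in this pull: nvkm_acr_lsfw_add() fails via ERR_PTR(), so a NULL test never
fires and the real reason was reported as -ENOMEM. The standard decode for
such APIs:

  lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, falcon_id);
  if (IS_ERR(lsfw))
  	return PTR_ERR(lsfw);	/* recover the encoded -errno */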
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
index d6a1f8d..186b4e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
@@ -299,7 +299,7 @@
 	page = uvmm->vmm->func->page;
 	for (nr = 0; page[nr].shift; nr++);
 
-	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+	if (!(nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		if ((index = args->v0.index) >= nr)
 			return -EINVAL;
 		type = page[index].type;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
index b5e7337..17899fc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ -488,7 +488,7 @@
 		struct gp100_vmm_fault_cancel_v0 v0;
 	} *args = argv;
 	int ret = -ENOSYS;
-	u32 inst, aper;
+	u32 aper;
 
 	if ((ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false)))
 		return ret;
@@ -502,7 +502,7 @@
 	args->v0.inst |= 0x80000000;
 
 	if (!WARN_ON(nvkm_gr_ctxsw_pause(device))) {
-		if ((inst = nvkm_gr_ctxsw_inst(device)) == args->v0.inst) {
+		if (nvkm_gr_ctxsw_inst(device) == args->v0.inst) {
 			gf100_vmm_invalidate(vmm, 0x0000001b
 					     /* CANCEL_TARGETED. */ |
 					     (args->v0.hub    << 20) |
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 2cb8eba..cfc8d64 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -520,6 +520,16 @@
 	  Say Y here if you want to enable support for Sharp LS043T1LE01 qHD
 	  (540x960) DSI panel as found on the Qualcomm APQ8074 Dragonboard
 
+config DRM_PANEL_SHARP_LS060T1SX01
+	tristate "Sharp LS060T1SX01 FullHD video mode panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for Sharp LS060T1SX01 6.0"
+	  FullHD (1080x1920) DSI panel as found in the Dragonboard Display
+	  Adapter Bundle.
+
 config DRM_PANEL_SITRONIX_ST7701
 	tristate "Sitronix ST7701 panel driver"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 6e30640..bca4cc1 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -53,6 +53,7 @@
 obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LS037V7DW01) += panel-sharp-ls037v7dw01.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LS043T1LE01) += panel-sharp-ls043t1le01.o
+obj-$(CONFIG_DRM_PANEL_SHARP_LS060T1SX01) += panel-sharp-ls060t1sx01.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7701) += panel-sitronix-st7701.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7703) += panel-sitronix-st7703.o
 obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7789V) += panel-sitronix-st7789v.o
diff --git a/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c b/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c
index 30f28ad..31daae1d 100644
--- a/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c
+++ b/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c
@@ -8,6 +8,7 @@
 #include <linux/backlight.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/media-bus-format.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/regulator/consumer.h>
@@ -220,6 +221,10 @@
 	.height_mm   = 130,
 };
 
+static const u32 mantix_bus_formats[] = {
+	MEDIA_BUS_FMT_RGB888_1X24,
+};
+
 static int mantix_get_modes(struct drm_panel *panel,
 			    struct drm_connector *connector)
 {
@@ -241,6 +246,10 @@
 	connector->display_info.height_mm = mode->height_mm;
 	drm_mode_probed_add(connector, mode);
 
+	drm_display_info_set_bus_formats(&connector->display_info,
+					 mantix_bus_formats,
+					 ARRAY_SIZE(mantix_bus_formats));
+
 	return 1;
 }
 
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c
index e0b1a7e..e0f7736 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c
@@ -116,7 +116,8 @@
 static int s6e63m0_dsi_remove(struct mipi_dsi_device *dsi)
 {
 	mipi_dsi_detach(dsi);
-	return s6e63m0_remove(&dsi->dev);
+	s6e63m0_remove(&dsi->dev);
+	return 0;
 }
 
 static const struct of_device_id s6e63m0_dsi_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
index 3669cc3..c178d96 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c
@@ -64,7 +64,8 @@
 
 static int s6e63m0_spi_remove(struct spi_device *spi)
 {
-	return s6e63m0_remove(&spi->dev);
+	s6e63m0_remove(&spi->dev);
+	return 0;
 }
 
 static const struct of_device_id s6e63m0_spi_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
index 35d72ac..b34fa4d 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
@@ -749,13 +749,11 @@
 }
 EXPORT_SYMBOL_GPL(s6e63m0_probe);
 
-int s6e63m0_remove(struct device *dev)
+void s6e63m0_remove(struct device *dev)
 {
 	struct s6e63m0 *ctx = dev_get_drvdata(dev);
 
 	drm_panel_remove(&ctx->panel);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(s6e63m0_remove);
 
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h
index 306605e..c926eca 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h
@@ -35,6 +35,6 @@
 				   const u8 *data,
 				   size_t len),
 		  bool dsi_mode);
-int s6e63m0_remove(struct device *dev);
+void s6e63m0_remove(struct device *dev);
 
 #endif /* _PANEL_SAMSUNG_S6E63M0_H */
diff --git a/drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c b/drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c
new file mode 100644
index 0000000..e125705
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021 Linaro Ltd.
+ * Generated with linux-mdss-dsi-panel-driver-generator from vendor device tree:
+ *   Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regulator/consumer.h>
+
+#include <video/mipi_display.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+struct sharp_ls060 {
+	struct drm_panel panel;
+	struct mipi_dsi_device *dsi;
+	struct regulator *vddi_supply;
+	struct regulator *vddh_supply;
+	struct regulator *avdd_supply;
+	struct regulator *avee_supply;
+	struct gpio_desc *reset_gpio;
+	bool prepared;
+};
+
+static inline struct sharp_ls060 *to_sharp_ls060(struct drm_panel *panel)
+{
+	return container_of(panel, struct sharp_ls060, panel);
+}
+
+#define dsi_dcs_write_seq(dsi, seq...) ({				\
+		static const u8 d[] = { seq };				\
+									\
+		mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d));	\
+	})
+
+static void sharp_ls060_reset(struct sharp_ls060 *ctx)
+{
+	gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+	usleep_range(10000, 11000);
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+	usleep_range(10000, 11000);
+	gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+	usleep_range(10000, 11000);
+}
+
+static int sharp_ls060_on(struct sharp_ls060 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
+	ret = dsi_dcs_write_seq(dsi, 0xbb, 0x13);
+	if (ret < 0) {
+		dev_err(dev, "Failed to send command: %d\n", ret);
+		return ret;
+	}
+
+	ret = dsi_dcs_write_seq(dsi, MIPI_DCS_WRITE_MEMORY_START);
+	if (ret < 0) {
+		dev_err(dev, "Failed to send command: %d\n", ret);
+		return ret;
+	}
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to exit sleep mode: %d\n", ret);
+		return ret;
+	}
+	msleep(120);
+
+	ret = mipi_dsi_dcs_set_display_on(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display on: %d\n", ret);
+		return ret;
+	}
+	msleep(50);
+
+	return 0;
+}
+
+static int sharp_ls060_off(struct sharp_ls060 *ctx)
+{
+	struct mipi_dsi_device *dsi = ctx->dsi;
+	struct device *dev = &dsi->dev;
+	int ret;
+
+	dsi->mode_flags &= ~MIPI_DSI_MODE_LPM;
+
+	ret = mipi_dsi_dcs_set_display_off(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set display off: %d\n", ret);
+		return ret;
+	}
+	usleep_range(2000, 3000);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enter sleep mode: %d\n", ret);
+		return ret;
+	}
+	msleep(121);
+
+	return 0;
+}
+
+static int sharp_ls060_prepare(struct drm_panel *panel)
+{
+	struct sharp_ls060 *ctx = to_sharp_ls060(panel);
+	struct device *dev = &ctx->dsi->dev;
+	int ret;
+
+	if (ctx->prepared)
+		return 0;
+
+	ret = regulator_enable(ctx->vddi_supply);
+	if (ret < 0)
+		return ret;
+
+	ret = regulator_enable(ctx->avdd_supply);
+	if (ret < 0)
+		goto err_avdd;
+
+	usleep_range(1000, 2000);
+
+	ret = regulator_enable(ctx->avee_supply);
+	if (ret < 0)
+		goto err_avee;
+
+	usleep_range(10000, 11000);
+
+	ret = regulator_enable(ctx->vddh_supply);
+	if (ret < 0)
+		goto err_vddh;
+
+	usleep_range(10000, 11000);
+
+	sharp_ls060_reset(ctx);
+
+	ret = sharp_ls060_on(ctx);
+	if (ret < 0) {
+		dev_err(dev, "Failed to initialize panel: %d\n", ret);
+		goto err_on;
+	}
+
+	ctx->prepared = true;
+
+	return 0;
+
+err_on:
+	regulator_disable(ctx->vddh_supply);
+
+	usleep_range(10000, 11000);
+
+err_vddh:
+	regulator_disable(ctx->avee_supply);
+
+err_avee:
+	regulator_disable(ctx->avdd_supply);
+
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+
+err_avdd:
+	regulator_disable(ctx->vddi_supply);
+
+	return ret;
+}
+
+static int sharp_ls060_unprepare(struct drm_panel *panel)
+{
+	struct sharp_ls060 *ctx = to_sharp_ls060(panel);
+	struct device *dev = &ctx->dsi->dev;
+	int ret;
+
+	if (!ctx->prepared)
+		return 0;
+
+	ret = sharp_ls060_off(ctx);
+	if (ret < 0)
+		dev_err(dev, "Failed to un-initialize panel: %d\n", ret);
+
+	regulator_disable(ctx->vddh_supply);
+
+	usleep_range(10000, 11000);
+
+	regulator_disable(ctx->avee_supply);
+	regulator_disable(ctx->avdd_supply);
+
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+
+	regulator_disable(ctx->vddi_supply);
+
+	ctx->prepared = false;
+	return 0;
+}
+
+static const struct drm_display_mode sharp_ls060_mode = {
+	.clock = (1080 + 96 + 16 + 64) * (1920 + 4 + 1 + 16) * 60 / 1000,
+	.hdisplay = 1080,
+	.hsync_start = 1080 + 96,
+	.hsync_end = 1080 + 96 + 16,
+	.htotal = 1080 + 96 + 16 + 64,
+	.vdisplay = 1920,
+	.vsync_start = 1920 + 4,
+	.vsync_end = 1920 + 4 + 1,
+	.vtotal = 1920 + 4 + 1 + 16,
+	.width_mm = 75,
+	.height_mm = 132,
+};
+
+static int sharp_ls060_get_modes(struct drm_panel *panel,
+				 struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(connector->dev, &sharp_ls060_mode);
+	if (!mode)
+		return -ENOMEM;
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	connector->display_info.width_mm = mode->width_mm;
+	connector->display_info.height_mm = mode->height_mm;
+	drm_mode_probed_add(connector, mode);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs sharp_ls060_panel_funcs = {
+	.prepare = sharp_ls060_prepare,
+	.unprepare = sharp_ls060_unprepare,
+	.get_modes = sharp_ls060_get_modes,
+};
+
+static int sharp_ls060_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	struct sharp_ls060 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->vddi_supply = devm_regulator_get(dev, "vddi");
+	if (IS_ERR(ctx->vddi_supply))
+		return PTR_ERR(ctx->vddi_supply);
+
+	ctx->vddh_supply = devm_regulator_get(dev, "vddh");
+	if (IS_ERR(ctx->vddh_supply))
+		return PTR_ERR(ctx->vddh_supply);
+
+	ctx->avdd_supply = devm_regulator_get(dev, "avdd");
+	if (IS_ERR(ctx->avdd_supply))
+		return PTR_ERR(ctx->avdd_supply);
+
+	ctx->avee_supply = devm_regulator_get(dev, "avee");
+	if (IS_ERR(ctx->avee_supply))
+		return PTR_ERR(ctx->avee_supply);
+
+	ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(ctx->reset_gpio))
+		return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio),
+				     "Failed to get reset-gpios\n");
+
+	ctx->dsi = dsi;
+	mipi_dsi_set_drvdata(dsi, ctx);
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+			  MIPI_DSI_MODE_NO_EOT_PACKET |
+			  MIPI_DSI_CLOCK_NON_CONTINUOUS;
+
+	drm_panel_init(&ctx->panel, dev, &sharp_ls060_panel_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+
+	ret = drm_panel_of_backlight(&ctx->panel);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to get backlight\n");
+
+	drm_panel_add(&ctx->panel);
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "Failed to attach to DSI host: %d\n", ret);
+		drm_panel_remove(&ctx->panel);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int sharp_ls060_remove(struct mipi_dsi_device *dsi)
+{
+	struct sharp_ls060 *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret);
+
+	drm_panel_remove(&ctx->panel);
+
+	return 0;
+}
+
+static const struct of_device_id sharp_ls060t1sx01_of_match[] = {
+	{ .compatible = "sharp,ls060t1sx01" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sharp_ls060t1sx01_of_match);
+
+static struct mipi_dsi_driver sharp_ls060_driver = {
+	.probe = sharp_ls060_probe,
+	.remove = sharp_ls060_remove,
+	.driver = {
+		.name = "panel-sharp-ls060t1sx01",
+		.of_match_table = sharp_ls060t1sx01_of_match,
+	},
+};
+module_mipi_dsi_driver(sharp_ls060_driver);
+
+MODULE_AUTHOR("Dmitry Baryshkov <dmitry.baryshkov@linaro.org>");
+MODULE_DESCRIPTION("DRM driver for Sharp LS060T1SX01 1080p video mode DSI panel");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 7f3e1b8..eb475a3 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2370,6 +2370,38 @@
 	.connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
+static const struct drm_display_mode logictechno_lttd800480070_l2rt_mode = {
+	.clock = 33000,
+	.hdisplay = 800,
+	.hsync_start = 800 + 112,
+	.hsync_end = 800 + 112 + 3,
+	.htotal = 800 + 112 + 3 + 85,
+	.vdisplay = 480,
+	.vsync_start = 480 + 38,
+	.vsync_end = 480 + 38 + 3,
+	.vtotal = 480 + 38 + 3 + 29,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc logictechno_lttd800480070_l2rt = {
+	.modes = &logictechno_lttd800480070_l2rt_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 154,
+		.height = 86,
+	},
+	.delay = {
+		.prepare = 45,
+		.enable = 100,
+		.disable = 100,
+		.unprepare = 45
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE,
+	.connector_type = DRM_MODE_CONNECTOR_DPI,
+};
+
 static const struct drm_display_mode logictechno_lttd800480070_l6wh_rt_mode = {
 	.clock = 33000,
 	.hdisplay = 800,
@@ -3751,6 +3783,9 @@
 		.compatible = "logictechno,lt170410-2whc",
 		.data = &logictechno_lt170410_2whc,
 	}, {
+		.compatible = "logictechno,lttd800480070-l2rt",
+		.data = &logictechno_lttd800480070_l2rt,
+	}, {
 		.compatible = "logictechno,lttd800480070-l6wh-rt",
 		.data = &logictechno_lttd800480070_l6wh_rt,
 	}, {
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7703.c b/drivers/gpu/drm/panel/panel-sitronix-st7703.c
index a2c303e..73f69c9 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7703.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7703.c
@@ -453,6 +453,10 @@
 	return ret;
 }
 
+static const u32 st7703_bus_formats[] = {
+	MEDIA_BUS_FMT_RGB888_1X24,
+};
+
 static int st7703_get_modes(struct drm_panel *panel,
 			    struct drm_connector *connector)
 {
@@ -474,6 +478,10 @@
 	connector->display_info.height_mm = mode->height_mm;
 	drm_mode_probed_add(connector, mode);
 
+	drm_display_info_set_bus_formats(&connector->display_info,
+					 st7703_bus_formats,
+					 ARRAY_SIZE(st7703_bus_formats));
+
 	return 1;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 458f92a..a36a4f2 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -61,7 +61,7 @@
 		goto unlock_resv;
 
 	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
-				       TTM_BO_VM_NUM_PREFAULT, 1);
+				       TTM_BO_VM_NUM_PREFAULT);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		goto unlock_mclk;
 
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 042c16b..f91fb31 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -699,30 +699,20 @@
 					    struct drm_gem_object *obj,
 					    bool write)
 {
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;
-	struct dma_fence **fences;
-	unsigned int i, fence_count;
 
-	if (!write) {
-		struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv);
-
-		return drm_sched_job_add_dependency(job, fence);
+	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
+		/* Make sure to grab an additional ref on the added fence */
+		dma_fence_get(fence);
+		ret = drm_sched_job_add_dependency(job, fence);
+		if (ret) {
+			dma_fence_put(fence);
+			return ret;
+		}
 	}
-
-	ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences);
-	if (ret || !fence_count)
-		return ret;
-
-	for (i = 0; i < fence_count; i++) {
-		ret = drm_sched_job_add_dependency(job, fences[i]);
-		if (ret)
-			break;
-	}
-
-	for (; i < fence_count; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
 
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 5755f04..8c796de 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -46,6 +46,7 @@
 	default MACH_SUN8I
 	select CRC_CCITT
 	select DRM_MIPI_DSI
+	select RESET_CONTROLLER
 	select PHY_SUN6I_MIPI_DPHY
 	help
 	  Choose this option if you want have an Allwinner SoC with
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d62b201..739f11c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -269,23 +269,15 @@
 static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 {
 	struct dma_resv *resv = &bo->base._resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;
 
-	rcu_read_lock();
-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-	if (fence && !fence->ops->signaled)
-		dma_fence_enable_sw_signaling(fence);
-
-	for (i = 0; fobj && i < fobj->shared_count; ++i) {
-		fence = rcu_dereference(fobj->shared[i]);
-
+	dma_resv_iter_begin(&cursor, resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
 	}
-	rcu_read_unlock();
+	dma_resv_iter_end(&cursor);
 }
 
 /**
@@ -627,7 +619,8 @@
 			*busy = !ret;
 	}
 
-	if (ret && place && !bo->bdev->funcs->eviction_valuable(bo, place)) {
+	if (ret && place && (bo->resource->mem_type != place->mem_type ||
+		!bo->bdev->funcs->eviction_valuable(bo, place))) {
 		ret = false;
 		if (*locked) {
 			dma_resv_unlock(bo->base.resv);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 33680c9..08ba083 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -173,89 +173,6 @@
 }
 EXPORT_SYMBOL(ttm_bo_vm_reserve);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/**
- * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
- * @vmf: Fault data
- * @bo: The buffer object
- * @page_offset: Page offset from bo start
- * @fault_page_size: The size of the fault in pages.
- * @pgprot: The page protections.
- * Does additional checking whether it's possible to insert a PUD or PMD
- * pfn and performs the insertion.
- *
- * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
- * a huge fault was not possible, or on insertion error.
- */
-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
-					struct ttm_buffer_object *bo,
-					pgoff_t page_offset,
-					pgoff_t fault_page_size,
-					pgprot_t pgprot)
-{
-	pgoff_t i;
-	vm_fault_t ret;
-	unsigned long pfn;
-	pfn_t pfnt;
-	struct ttm_tt *ttm = bo->ttm;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-
-	/* Fault should not cross bo boundary. */
-	page_offset &= ~(fault_page_size - 1);
-	if (page_offset + fault_page_size > bo->resource->num_pages)
-		goto out_fallback;
-
-	if (bo->resource->bus.is_iomem)
-		pfn = ttm_bo_io_mem_pfn(bo, page_offset);
-	else
-		pfn = page_to_pfn(ttm->pages[page_offset]);
-
-	/* pfn must be fault_page_size aligned. */
-	if ((pfn & (fault_page_size - 1)) != 0)
-		goto out_fallback;
-
-	/* Check that memory is contiguous. */
-	if (!bo->resource->bus.is_iomem) {
-		for (i = 1; i < fault_page_size; ++i) {
-			if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
-				goto out_fallback;
-		}
-	} else if (bo->bdev->funcs->io_mem_pfn) {
-		for (i = 1; i < fault_page_size; ++i) {
-			if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
-				goto out_fallback;
-		}
-	}
-
-	pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
-	if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
-		ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-	else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
-		ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
-#endif
-	else
-		WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
-
-	if (ret != VM_FAULT_NOPAGE)
-		goto out_fallback;
-
-	return VM_FAULT_NOPAGE;
-out_fallback:
-	count_vm_event(THP_FAULT_FALLBACK);
-	return VM_FAULT_FALLBACK;
-}
-#else
-static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
-					struct ttm_buffer_object *bo,
-					pgoff_t page_offset,
-					pgoff_t fault_page_size,
-					pgprot_t pgprot)
-{
-	return VM_FAULT_FALLBACK;
-}
-#endif
-
 /**
  * ttm_bo_vm_fault_reserved - TTM fault helper
  * @vmf: The struct vm_fault given as argument to the fault callback
@@ -263,7 +180,6 @@
  * @num_prefault: Maximum number of prefault pages. The caller may want to
  * specify this based on madvise settings and the size of the GPU object
  * backed by the memory.
- * @fault_page_size: The size of the fault in pages.
  *
  * This function inserts one or more page table entries pointing to the
  * memory backing the buffer object, and then returns a return code
@@ -277,8 +193,7 @@
  */
 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 				    pgprot_t prot,
-				    pgoff_t num_prefault,
-				    pgoff_t fault_page_size)
+				    pgoff_t num_prefault)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -329,11 +244,6 @@
 		prot = pgprot_decrypted(prot);
 	}
 
-	/* We don't prefault on huge faults. Yet. */
-	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1)
-		return ttm_bo_vm_insert_huge(vmf, bo, page_offset,
-					     fault_page_size, prot);
-
 	/*
 	 * Speculatively prefault a number of pages. Only error on
 	 * first page.
@@ -429,7 +339,7 @@
 
 	prot = vma->vm_page_prot;
 	if (drm_dev_enter(ddev, &idx)) {
-		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
 		drm_dev_exit(idx);
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, prot);
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index 3750fd2..930574a 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -30,7 +30,7 @@
 		int bval = (i + block * EDID_LENGTH) << 8;
 		ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
 				      0x02, (0x80 | (0x02 << 5)), bval,
-				      0xA1, read_buff, 2, HZ);
+				      0xA1, read_buff, 2, 1000);
 		if (ret < 1) {
 			DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret);
 			kfree(read_buff);
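
[Editor's note: the HZ-to-1000 change above matters because usb_control_msg() takes its timeout in milliseconds, while HZ is the kernel tick rate (commonly 100, 250 or 1000), so passing HZ meant "one second" only on CONFIG_HZ=1000 kernels. A hedged sketch of the corrected call shape; the constant name is illustrative.]

	#define EDID_READ_TIMEOUT_MS	1000	/* timeout argument is in ms, not jiffies */

	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
			      0x02, 0x80 | (0x02 << 5), bval, 0xA1,
			      read_buff, 2, EDID_READ_TIMEOUT_MS);
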
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 6a000d7..e47ae40 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -487,8 +487,8 @@
 			for (i = 0; i < se->in_sync_count; i++) {
 				struct drm_v3d_sem in;
 
-				ret = copy_from_user(&in, handle++, sizeof(in));
-				if (ret) {
+				if (copy_from_user(&in, handle++, sizeof(in))) {
+					ret = -EFAULT;
 					DRM_DEBUG("Failed to copy wait dep handle.\n");
 					goto fail_deps;
 				}
@@ -609,8 +609,8 @@
 	for (i = 0; i < count; i++) {
 		struct drm_v3d_sem out;
 
-		ret = copy_from_user(&out, post_deps++, sizeof(out));
-		if (ret) {
+		if (copy_from_user(&out, post_deps++, sizeof(out))) {
+			ret = -EFAULT;
 			DRM_DEBUG("Failed to copy post dep handles\n");
 			goto fail;
 		}
@@ -646,9 +646,8 @@
 	struct v3d_submit_ext *se = data;
 	int ret;
 
-	ret = copy_from_user(&multisync, ext, sizeof(multisync));
-	if (ret)
-		return ret;
+	if (copy_from_user(&multisync, ext, sizeof(multisync)))
+		return -EFAULT;
 
 	if (multisync.pad)
 		return -EINVAL;
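
[Editor's note: all three v3d hunks fix the same misuse. copy_from_user() returns the number of bytes left uncopied (0 on success), never a negative errno, so returning its result directly hands a positive byte count back to user space. The idiomatic pattern, as a minimal sketch:]

	#include <linux/uaccess.h>

	/* copy_from_user() returns 0 on success or the number of uncopied
	 * bytes on failure; translate any failure to -EFAULT
	 */
	if (copy_from_user(&in, user_ptr, sizeof(in)))
		return -EFAULT;
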
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index fddaeb0..f642bd6 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -391,7 +391,7 @@
 
 	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
 	if (!bo)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
 	bo->madv = VC4_MADV_WILLNEED;
 	refcount_set(&bo->usecnt, 0);
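
[Editor's note: the vc4 one-liner is about error-return conventions. ERR_PTR() values are non-NULL, so a caller that checks "if (!bo)" never catches ERR_PTR(-ENOMEM) and goes on to dereference it. A sketch of the two conventions side by side; the caller shapes are assumed, not from this patch.]

	#include <linux/err.h>

	/* NULL convention: caller checks !ptr */
	obj = create_obj();
	if (!obj)
		return -ENOMEM;

	/* ERR_PTR convention: caller checks IS_ERR(ptr) */
	obj = create_obj();
	if (IS_ERR(obj))
		return PTR_ERR(obj);
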
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index a6caebd4..5b00310a 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -308,8 +308,10 @@
 		return ERR_PTR(-EINVAL);
 
 	virtio_gpu_fb = kzalloc(sizeof(*virtio_gpu_fb), GFP_KERNEL);
-	if (virtio_gpu_fb == NULL)
+	if (virtio_gpu_fb == NULL) {
+		drm_gem_object_put(obj);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj);
 	if (ret) {
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 749db18..d86e1ad 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -163,10 +163,11 @@
 	struct drm_file *drm_file = filp->private_data;
 	struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
 	struct drm_device *dev = drm_file->minor->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
 	struct drm_pending_event *e = NULL;
 	__poll_t mask = 0;
 
-	if (!vfpriv->ring_idx_mask)
+	if (!vgdev->has_virgl_3d || !vfpriv || !vfpriv->ring_idx_mask)
 		return drm_poll(filp, wait);
 
 	poll_wait(filp, &drm_file->event_wait, wait);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a833751..858aff9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1550,10 +1550,6 @@
 			pgoff_t start, pgoff_t end);
 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
-				enum page_entry_size pe_size);
-#endif
 
 /* Transparent hugepage support - vmwgfx_thp.c */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
index e5a9a5c..922317d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -477,7 +477,7 @@
 	else
 		prot = vm_get_page_prot(vma->vm_flags);
 
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
 
@@ -486,73 +486,3 @@
 
 	return ret;
 }
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
-				enum page_entry_size pe_size)
-{
-	struct vm_area_struct *vma = vmf->vma;
-	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-	    vma->vm_private_data;
-	struct vmw_buffer_object *vbo =
-		container_of(bo, struct vmw_buffer_object, base);
-	pgprot_t prot;
-	vm_fault_t ret;
-	pgoff_t fault_page_size;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-
-	switch (pe_size) {
-	case PE_SIZE_PMD:
-		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
-		break;
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-	case PE_SIZE_PUD:
-		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
-		break;
-#endif
-	default:
-		WARN_ON_ONCE(1);
-		return VM_FAULT_FALLBACK;
-	}
-
-	/* Always do write dirty-tracking and COW on PTE level. */
-	if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
-		return VM_FAULT_FALLBACK;
-
-	ret = ttm_bo_vm_reserve(bo, vmf);
-	if (ret)
-		return ret;
-
-	if (vbo->dirty) {
-		pgoff_t allowed_prefault;
-		unsigned long page_offset;
-
-		page_offset = vmf->pgoff -
-			drm_vma_node_start(&bo->base.vma_node);
-		if (page_offset >= bo->resource->num_pages ||
-		    vmw_resources_clean(vbo, page_offset,
-					page_offset + PAGE_SIZE,
-					&allowed_prefault)) {
-			ret = VM_FAULT_SIGBUS;
-			goto out_unlock;
-		}
-
-		/*
-		 * Write protect, so we get a new fault on write, and can
-		 * split.
-		 */
-		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
-	} else {
-		prot = vm_get_page_prot(vma->vm_flags);
-	}
-
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
-	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-		return ret;
-
-out_unlock:
-	dma_resv_unlock(bo->base.resv);
-
-	return ret;
-}
-#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index e6b1f98..0a4c340 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -61,9 +61,6 @@
 		.fault = vmw_bo_vm_fault,
 		.open = ttm_bo_vm_open,
 		.close = ttm_bo_vm_close,
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		.huge_fault = vmw_bo_vm_huge_fault,
-#endif
 	};
 	struct drm_file *file_priv = filp->private_data;
 	struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 9f14d99..bc76053 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -773,6 +773,7 @@
 	.probe = xen_drv_probe,
 	.remove = xen_drv_remove,
 	.otherend_changed = displback_changed,
+	.not_essential = true,
 };
 
 static int __init xen_drv_init(void)
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 5d57214..f3ecddc 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -854,7 +854,7 @@
 		switch (usage->hid & HID_USAGE) {
 		case 0x10: asus_map_key_clear(KEY_BRIGHTNESSDOWN);	break;
 		case 0x20: asus_map_key_clear(KEY_BRIGHTNESSUP);		break;
-		case 0x35: asus_map_key_clear(KEY_SCREENLOCK);		break;
+		case 0x35: asus_map_key_clear(KEY_DISPLAY_OFF);		break;
 		case 0x6c: asus_map_key_clear(KEY_SLEEP);		break;
 		case 0x7c: asus_map_key_clear(KEY_MICMUTE);		break;
 		case 0x82: asus_map_key_clear(KEY_CAMERA);		break;
diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index 4ef1c3b..8ee77f4 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -966,24 +966,23 @@
 	mutex_init(&dev->lock);
 	init_completion(&dev->wait);
 
+	ret = ft260_xfer_status(dev);
+	if (ret)
+		ft260_i2c_reset(hdev);
+
+	i2c_set_adapdata(&dev->adap, dev);
 	ret = i2c_add_adapter(&dev->adap);
 	if (ret) {
 		hid_err(hdev, "failed to add i2c adapter\n");
 		goto err_hid_close;
 	}
 
-	i2c_set_adapdata(&dev->adap, dev);
-
 	ret = sysfs_create_group(&hdev->dev.kobj, &ft260_attr_group);
 	if (ret < 0) {
 		hid_err(hdev, "failed to create sysfs attrs\n");
 		goto err_i2c_free;
 	}
 
-	ret = ft260_xfer_status(dev);
-	if (ret)
-		ft260_i2c_reset(hdev);
-
 	return 0;
 
 err_i2c_free:
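
[Editor's note: the reordering in this ft260 hunk follows a general rule: driver data must be attached with i2c_set_adapdata() before i2c_add_adapter(), because registration can immediately trigger client probes that read it back. A minimal sketch of the safe ordering:]

	/* attach private data first; i2c_add_adapter() may invoke callbacks
	 * that call i2c_get_adapdata() before this function returns
	 */
	i2c_set_adapdata(&dev->adap, dev);
	ret = i2c_add_adapter(&dev->adap);
	if (ret)
		return ret;
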
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 95037a3..96a4559 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -397,6 +397,7 @@
 #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W	0x0401
 #define USB_DEVICE_ID_HP_X2		0x074d
 #define USB_DEVICE_ID_HP_X2_10_COVER	0x0755
+#define I2C_DEVICE_ID_HP_ENVY_X360_15	0x2d05
 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15	0x2817
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN	0x2706
 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN	0x261A
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 2c72ce4..217f2d1 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -160,6 +160,7 @@
 	if (usage) {
 		*old_keycode = usage->type == EV_KEY ?
 				usage->code : KEY_RESERVED;
+		usage->type = EV_KEY;
 		usage->code = ke->keycode;
 
 		clear_bit(*old_keycode, dev->keybit);
@@ -324,6 +325,8 @@
 	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
 	  HID_BATTERY_QUIRK_IGNORE },
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15),
+	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
 	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
@@ -650,10 +653,9 @@
 						code += KEY_MACRO1;
 					else
 						code += BTN_TRIGGER_HAPPY - 0x1e;
-				} else {
-					goto ignore;
+					break;
 				}
-				break;
+				fallthrough;
 		default:
 			switch (field->physical) {
 			case HID_GD_MOUSE:
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 686788e..d7687ce 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -256,8 +256,11 @@
 		unsigned long now = jiffies;
 		int step_x = msc->touches[id].scroll_x - x;
 		int step_y = msc->touches[id].scroll_y - y;
-		int step_hr = ((64 - (int)scroll_speed) * msc->scroll_accel) /
-			      SCROLL_HR_STEPS;
+		int step_hr =
+			max_t(int,
+			      ((64 - (int)scroll_speed) * msc->scroll_accel) /
+					SCROLL_HR_STEPS,
+			      1);
 		int step_x_hr = msc->touches[id].scroll_x_hr - x;
 		int step_y_hr = msc->touches[id].scroll_y_hr - y;
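
[Editor's note: the max_t() wrapper above keeps the high-resolution scroll step from collapsing to zero at high scroll_speed values. max_t() is the type-explicit form of max(), useful when the operands would otherwise disagree on type. A sketch, with the driver's field names abbreviated as stand-ins:]

	#include <linux/minmax.h>

	/* clamp the computed step to at least 1 so scrolling never stalls */
	int step_hr = max_t(int,
			    ((64 - (int)scroll_speed) * accel) / SCROLL_HR_STEPS,
			    1);
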
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index e1afddb..082376a 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1888,6 +1888,11 @@
 		MT_USB_DEVICE(USB_VENDOR_ID_CVTOUCH,
 			USB_DEVICE_ID_CVTOUCH_SCREEN) },
 
+	/* eGalax devices (SAW) */
+	{ .driver_data = MT_CLS_EXPORT_ALL_INPUTS,
+		MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
+			USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER) },
+
 	/* eGalax devices (resistive) */
 	{ .driver_data = MT_CLS_EGALAX,
 		MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index a1e0f68..b6a9a0f 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -189,6 +189,7 @@
 	u16 amp;
 };
 
+#if IS_ENABLED(CONFIG_NINTENDO_FF)
 /*
  * These tables are from
  * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering/blob/master/rumble_data_table.md
@@ -289,6 +290,10 @@
 	{ 0xc2, 0x8070,  940 }, { 0xc4, 0x0071,  960 }, { 0xc6, 0x8071,  981 },
 	{ 0xc8, 0x0072, joycon_max_rumble_amp }
 };
+static const u16 JC_RUMBLE_DFLT_LOW_FREQ = 160;
+static const u16 JC_RUMBLE_DFLT_HIGH_FREQ = 320;
+#endif /* IS_ENABLED(CONFIG_NINTENDO_FF) */
+static const u16 JC_RUMBLE_PERIOD_MS = 50;
 
 /* States for controller state machine */
 enum joycon_ctlr_state {
@@ -397,9 +402,6 @@
 #define JC_RUMBLE_DATA_SIZE	8
 #define JC_RUMBLE_QUEUE_SIZE	8
 
-static const u16 JC_RUMBLE_DFLT_LOW_FREQ = 160;
-static const u16 JC_RUMBLE_DFLT_HIGH_FREQ = 320;
-static const u16 JC_RUMBLE_PERIOD_MS = 50;
 static const unsigned short JC_RUMBLE_ZERO_AMP_PKT_CNT = 5;
 
 static const char * const joycon_player_led_names[] = {
@@ -1850,8 +1852,10 @@
 				      d_name,
 				      "green",
 				      joycon_player_led_names[i]);
-		if (!name)
+		if (!name) {
+			mutex_unlock(&joycon_input_num_mutex);
 			return -ENOMEM;
+		}
 
 		led = &ctlr->leds[i];
 		led->name = name;
@@ -1864,6 +1868,7 @@
 		ret = devm_led_classdev_register(&hdev->dev, led);
 		if (ret) {
 			hid_err(hdev, "Failed registering %s LED\n", led->name);
+			mutex_unlock(&joycon_input_num_mutex);
 			return ret;
 		}
 	}
diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index d44550a..3a53334 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -205,7 +205,7 @@
 	struct tm_wheel *tm_wheel = hid_get_drvdata(hdev);
 	uint16_t model = 0;
 	int i, ret;
-	const struct tm_wheel_info *twi = 0;
+	const struct tm_wheel_info *twi = NULL;
 
 	if (urb->status) {
 		hid_err(hdev, "URB to get model id failed with error %d\n", urb->status);
@@ -238,7 +238,7 @@
 		tm_wheel->usb_dev,
 		usb_sndctrlpipe(tm_wheel->usb_dev, 0),
 		(char *)tm_wheel->change_request,
-		0, 0, // We do not expect any response from the wheel
+		NULL, 0, // We do not expect any response from the wheel
 		thrustmaster_change_handler,
 		hdev
 	);
@@ -272,7 +272,7 @@
 static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	int ret = 0;
-	struct tm_wheel *tm_wheel = 0;
+	struct tm_wheel *tm_wheel = NULL;
 
 	ret = hid_parse(hdev);
 	if (ret) {
diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 1b486f2..0e1183e 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -76,9 +76,12 @@
 #define LOADER_XFER_MODE_ISHTP			BIT(1)
 
 /* ISH Transport Loader client unique GUID */
-static const guid_t loader_ishtp_guid =
-	GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
-		  0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc);
+static const struct ishtp_device_id loader_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
+		  0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc) },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table);
 
 #define FILENAME_SIZE				256
 
@@ -880,7 +883,7 @@
 
 	fw_client =
 		ishtp_fw_cl_get_client(ishtp_get_ishtp_device(loader_ishtp_cl),
-				       &loader_ishtp_guid);
+				       &loader_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ISH client uuid not found\n");
@@ -1057,7 +1060,7 @@
 
 static struct ishtp_cl_driver	loader_ishtp_cl_driver = {
 	.name = "ish-loader",
-	.guid = &loader_ishtp_guid,
+	.id = loader_ishtp_id_table,
 	.probe = loader_ishtp_cl_probe,
 	.remove = loader_ishtp_cl_remove,
 	.reset = loader_ishtp_cl_reset,
@@ -1083,4 +1086,3 @@
 MODULE_AUTHOR("Rushikesh S Kadam <rushikesh.s.kadam@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 91bf4d0..4338c9b 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -12,9 +12,12 @@
 #include "ishtp-hid.h"
 
 /* ISH Transport protocol (ISHTP in short) GUID */
-static const guid_t hid_ishtp_guid =
-	GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
-		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26);
+static const struct ishtp_device_id hid_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
+		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table);
 
 /* Rx ring buffer pool size */
 #define HID_CL_RX_RING_SIZE	32
@@ -662,7 +665,7 @@
 	ishtp_set_tx_ring_size(hid_ishtp_cl, HID_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(hid_ishtp_cl, HID_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ish client uuid not found\n");
@@ -945,7 +948,7 @@
 
 static struct ishtp_cl_driver	hid_ishtp_cl_driver = {
 	.name = "ish-hid",
-	.guid = &hid_ishtp_guid,
+	.id = hid_ishtp_id_table,
 	.probe = hid_ishtp_cl_probe,
 	.remove = hid_ishtp_cl_remove,
 	.reset = hid_ishtp_cl_reset,
@@ -981,4 +984,3 @@
 MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
 
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ishtp:*");
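
[Editor's note: both ishtp drivers replace a bare GUID plus a catch-all MODULE_ALIAS("ishtp:*") with a terminated device id table. MODULE_DEVICE_TABLE() generates precise per-device aliases, so udev can autoload only the matching driver from the uevent MODALIAS string instead of every ishtp module. The table shape, as a sketch; the GUID below is a placeholder, not a real device.]

	static const struct ishtp_device_id my_ishtp_id_table[] = {
		{ .guid = GUID_INIT(0x00000000, 0x0000, 0x0000, 0x00, 0x00,
				    0x00, 0x00, 0x00, 0x00, 0x00, 0x00) },
		{ }	/* terminating entry */
	};
	MODULE_DEVICE_TABLE(ishtp, my_ishtp_id_table);
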
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index 334eac6..f68aba8 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -241,7 +241,7 @@
 	struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
 	struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv);
 
-	return guid_equal(driver->guid,
+	return guid_equal(&driver->id[0].guid,
 			  &device->fw_client->props.protocol_name);
 }
 
@@ -350,7 +350,7 @@
 {
 	int len;
 
-	len = snprintf(buf, PAGE_SIZE, "ishtp:%s\n", dev_name(dev));
+	len = snprintf(buf, PAGE_SIZE, ISHTP_MODULE_PREFIX "%s\n", dev_name(dev));
 	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
 }
 static DEVICE_ATTR_RO(modalias);
@@ -363,7 +363,7 @@
 
 static int ishtp_cl_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-	if (add_uevent_var(env, "MODALIAS=ishtp:%s", dev_name(dev)))
+	if (add_uevent_var(env, "MODALIAS=" ISHTP_MODULE_PREFIX "%s", dev_name(dev)))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 33a6908..2a4cc39 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2603,6 +2603,9 @@
 		return;
 
 	switch (equivalent_usage) {
+	case HID_DG_CONFIDENCE:
+		wacom_wac->hid_data.confidence = value;
+		break;
 	case HID_GD_X:
 		wacom_wac->hid_data.x = value;
 		break;
@@ -2635,7 +2638,8 @@
 	}
 
 	if (usage->usage_index + 1 == field->report_count) {
-		if (equivalent_usage == wacom_wac->hid_data.last_slot_field)
+		if (equivalent_usage == wacom_wac->hid_data.last_slot_field &&
+		    wacom_wac->hid_data.confidence)
 			wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
 	}
 }
@@ -2653,6 +2657,8 @@
 
 	wacom_wac->is_invalid_bt_frame = false;
 
+	hid_data->confidence = true;
+
 	for (i = 0; i < report->maxfield; i++) {
 		struct hid_field *field = report->field[i];
 		int j;
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 8b2d4e5..466b62c 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -301,6 +301,7 @@
 	bool barrelswitch;
 	bool barrelswitch2;
 	bool serialhi;
+	bool confidence;
 	int x;
 	int y;
 	int pressure;
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7f11ea0..ca873a3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -480,7 +480,7 @@
 MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
 static atomic_t trans_id = ATOMIC_INIT(0);
 
-static int dm_ring_size = 20 * 1024;
+static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);
 
 /*
  * Driver specific state.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 0518745..41446f9 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -191,6 +191,7 @@
 #define SMBSLVSTS_HST_NTFY_STS	BIT(0)
 
 /* Host Notify Command register bits */
+#define SMBSLVCMD_SMBALERT_DISABLE	BIT(2)
 #define SMBSLVCMD_HST_NTFY_INTREN	BIT(0)
 
 #define STATUS_ERROR_FLAGS	(SMBHSTSTS_FAILED | SMBHSTSTS_BUS_ERR | \
@@ -259,6 +260,7 @@
 	struct i2c_adapter adapter;
 	unsigned long smba;
 	unsigned char original_hstcfg;
+	unsigned char original_hstcnt;
 	unsigned char original_slvcmd;
 	struct pci_dev *pci_dev;
 	unsigned int features;
@@ -641,12 +643,20 @@
 		i801_isr_byte_done(priv);
 
 	/*
-	 * Clear irq sources and report transaction result.
+	 * Clear remaining IRQ sources: completion of the last command,
+	 * errors and the SMB_ALERT signal. SMB_ALERT status is set after
+	 * signal assertion regardless of whether interrupt generation is
+	 * blocked, so always clear it when the status is set.
+	 */
+	status &= SMBHSTSTS_INTR | STATUS_ERROR_FLAGS | SMBHSTSTS_SMBALERT_STS;
+	if (status)
+		outb_p(status, SMBHSTSTS(priv));
+	status &= ~SMBHSTSTS_SMBALERT_STS; /* SMB_ALERT not reported */
+	/*
+	 * Report transaction result.
 	 * ->status must be cleared before the next transaction is started.
 	 */
-	status &= SMBHSTSTS_INTR | STATUS_ERROR_FLAGS;
 	if (status) {
-		outb_p(status, SMBHSTSTS(priv));
 		priv->status = status;
 		complete(&priv->done);
 	}
@@ -974,9 +984,13 @@
 	if (!(priv->features & FEATURE_HOST_NOTIFY))
 		return;
 
-	if (!(SMBSLVCMD_HST_NTFY_INTREN & priv->original_slvcmd))
-		outb_p(SMBSLVCMD_HST_NTFY_INTREN | priv->original_slvcmd,
-		       SMBSLVCMD(priv));
+	/*
+	 * Enable the Host Notify interrupt and block interrupt generation
+	 * from the SMB_ALERT signal, because the driver does not support
+	 * SMBus Alert.
+	 */
+	outb_p(SMBSLVCMD_HST_NTFY_INTREN | SMBSLVCMD_SMBALERT_DISABLE |
+	       priv->original_slvcmd, SMBSLVCMD(priv));
 
 	/* clear Host Notify bit to allow a new notification */
 	outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv));
@@ -1805,7 +1819,8 @@
 		outb_p(inb_p(SMBAUXCTL(priv)) &
 		       ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
 
-	/* Remember original Host Notify setting */
+	/* Remember original Interrupt and Host Notify settings */
+	priv->original_hstcnt = inb_p(SMBHSTCNT(priv)) & ~SMBHSTCNT_KILL;
 	if (priv->features & FEATURE_HOST_NOTIFY)
 		priv->original_slvcmd = inb_p(SMBSLVCMD(priv));
 
@@ -1869,6 +1884,7 @@
 {
 	struct i801_priv *priv = pci_get_drvdata(dev);
 
+	outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
 	i801_disable_host_notify(priv);
 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
@@ -1892,6 +1908,7 @@
 	struct i801_priv *priv = pci_get_drvdata(dev);
 
 	/* Restore config registers to avoid hard hang on some systems */
+	outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
 	i801_disable_host_notify(priv);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 }
@@ -1901,6 +1918,7 @@
 {
 	struct i801_priv *priv = dev_get_drvdata(dev);
 
+	outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
 	pci_write_config_byte(priv->pci_dev, SMBHSTCFG, priv->original_hstcfg);
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
index 1ed4daa..9537878 100644
--- a/drivers/i2c/busses/i2c-virtio.c
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -104,11 +104,10 @@
 
 static int virtio_i2c_complete_reqs(struct virtqueue *vq,
 				    struct virtio_i2c_req *reqs,
-				    struct i2c_msg *msgs, int num,
-				    bool timedout)
+				    struct i2c_msg *msgs, int num)
 {
 	struct virtio_i2c_req *req;
-	bool failed = timedout;
+	bool failed = false;
 	unsigned int len;
 	int i, j = 0;
 
@@ -130,7 +129,7 @@
 			j++;
 	}
 
-	return timedout ? -ETIMEDOUT : j;
+	return j;
 }
 
 static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
@@ -139,7 +138,6 @@
 	struct virtio_i2c *vi = i2c_get_adapdata(adap);
 	struct virtqueue *vq = vi->vq;
 	struct virtio_i2c_req *reqs;
-	unsigned long time_left;
 	int count;
 
 	reqs = kcalloc(num, sizeof(*reqs), GFP_KERNEL);
@@ -162,11 +160,9 @@
 	reinit_completion(&vi->completion);
 	virtqueue_kick(vq);
 
-	time_left = wait_for_completion_timeout(&vi->completion, adap->timeout);
-	if (!time_left)
-		dev_err(&adap->dev, "virtio i2c backend timeout.\n");
+	wait_for_completion(&vi->completion);
 
-	count = virtio_i2c_complete_reqs(vq, reqs, msgs, count, !time_left);
+	count = virtio_i2c_complete_reqs(vq, reqs, msgs, count);
 
 err_free:
 	kfree(reqs);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index fedc0fa..f5aacaf 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1906,7 +1906,8 @@
 	int ret;
 
 	/* Currently only counter for QP is supported */
-	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+	if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+	    nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
 		return -EINVAL;
 
 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
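
[Editor's note: the nldev fix is the standard netlink defensiveness pattern: a parsed attribute table holds NULL for any attribute absent from the message, so tb[...] must be tested before nla_get_u32() dereferences it, or a malformed request becomes a NULL pointer dereference. As a sketch, with an illustrative attribute name:]

	/* attributes may simply be missing from the request */
	if (!tb[MY_NLDEV_ATTR])
		return -EINVAL;
	val = nla_get_u32(tb[MY_NLDEV_ATTR]);
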
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 692d5ff..c18634b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1232,6 +1232,9 @@
 	INIT_LIST_HEAD(&qp->rdma_mrs);
 	INIT_LIST_HEAD(&qp->sig_mrs);
 
+	qp->send_cq = attr->send_cq;
+	qp->recv_cq = attr->recv_cq;
+
 	rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
 	WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
 	rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index ed9fa0d..dc9211f 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1628,8 +1628,7 @@
 			n++;
 
 	names_out =
-		kmalloc((n + num_extra_names) * sizeof(struct rdma_stat_desc) +
-				names_len,
+		kzalloc((n + num_extra_names) * sizeof(*q) + names_len,
 			GFP_KERNEL);
 	if (!names_out) {
 		*num_cntrs = 0;
@@ -1637,7 +1636,7 @@
 		return -ENOMEM;
 	}
 
-	p = names_out + (n + num_extra_names) * sizeof(struct rdma_stat_desc);
+	p = names_out + (n + num_extra_names) * sizeof(*q);
 	memcpy(p, names_in, names_len);
 
 	q = (struct rdma_stat_desc *)names_out;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ceca059..0d2fa33 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2215,6 +2215,11 @@
 	.get_hw_stats = mlx4_ib_get_hw_stats,
 };
 
+static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
+	.alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
+	.get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
 static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
 {
 	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
@@ -2227,9 +2232,16 @@
 		return 0;
 
 	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
-		/* i == 1 means we are building port counters */
-		if (i && !per_port)
-			continue;
+		/*
+		 * i == 1 means we are building port counters; set a different
+		 * stats ops without the port stats callback.
+		 */
+		if (i && !per_port) {
+			ib_set_device_ops(&ibdev->ib_dev,
+					  &mlx4_ib_hw_stats_ops1);
+
+			return 0;
+		}
 
 		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
 						    &diag[i].offset,
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 882c3c8b..3088c5b 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -19,6 +19,7 @@
 #include <linux/input.h>
 #include <linux/gameport.h>
 #include <linux/jiffies.h>
+#include <linux/seq_buf.h>
 #include <linux/timex.h>
 #include <linux/timekeeping.h>
 
@@ -338,23 +339,24 @@
 
 static void analog_name(struct analog *analog)
 {
-	snprintf(analog->name, sizeof(analog->name), "Analog %d-axis %d-button",
+	struct seq_buf s;
+
+	seq_buf_init(&s, analog->name, sizeof(analog->name));
+	seq_buf_printf(&s, "Analog %d-axis %d-button",
 		 hweight8(analog->mask & ANALOG_AXES_STD),
 		 hweight8(analog->mask & ANALOG_BTNS_STD) + !!(analog->mask & ANALOG_BTNS_CHF) * 2 +
 		 hweight16(analog->mask & ANALOG_BTNS_GAMEPAD) + !!(analog->mask & ANALOG_HBTN_CHF) * 4);
 
 	if (analog->mask & ANALOG_HATS_ALL)
-		snprintf(analog->name, sizeof(analog->name), "%s %d-hat",
-			 analog->name, hweight16(analog->mask & ANALOG_HATS_ALL));
+		seq_buf_printf(&s, " %d-hat",
+			       hweight16(analog->mask & ANALOG_HATS_ALL));
 
 	if (analog->mask & ANALOG_HAT_FCS)
-		strlcat(analog->name, " FCS", sizeof(analog->name));
+		seq_buf_printf(&s, " FCS");
 	if (analog->mask & ANALOG_ANY_CHF)
-		strlcat(analog->name, (analog->mask & ANALOG_SAITEK) ? " Saitek" : " CHF",
-			sizeof(analog->name));
+		seq_buf_printf(&s, (analog->mask & ANALOG_SAITEK) ? " Saitek" : " CHF");
 
-	strlcat(analog->name, (analog->mask & ANALOG_GAMEPAD) ? " gamepad": " joystick",
-		sizeof(analog->name));
+	seq_buf_printf(&s, (analog->mask & ANALOG_GAMEPAD) ? " gamepad" : " joystick");
 }
 
 /*
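
[Editor's note: the analog.c rewrite exists because snprintf(buf, size, "%s ...", buf) has overlapping source and destination, which is undefined behavior. seq_buf provides an append-style API over a fixed buffer with the bookkeeping done once. A minimal sketch, with stand-in counts:]

	#include <linux/seq_buf.h>

	char name[64];
	struct seq_buf s;
	int naxes = 4, nhats = 1;	/* stand-ins for the decoded mask */

	seq_buf_init(&s, name, sizeof(name));
	seq_buf_printf(&s, "Analog %d-axis", naxes);	/* first write */
	seq_buf_printf(&s, " %d-hat", nhats);		/* appends safely */
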
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index 6c554c1..ea58805 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -92,7 +92,7 @@
 				 id,
 				 USB_TYPE_VENDOR | USB_DIR_IN |
 					USB_RECIP_INTERFACE,
-				 0, 0, buf, IFORCE_MAX_LENGTH, HZ);
+				 0, 0, buf, IFORCE_MAX_LENGTH, 1000);
 	if (status < 0) {
 		dev_err(&iforce_usb->intf->dev,
 			"usb_submit_urb failed: %d\n", status);
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index f89e9aa..7416de8 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -83,7 +83,7 @@
 	const signed char *axes;
 	const short *buttons;
 } tmdc_models[] = {
-	{   1, "ThrustMaster Millenium 3D Inceptor",	  6, 2, { 4, 2 }, { 4, 6 }, tmdc_abs, tmdc_btn_joy },
+	{   1, "ThrustMaster Millennium 3D Inceptor",	  6, 2, { 4, 2 }, { 4, 6 }, tmdc_abs, tmdc_btn_joy },
 	{   3, "ThrustMaster Rage 3D Gamepad",		  2, 0, { 8, 2 }, { 0, 0 }, tmdc_abs, tmdc_btn_pad },
 	{   4, "ThrustMaster Attack Throttle",		  5, 2, { 4, 6 }, { 4, 2 }, tmdc_abs_at, tmdc_btn_at },
 	{   8, "ThrustMaster FragMaster",		  4, 0, { 8, 2 }, { 0, 0 }, tmdc_abs_fm, tmdc_btn_fm },
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index e75650e..0c607da 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -788,4 +788,14 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called pmic-keys.
 
+config KEYBOARD_CYPRESS_SF
+	tristate "Cypress StreetFighter touchkey support"
+	depends on I2C
+	help
+	  Say Y here if you want to enable support for Cypress StreetFighter
+	  touchkeys.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cypress-sf.
+
 endif
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 1d689fd..e3c8648 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -17,6 +17,7 @@
 obj-$(CONFIG_KEYBOARD_CAP11XX)		+= cap11xx.o
 obj-$(CONFIG_KEYBOARD_CLPS711X)		+= clps711x-keypad.o
 obj-$(CONFIG_KEYBOARD_CROS_EC)		+= cros_ec_keyb.o
+obj-$(CONFIG_KEYBOARD_CYPRESS_SF)	+= cypress-sf.o
 obj-$(CONFIG_KEYBOARD_DAVINCI)		+= davinci_keyscan.o
 obj-$(CONFIG_KEYBOARD_DLINK_DIR685)	+= dlink-dir685-touchkeys.o
 obj-$(CONFIG_KEYBOARD_EP93XX)		+= ep93xx_keypad.o
diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c
index 688e2be..7c85343 100644
--- a/drivers/input/keyboard/cap11xx.c
+++ b/drivers/input/keyboard/cap11xx.c
@@ -91,18 +91,21 @@
 	u8 product_id;
 	unsigned int num_channels;
 	unsigned int num_leds;
+	bool no_gain;
 };
 
 enum {
 	CAP1106,
 	CAP1126,
 	CAP1188,
+	CAP1206,
 };
 
 static const struct cap11xx_hw_model cap11xx_devices[] = {
-	[CAP1106] = { .product_id = 0x55, .num_channels = 6, .num_leds = 0 },
-	[CAP1126] = { .product_id = 0x53, .num_channels = 6, .num_leds = 2 },
-	[CAP1188] = { .product_id = 0x50, .num_channels = 8, .num_leds = 8 },
+	[CAP1106] = { .product_id = 0x55, .num_channels = 6, .num_leds = 0, .no_gain = false },
+	[CAP1126] = { .product_id = 0x53, .num_channels = 6, .num_leds = 2, .no_gain = false },
+	[CAP1188] = { .product_id = 0x50, .num_channels = 8, .num_leds = 8, .no_gain = false },
+	[CAP1206] = { .product_id = 0x67, .num_channels = 6, .num_leds = 0, .no_gain = true },
 };
 
 static const struct reg_default cap11xx_reg_defaults[] = {
@@ -378,17 +381,24 @@
 	node = dev->of_node;
 
 	if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) {
-		if (is_power_of_2(gain32) && gain32 <= 8)
+		if (cap->no_gain)
+			dev_warn(dev,
+				 "This version doesn't support sensor gain\n");
+		else if (is_power_of_2(gain32) && gain32 <= 8)
 			gain = ilog2(gain32);
 		else
 			dev_err(dev, "Invalid sensor-gain value %d\n", gain32);
 	}
 
-	if (of_property_read_bool(node, "microchip,irq-active-high")) {
-		error = regmap_update_bits(priv->regmap, CAP11XX_REG_CONFIG2,
-					   CAP11XX_REG_CONFIG2_ALT_POL, 0);
-		if (error)
-			return error;
+	if (id->driver_data != CAP1206) {
+		if (of_property_read_bool(node, "microchip,irq-active-high")) {
+			error = regmap_update_bits(priv->regmap,
+						   CAP11XX_REG_CONFIG2,
+						   CAP11XX_REG_CONFIG2_ALT_POL,
+						   0);
+			if (error)
+				return error;
+		}
 	}
 
 	/* Provide some useful defaults */
@@ -398,11 +408,14 @@
 	of_property_read_u32_array(node, "linux,keycodes",
 				   priv->keycodes, cap->num_channels);
 
-	error = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL,
-				   CAP11XX_REG_MAIN_CONTROL_GAIN_MASK,
-				   gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT);
-	if (error)
-		return error;
+	if (!cap->no_gain) {
+		error = regmap_update_bits(priv->regmap,
+				CAP11XX_REG_MAIN_CONTROL,
+				CAP11XX_REG_MAIN_CONTROL_GAIN_MASK,
+				gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT);
+		if (error)
+			return error;
+	}
 
 	/* Disable autorepeat. The Linux input system has its own handling. */
 	error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0);
@@ -470,6 +483,7 @@
 	{ .compatible = "microchip,cap1106", },
 	{ .compatible = "microchip,cap1126", },
 	{ .compatible = "microchip,cap1188", },
+	{ .compatible = "microchip,cap1206", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, cap11xx_dt_ids);
@@ -478,6 +492,7 @@
 	{ "cap1106", CAP1106 },
 	{ "cap1126", CAP1126 },
 	{ "cap1188", CAP1188 },
+	{ "cap1206", CAP1206 },
 	{}
 };
 MODULE_DEVICE_TABLE(i2c, cap11xx_i2c_ids);
diff --git a/drivers/input/keyboard/cypress-sf.c b/drivers/input/keyboard/cypress-sf.c
new file mode 100644
index 0000000..c289960
--- /dev/null
+++ b/drivers/input/keyboard/cypress-sf.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Cypress StreetFighter Touchkey Driver
+ *
+ * Copyright (c) 2021 Yassine Oudjana <y.oudjana@protonmail.com>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/regulator/consumer.h>
+
+#define CYPRESS_SF_DEV_NAME "cypress-sf"
+
+#define CYPRESS_SF_REG_BUTTON_STATUS	0x4a
+
+struct cypress_sf_data {
+	struct i2c_client *client;
+	struct input_dev *input_dev;
+	struct regulator_bulk_data regulators[2];
+	u32 *keycodes;
+	unsigned long keystates;
+	int num_keys;
+};
+
+static irqreturn_t cypress_sf_irq_handler(int irq, void *devid)
+{
+	struct cypress_sf_data *touchkey = devid;
+	unsigned long keystates, changed;
+	bool new_state;
+	int val, key;
+
+	val = i2c_smbus_read_byte_data(touchkey->client,
+				       CYPRESS_SF_REG_BUTTON_STATUS);
+	if (val < 0) {
+		dev_err(&touchkey->client->dev,
+			"Failed to read button status: %d", val);
+		return IRQ_NONE;
+	}
+	keystates = val;
+
+	bitmap_xor(&changed, &keystates, &touchkey->keystates,
+		   touchkey->num_keys);
+
+	for_each_set_bit(key, &changed, touchkey->num_keys) {
+		new_state = keystates & BIT(key);
+		dev_dbg(&touchkey->client->dev,
+			"Key %d changed to %d", key, new_state);
+		input_report_key(touchkey->input_dev,
+				 touchkey->keycodes[key], new_state);
+	}
+
+	input_sync(touchkey->input_dev);
+	touchkey->keystates = keystates;
+
+	return IRQ_HANDLED;
+}
+
+static int cypress_sf_probe(struct i2c_client *client)
+{
+	struct cypress_sf_data *touchkey;
+	int key, error;
+
+	touchkey = devm_kzalloc(&client->dev, sizeof(*touchkey), GFP_KERNEL);
+	if (!touchkey)
+		return -ENOMEM;
+
+	touchkey->client = client;
+	i2c_set_clientdata(client, touchkey);
+
+	touchkey->regulators[0].supply = "vdd";
+	touchkey->regulators[1].supply = "avdd";
+
+	error = devm_regulator_bulk_get(&client->dev,
+					ARRAY_SIZE(touchkey->regulators),
+					touchkey->regulators);
+	if (error) {
+		dev_err(&client->dev, "Failed to get regulators: %d\n", error);
+		return error;
+	}
+
+	touchkey->num_keys = device_property_read_u32_array(&client->dev,
+							    "linux,keycodes",
+							    NULL, 0);
+	if (touchkey->num_keys < 0) {
+		/* Default key count */
+		touchkey->num_keys = 2;
+	}
+
+	touchkey->keycodes = devm_kcalloc(&client->dev,
+					  touchkey->num_keys,
+					  sizeof(*touchkey->keycodes),
+					  GFP_KERNEL);
+	if (!touchkey->keycodes)
+		return -ENOMEM;
+
+	error = device_property_read_u32_array(&client->dev, "linux,keycodes",
+					       touchkey->keycodes,
+					       touchkey->num_keys);
+
+	if (error) {
+		dev_warn(&client->dev,
+			 "Failed to read keycodes: %d, using defaults\n",
+			 error);
+
+		/* Default keycodes */
+		touchkey->keycodes[0] = KEY_BACK;
+		touchkey->keycodes[1] = KEY_MENU;
+	}
+
+	error = regulator_bulk_enable(ARRAY_SIZE(touchkey->regulators),
+				      touchkey->regulators);
+	if (error) {
+		dev_err(&client->dev,
+			"Failed to enable regulators: %d\n", error);
+		return error;
+	}
+
+	touchkey->input_dev = devm_input_allocate_device(&client->dev);
+	if (!touchkey->input_dev) {
+		dev_err(&client->dev, "Failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	touchkey->input_dev->name = CYPRESS_SF_DEV_NAME;
+	touchkey->input_dev->id.bustype = BUS_I2C;
+
+	for (key = 0; key < touchkey->num_keys; ++key)
+		input_set_capability(touchkey->input_dev,
+				     EV_KEY, touchkey->keycodes[key]);
+
+	error = input_register_device(touchkey->input_dev);
+	if (error) {
+		dev_err(&client->dev,
+			"Failed to register input device: %d\n", error);
+		return error;
+	}
+
+	error = devm_request_threaded_irq(&client->dev, client->irq,
+					  NULL, cypress_sf_irq_handler,
+					  IRQF_ONESHOT,
+					  CYPRESS_SF_DEV_NAME, touchkey);
+	if (error) {
+		dev_err(&client->dev,
+			"Failed to register threaded irq: %d", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused cypress_sf_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cypress_sf_data *touchkey = i2c_get_clientdata(client);
+	int error;
+
+	disable_irq(client->irq);
+
+	error = regulator_bulk_disable(ARRAY_SIZE(touchkey->regulators),
+				       touchkey->regulators);
+	if (error) {
+		dev_err(dev, "Failed to disable regulators: %d", error);
+		enable_irq(client->irq);
+		return error;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused cypress_sf_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cypress_sf_data *touchkey = i2c_get_clientdata(client);
+	int error;
+
+	error = regulator_bulk_enable(ARRAY_SIZE(touchkey->regulators),
+				      touchkey->regulators);
+	if (error) {
+		dev_err(dev, "Failed to enable regulators: %d", error);
+		return error;
+	}
+
+	enable_irq(client->irq);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(cypress_sf_pm_ops,
+			 cypress_sf_suspend, cypress_sf_resume);
+
+static struct i2c_device_id cypress_sf_id_table[] = {
+	{ CYPRESS_SF_DEV_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, cypress_sf_id_table);
+
+#ifdef CONFIG_OF
+static const struct of_device_id cypress_sf_of_match[] = {
+	{ .compatible = "cypress,sf3155", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, cypress_sf_of_match);
+#endif
+
+static struct i2c_driver cypress_sf_driver = {
+	.driver = {
+		.name = CYPRESS_SF_DEV_NAME,
+		.pm = &cypress_sf_pm_ops,
+		.of_match_table = of_match_ptr(cypress_sf_of_match),
+	},
+	.id_table = cypress_sf_id_table,
+	.probe_new = cypress_sf_probe,
+};
+module_i2c_driver(cypress_sf_driver);
+
+MODULE_AUTHOR("Yassine Oudjana <y.oudjana@protonmail.com>");
+MODULE_DESCRIPTION("Cypress StreetFighter Touchkey Driver");
+MODULE_LICENSE("GPL v2");
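
[Editor's note: the interrupt handler of the new cypress-sf driver shows a compact change-detection idiom: XOR the freshly read key bitmap against the cached one, then report only the bits that flipped. A reduced sketch of that idiom; read_hw_keys(), report_key() and NUM_KEYS are hypothetical stand-ins.]

	#include <linux/bitmap.h>
	#include <linux/bitops.h>

	unsigned long prev = 0, now, changed;	/* prev caches the last state */
	int key;

	now = read_hw_keys();			/* hypothetical register read */
	bitmap_xor(&changed, &now, &prev, NUM_KEYS);
	for_each_set_bit(key, &changed, NUM_KEYS)
		report_key(key, now & BIT(key));	/* only changed keys */
	prev = now;
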
diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c
index e0e931e..272a4f1 100644
--- a/drivers/input/keyboard/ep93xx_keypad.c
+++ b/drivers/input/keyboard/ep93xx_keypad.c
@@ -17,6 +17,7 @@
  * flag.
  */
 
+#include <linux/bits.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
@@ -26,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/soc/cirrus/ep93xx.h>
 #include <linux/platform_data/keypad-ep93xx.h>
+#include <linux/pm_wakeirq.h>
 
 /*
  * Keypad Interface Register offsets
@@ -35,28 +37,28 @@
 #define KEY_REG			0x08	/* Key Value Capture register */
 
 /* Key Scan Initialization Register bit defines */
-#define KEY_INIT_DBNC_MASK	(0x00ff0000)
-#define KEY_INIT_DBNC_SHIFT	(16)
-#define KEY_INIT_DIS3KY		(1<<15)
-#define KEY_INIT_DIAG		(1<<14)
-#define KEY_INIT_BACK		(1<<13)
-#define KEY_INIT_T2		(1<<12)
-#define KEY_INIT_PRSCL_MASK	(0x000003ff)
-#define KEY_INIT_PRSCL_SHIFT	(0)
+#define KEY_INIT_DBNC_MASK	GENMASK(23, 16)
+#define KEY_INIT_DBNC_SHIFT	16
+#define KEY_INIT_DIS3KY		BIT(15)
+#define KEY_INIT_DIAG		BIT(14)
+#define KEY_INIT_BACK		BIT(13)
+#define KEY_INIT_T2		BIT(12)
+#define KEY_INIT_PRSCL_MASK	GENMASK(9, 0)
+#define KEY_INIT_PRSCL_SHIFT	0
 
 /* Key Scan Diagnostic Register bit defines */
-#define KEY_DIAG_MASK		(0x0000003f)
-#define KEY_DIAG_SHIFT		(0)
+#define KEY_DIAG_MASK		GENMASK(5, 0)
+#define KEY_DIAG_SHIFT		0
 
 /* Key Value Capture Register bit defines */
-#define KEY_REG_K		(1<<15)
-#define KEY_REG_INT		(1<<14)
-#define KEY_REG_2KEYS		(1<<13)
-#define KEY_REG_1KEY		(1<<12)
-#define KEY_REG_KEY2_MASK	(0x00000fc0)
-#define KEY_REG_KEY2_SHIFT	(6)
-#define KEY_REG_KEY1_MASK	(0x0000003f)
-#define KEY_REG_KEY1_SHIFT	(0)
+#define KEY_REG_K		BIT(15)
+#define KEY_REG_INT		BIT(14)
+#define KEY_REG_2KEYS		BIT(13)
+#define KEY_REG_1KEY		BIT(12)
+#define KEY_REG_KEY2_MASK	GENMASK(11, 6)
+#define KEY_REG_KEY2_SHIFT	6
+#define KEY_REG_KEY1_MASK	GENMASK(5, 0)
+#define KEY_REG_KEY1_SHIFT	0
 
 #define EP93XX_MATRIX_SIZE	(EP93XX_MATRIX_ROWS * EP93XX_MATRIX_COLS)
 
@@ -175,8 +177,7 @@
 }
 
 
-#ifdef CONFIG_PM_SLEEP
-static int ep93xx_keypad_suspend(struct device *dev)
+static int __maybe_unused ep93xx_keypad_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct ep93xx_keypad *keypad = platform_get_drvdata(pdev);
@@ -191,21 +192,15 @@
 
 	mutex_unlock(&input_dev->mutex);
 
-	if (device_may_wakeup(&pdev->dev))
-		enable_irq_wake(keypad->irq);
-
 	return 0;
 }
 
-static int ep93xx_keypad_resume(struct device *dev)
+static int __maybe_unused ep93xx_keypad_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct ep93xx_keypad *keypad = platform_get_drvdata(pdev);
 	struct input_dev *input_dev = keypad->input_dev;
 
-	if (device_may_wakeup(&pdev->dev))
-		disable_irq_wake(keypad->irq);
-
 	mutex_lock(&input_dev->mutex);
 
 	if (input_device_enabled(input_dev)) {
@@ -220,11 +215,17 @@
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ep93xx_keypad_pm_ops,
 			 ep93xx_keypad_suspend, ep93xx_keypad_resume);
 
+static void ep93xx_keypad_release_gpio_action(void *_pdev)
+{
+	struct platform_device *pdev = _pdev;
+
+	ep93xx_keypad_release_gpio(pdev);
+}
+
 static int ep93xx_keypad_probe(struct platform_device *pdev)
 {
 	struct ep93xx_keypad *keypad;
@@ -233,61 +234,46 @@
 	struct resource *res;
 	int err;
 
-	keypad = kzalloc(sizeof(struct ep93xx_keypad), GFP_KERNEL);
+	keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad), GFP_KERNEL);
 	if (!keypad)
 		return -ENOMEM;
 
 	keypad->pdata = dev_get_platdata(&pdev->dev);
-	if (!keypad->pdata) {
-		err = -EINVAL;
-		goto failed_free;
-	}
+	if (!keypad->pdata)
+		return -EINVAL;
 
 	keymap_data = keypad->pdata->keymap_data;
-	if (!keymap_data) {
-		err = -EINVAL;
-		goto failed_free;
-	}
+	if (!keymap_data)
+		return -EINVAL;
 
 	keypad->irq = platform_get_irq(pdev, 0);
-	if (keypad->irq < 0) {
-		err = keypad->irq;
-		goto failed_free;
-	}
+	if (keypad->irq < 0)
+		return keypad->irq;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		err = -ENXIO;
-		goto failed_free;
-	}
+	if (!res)
+		return -ENXIO;
 
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		err = -EBUSY;
-		goto failed_free;
-	}
-
-	keypad->mmio_base = ioremap(res->start, resource_size(res));
-	if (keypad->mmio_base == NULL) {
-		err = -ENXIO;
-		goto failed_free_mem;
-	}
+	keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(keypad->mmio_base))
+		return PTR_ERR(keypad->mmio_base);
 
 	err = ep93xx_keypad_acquire_gpio(pdev);
 	if (err)
-		goto failed_free_io;
+		return err;
 
-	keypad->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(keypad->clk)) {
-		err = PTR_ERR(keypad->clk);
-		goto failed_free_gpio;
-	}
+	err = devm_add_action_or_reset(&pdev->dev,
+				       ep93xx_keypad_release_gpio_action, pdev);
+	if (err)
+		return err;
 
-	input_dev = input_allocate_device();
-	if (!input_dev) {
-		err = -ENOMEM;
-		goto failed_put_clk;
-	}
+	keypad->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(keypad->clk))
+		return PTR_ERR(keypad->clk);
+
+	input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!input_dev)
+		return -ENOMEM;
 
 	keypad->input_dev = input_dev;
 
@@ -295,70 +281,40 @@
 	input_dev->id.bustype = BUS_HOST;
 	input_dev->open = ep93xx_keypad_open;
 	input_dev->close = ep93xx_keypad_close;
-	input_dev->dev.parent = &pdev->dev;
 
 	err = matrix_keypad_build_keymap(keymap_data, NULL,
 					 EP93XX_MATRIX_ROWS, EP93XX_MATRIX_COLS,
 					 keypad->keycodes, input_dev);
 	if (err)
-		goto failed_free_dev;
+		return err;
 
 	if (keypad->pdata->flags & EP93XX_KEYPAD_AUTOREPEAT)
 		__set_bit(EV_REP, input_dev->evbit);
 	input_set_drvdata(input_dev, keypad);
 
-	err = request_irq(keypad->irq, ep93xx_keypad_irq_handler,
-			  0, pdev->name, keypad);
+	err = devm_request_irq(&pdev->dev, keypad->irq,
+			       ep93xx_keypad_irq_handler,
+			       0, pdev->name, keypad);
 	if (err)
-		goto failed_free_dev;
+		return err;
 
 	err = input_register_device(input_dev);
 	if (err)
-		goto failed_free_irq;
+		return err;
 
 	platform_set_drvdata(pdev, keypad);
+
 	device_init_wakeup(&pdev->dev, 1);
+	err = dev_pm_set_wake_irq(&pdev->dev, keypad->irq);
+	if (err)
+		dev_warn(&pdev->dev, "failed to set up wakeup irq: %d\n", err);
 
 	return 0;
-
-failed_free_irq:
-	free_irq(keypad->irq, keypad);
-failed_free_dev:
-	input_free_device(input_dev);
-failed_put_clk:
-	clk_put(keypad->clk);
-failed_free_gpio:
-	ep93xx_keypad_release_gpio(pdev);
-failed_free_io:
-	iounmap(keypad->mmio_base);
-failed_free_mem:
-	release_mem_region(res->start, resource_size(res));
-failed_free:
-	kfree(keypad);
-	return err;
 }
 
 static int ep93xx_keypad_remove(struct platform_device *pdev)
 {
-	struct ep93xx_keypad *keypad = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(keypad->irq, keypad);
-
-	if (keypad->enabled)
-		clk_disable(keypad->clk);
-	clk_put(keypad->clk);
-
-	input_unregister_device(keypad->input_dev);
-
-	ep93xx_keypad_release_gpio(pdev);
-
-	iounmap(keypad->mmio_base);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
-
-	kfree(keypad);
+	dev_pm_clear_wake_irq(&pdev->dev);
 
 	return 0;
 }
diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c
index 40d6e50..230ab3d 100644
--- a/drivers/input/keyboard/mpr121_touchkey.c
+++ b/drivers/input/keyboard/mpr121_touchkey.c
@@ -107,9 +107,9 @@
 		return ERR_PTR(err);
 	}
 
-	err = devm_add_action(dev, mpr121_vdd_supply_disable, vdd_supply);
+	err = devm_add_action_or_reset(dev, mpr121_vdd_supply_disable,
+				       vdd_supply);
 	if (err) {
-		regulator_disable(vdd_supply);
 		dev_err(dev, "failed to add disable regulator action: %d\n",
 			err);
 		return ERR_PTR(err);
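
[Editor's note: the mpr121 hunk shows why devm_add_action_or_reset() is preferred over devm_add_action(): if registering the action fails, the _or_reset variant runs it immediately, so the manual regulator_disable() fallback can be dropped. The pattern, sketched:]

	static void disable_vdd(void *data)
	{
		regulator_disable(data);
	}

	/* on registration failure the action runs right away, so no
	 * explicit regulator_disable() is needed on this error path
	 */
	err = devm_add_action_or_reset(dev, disable_vdd, vdd_supply);
	if (err)
		return err;
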
diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c
index dbe836c..eb3a687 100644
--- a/drivers/input/keyboard/omap-keypad.c
+++ b/drivers/input/keyboard/omap-keypad.c
@@ -190,8 +190,7 @@
 	row_shift = get_count_order(pdata->cols);
 	keycodemax = pdata->rows << row_shift;
 
-	omap_kp = kzalloc(sizeof(struct omap_kp) +
-			keycodemax * sizeof(unsigned short), GFP_KERNEL);
+	omap_kp = kzalloc(struct_size(omap_kp, keymap, keycodemax), GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!omap_kp || !input_dev) {
 		kfree(omap_kp);
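
[Editor's note: struct_size(), used in the omap-keypad hunk, computes sizeof(*p) + n * sizeof(p->member[0]) for a struct with a trailing flexible array, but saturates to SIZE_MAX on overflow instead of wrapping, turning a potential heap overflow into a failed allocation. A sketch with an illustrative struct:]

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct kp {
		int rows;
		unsigned short keymap[];	/* flexible array member */
	};

	struct kp *kp = kzalloc(struct_size(kp, keymap, keycodemax), GFP_KERNEL);
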
diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c
index 6218b1c..632cd6c 100644
--- a/drivers/input/keyboard/tm2-touchkey.c
+++ b/drivers/input/keyboard/tm2-touchkey.c
@@ -156,6 +156,8 @@
 		goto out;
 	}
 
+	input_event(touchkey->input_dev, EV_MSC, MSC_SCAN, index);
+
 	if (data & TM2_TOUCHKEY_BIT_PRESS_EV) {
 		for (i = 0; i < touchkey->num_keycodes; i++)
 			input_report_key(touchkey->input_dev,
@@ -250,6 +252,11 @@
 	touchkey->input_dev->name = TM2_TOUCHKEY_DEV_NAME;
 	touchkey->input_dev->id.bustype = BUS_I2C;
 
+	touchkey->input_dev->keycode = touchkey->keycodes;
+	touchkey->input_dev->keycodemax = touchkey->num_keycodes;
+	touchkey->input_dev->keycodesize = sizeof(touchkey->keycodes[0]);
+
+	input_set_capability(touchkey->input_dev, EV_MSC, MSC_SCAN);
 	for (i = 0; i < touchkey->num_keycodes; i++)
 		input_set_capability(touchkey->input_dev, EV_KEY,
 				     touchkey->keycodes[i]);
diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
index e64368a..a3b5f88 100644
--- a/drivers/input/misc/adxl34x-i2c.c
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -103,7 +103,9 @@
 {
 	struct adxl34x *ac = i2c_get_clientdata(client);
 
-	return adxl34x_remove(ac);
+	adxl34x_remove(ac);
+
+	return 0;
 }
 
 static int __maybe_unused adxl34x_i2c_suspend(struct device *dev)
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
index df6afa4..6e51c9bc 100644
--- a/drivers/input/misc/adxl34x-spi.c
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -91,7 +91,9 @@
 {
 	struct adxl34x *ac = spi_get_drvdata(spi);
 
-	return adxl34x_remove(ac);
+	adxl34x_remove(ac);
+
+	return 0;
 }
 
 static int __maybe_unused adxl34x_spi_suspend(struct device *dev)
diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c
index 4cc4e8f..a4af314 100644
--- a/drivers/input/misc/adxl34x.c
+++ b/drivers/input/misc/adxl34x.c
@@ -237,7 +237,7 @@
 
 static void adxl34x_get_triple(struct adxl34x *ac, struct axis_triple *axis)
 {
-	short buf[3];
+	__le16 buf[3];
 
 	ac->bops->read_block(ac->dev, DATAX0, DATAZ1 - DATAX0 + 1, buf);
 
@@ -896,15 +896,13 @@
 }
 EXPORT_SYMBOL_GPL(adxl34x_probe);
 
-int adxl34x_remove(struct adxl34x *ac)
+void adxl34x_remove(struct adxl34x *ac)
 {
 	sysfs_remove_group(&ac->dev->kobj, &adxl34x_attr_group);
 	free_irq(ac->irq, ac);
 	input_unregister_device(ac->input);
 	dev_dbg(ac->dev, "unregistered accelerometer\n");
 	kfree(ac);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(adxl34x_remove);
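
[Editor's note: the "short buf[3]" to "__le16 buf[3]" change in adxl34x.c is an endianness fix: the device returns little-endian 16-bit samples, which read correctly into a native short only on little-endian CPUs. Typing the buffer __le16 documents the wire format, lets sparse catch mixed use, and pairs with le16_to_cpu() for the conversion. Sketched:]

	#include <linux/types.h>

	__le16 raw[3];	/* device-order (little-endian) samples */
	s16 x, y, z;

	/* ...bus read fills raw[]... */
	x = (s16)le16_to_cpu(raw[0]);
	y = (s16)le16_to_cpu(raw[1]);
	z = (s16)le16_to_cpu(raw[2]);
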
 
diff --git a/drivers/input/misc/adxl34x.h b/drivers/input/misc/adxl34x.h
index 83a0eec..febf852 100644
--- a/drivers/input/misc/adxl34x.h
+++ b/drivers/input/misc/adxl34x.h
@@ -25,6 +25,6 @@
 struct adxl34x *adxl34x_probe(struct device *dev, int irq,
 			      bool fifo_delay_default,
 			      const struct adxl34x_bus_ops *bops);
-int adxl34x_remove(struct adxl34x *ac);
+void adxl34x_remove(struct adxl34x *ac);
 
 #endif
diff --git a/drivers/input/misc/ariel-pwrbutton.c b/drivers/input/misc/ariel-pwrbutton.c
index 17bbaac..cdc8071 100644
--- a/drivers/input/misc/ariel-pwrbutton.c
+++ b/drivers/input/misc/ariel-pwrbutton.c
@@ -149,12 +149,19 @@
 };
 MODULE_DEVICE_TABLE(of, ariel_pwrbutton_of_match);
 
+static const struct spi_device_id ariel_pwrbutton_spi_ids[] = {
+	{ .name = "wyse-ariel-ec-input" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, ariel_pwrbutton_spi_ids);
+
 static struct spi_driver ariel_pwrbutton_driver = {
 	.driver = {
 		.name = "dell-wyse-ariel-ec-input",
 		.of_match_table = ariel_pwrbutton_of_match,
 	},
 	.probe = ariel_pwrbutton_probe,
+	.id_table = ariel_pwrbutton_spi_ids,
 };
 module_spi_driver(ariel_pwrbutton_driver);
 
diff --git a/drivers/input/misc/cpcap-pwrbutton.c b/drivers/input/misc/cpcap-pwrbutton.c
index 0abef63..879790b 100644
--- a/drivers/input/misc/cpcap-pwrbutton.c
+++ b/drivers/input/misc/cpcap-pwrbutton.c
@@ -54,9 +54,13 @@
 static int cpcap_power_button_probe(struct platform_device *pdev)
 {
 	struct cpcap_power_button *button;
-	int irq = platform_get_irq(pdev, 0);
+	int irq;
 	int err;
 
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
 	button = devm_kmalloc(&pdev->dev, sizeof(*button), GFP_KERNEL);
 	if (!button)
 		return -ENOMEM;
@@ -73,7 +77,6 @@
 
 	button->idev->name = "cpcap-pwrbutton";
 	button->idev->phys = "cpcap-pwrbutton/input0";
-	button->idev->dev.parent = button->dev;
 	input_set_capability(button->idev, EV_KEY, KEY_POWER);
 
 	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c
index 0d09ffe..4369d3c 100644
--- a/drivers/input/misc/max77693-haptic.c
+++ b/drivers/input/misc/max77693-haptic.c
@@ -424,5 +424,4 @@
 MODULE_AUTHOR("Jaewon Kim <jaewon02.kim@samsung.com>");
 MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_DESCRIPTION("MAXIM 77693/77843 Haptic driver");
-MODULE_ALIAS("platform:max77693-haptic");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c
index ffab4a4..4770cb5 100644
--- a/drivers/input/misc/max8925_onkey.c
+++ b/drivers/input/misc/max8925_onkey.c
@@ -1,4 +1,4 @@
-/**
+/*
  * MAX8925 ONKEY driver
  *
  * Copyright (C) 2009 Marvell International Ltd.
diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c
index 1e1baed..f9b05cf 100644
--- a/drivers/input/misc/palmas-pwrbutton.c
+++ b/drivers/input/misc/palmas-pwrbutton.c
@@ -210,6 +210,11 @@
 	INIT_DELAYED_WORK(&pwron->input_work, palmas_power_button_work);
 
 	pwron->irq = platform_get_irq(pdev, 0);
+	if (pwron->irq < 0) {
+		error = pwron->irq;
+		goto err_free_input;
+	}
+
 	error = request_threaded_irq(pwron->irq, NULL, pwron_irq,
 				     IRQF_TRIGGER_HIGH |
 					IRQF_TRIGGER_LOW |
diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c
index 3360960..89af524 100644
--- a/drivers/input/misc/pm8941-pwrkey.c
+++ b/drivers/input/misc/pm8941-pwrkey.c
@@ -29,6 +29,7 @@
 #define PON_PS_HOLD_RST_CTL2		0x5b
 #define  PON_PS_HOLD_ENABLE		BIT(7)
 #define  PON_PS_HOLD_TYPE_MASK		0x0f
+#define  PON_PS_HOLD_TYPE_WARM_RESET	1
 #define  PON_PS_HOLD_TYPE_SHUTDOWN	4
 #define  PON_PS_HOLD_TYPE_HARD_RESET	7
 
@@ -99,7 +100,10 @@
 		break;
 	case SYS_RESTART:
 	default:
-		reset_type = PON_PS_HOLD_TYPE_HARD_RESET;
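+		/* reboot_mode is selected with the "reboot=" kernel parameter */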
+		if (reboot_mode == REBOOT_WARM)
+			reset_type = PON_PS_HOLD_TYPE_WARM_RESET;
+		else
+			reset_type = PON_PS_HOLD_TYPE_HARD_RESET;
 		break;
 	}
 
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 4ff5cd2..3d17a0b 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -542,6 +542,7 @@
 	.remove = xenkbd_remove,
 	.resume = xenkbd_resume,
 	.otherend_changed = xenkbd_backend_changed,
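+	/* Do not stall boot waiting for this optional frontend's backend */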
+	.not_essential = true,
 };
 
 static int __init xenkbd_init(void)
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 2d0bc02..956d9cd 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -517,6 +517,19 @@
 	case 0x16008020U:
 	case 0x26800010U:
 	case 0x36808000U:
+
+		/*
+		 * This firmware occasionally misreports coordinates for
+		 * the trackpoint. Discard packets outside of the
+		 * [-127, 127] range to prevent cursor jumps.
+		 */
+		if (packet[4] == 0x80 || packet[5] == 0x80 ||
+		    packet[1] >> 7 == packet[4] >> 7 ||
+		    packet[2] >> 7 == packet[5] >> 7) {
+			elantech_debug("discarding packet [%6ph]\n", packet);
+			break;
+		}
 		x = packet[4] - (int)((packet[1]^0x80) << 1);
 		y = (int)((packet[2]^0x80) << 1) - packet[5];
 
diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c
index 24f31a5..50a0134 100644
--- a/drivers/input/rmi4/rmi_bus.c
+++ b/drivers/input/rmi4/rmi_bus.c
@@ -90,6 +90,7 @@
 
 	rmi_dev->dev.bus = &rmi_bus_type;
 	rmi_dev->dev.type = &rmi_device_type;
+	rmi_dev->dev.parent = xport->dev;
 
 	xport->rmi_dev = rmi_dev;
 
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index a5a0035..aedd055 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -273,6 +273,13 @@
 		},
 	},
 	{
+		/* Fujitsu Lifebook T725 laptop */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"),
+		},
+	},
+	{
 		/* Fujitsu Lifebook U745 */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
@@ -841,6 +848,13 @@
 		},
 	},
 	{
+		/* Fujitsu Lifebook T725 laptop */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"),
+		},
+	},
+	{
 		/* Fujitsu U574 laptop */
 		/* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
 		.matches = {
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index d4e7473..2f6adfb 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -425,6 +425,7 @@
 config TOUCHSCREEN_ILI210X
 	tristate "Ilitek ILI210X based touchscreen"
 	depends on I2C
+	select CRC_CCITT
 	help
 	  Say Y here if you have a ILI210X based touchscreen
 	  controller. This driver supports models ILI2102,
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 7d34100..39a8127 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -6,6 +6,7 @@
 # Each configuration option enables a list of files.
 
 wm97xx-ts-y := wm97xx-core.o
+goodix_ts-y := goodix.o goodix_fwupload.o
 
 obj-$(CONFIG_TOUCHSCREEN_88PM860X)	+= 88pm860x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_AD7877)	+= ad7877.o
@@ -44,7 +45,7 @@
 obj-$(CONFIG_TOUCHSCREEN_EGALAX_SERIAL)	+= egalax_ts_serial.o
 obj-$(CONFIG_TOUCHSCREEN_EXC3000)	+= exc3000.o
 obj-$(CONFIG_TOUCHSCREEN_FUJITSU)	+= fujitsu_ts.o
-obj-$(CONFIG_TOUCHSCREEN_GOODIX)	+= goodix.o
+obj-$(CONFIG_TOUCHSCREEN_GOODIX)	+= goodix_ts.o
 obj-$(CONFIG_TOUCHSCREEN_HIDEEP)	+= hideep.o
 obj-$(CONFIG_TOUCHSCREEN_ILI210X)	+= ili210x.o
 obj-$(CONFIG_TOUCHSCREEN_ILITEK)	+= ilitek_ts_i2c.o
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index f113a27..a25a77d 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -101,10 +101,6 @@
 	struct spi_device	*spi;
 	struct regulator	*reg;
 
-#if IS_ENABLED(CONFIG_HWMON)
-	struct device		*hwmon;
-#endif
-
 	u16			model;
 	u16			vref_mv;
 	u16			vref_delay_usecs;
@@ -142,13 +138,18 @@
 
 	int			(*filter)(void *data, int data_idx, int *val);
 	void			*filter_data;
-	void			(*filter_cleanup)(void *data);
 	int			(*get_pendown_state)(void);
 	int			gpio_pendown;
 
 	void			(*wait_for_sync)(void);
 };
 
+enum ads7846_filter {
+	ADS7846_FILTER_OK,
+	ADS7846_FILTER_REPEAT,
+	ADS7846_FILTER_IGNORE,
+};
+
 /* leave chip selected when we're done, for quicker re-select? */
 #if	0
 #define	CS_CHANGE(xfer)	((xfer).cs_change = 1)
@@ -549,6 +550,8 @@
 
 static int ads784x_hwmon_register(struct spi_device *spi, struct ads7846 *ts)
 {
+	struct device *hwmon;
+
 	/* hwmon sensors need a reference voltage */
 	switch (ts->model) {
 	case 7846:
@@ -569,17 +572,11 @@
 		break;
 	}
 
-	ts->hwmon = hwmon_device_register_with_groups(&spi->dev, spi->modalias,
-						      ts, ads7846_attr_groups);
+	hwmon = devm_hwmon_device_register_with_groups(&spi->dev,
+						       spi->modalias, ts,
+						       ads7846_attr_groups);
 
-	return PTR_ERR_OR_ZERO(ts->hwmon);
-}
-
-static void ads784x_hwmon_unregister(struct spi_device *spi,
-				     struct ads7846 *ts)
-{
-	if (ts->hwmon)
-		hwmon_device_unregister(ts->hwmon);
+	return PTR_ERR_OR_ZERO(hwmon);
 }
 
 #else
@@ -588,11 +585,6 @@
 {
 	return 0;
 }
-
-static inline void ads784x_hwmon_unregister(struct spi_device *spi,
-					    struct ads7846 *ts)
-{
-}
 #endif
 
 static ssize_t ads7846_pen_down_show(struct device *dev,
@@ -1014,8 +1006,8 @@
 		ts->get_pendown_state = pdata->get_pendown_state;
 	} else if (gpio_is_valid(pdata->gpio_pendown)) {
 
-		err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN,
-				       "ads7846_pendown");
+		err = devm_gpio_request_one(&spi->dev, pdata->gpio_pendown,
+					    GPIOF_IN, "ads7846_pendown");
 		if (err) {
 			dev_err(&spi->dev,
 				"failed to request/setup pendown GPIO%d: %d\n",
@@ -1212,24 +1204,30 @@
 }
 #endif
 
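+/* devm action to disable the regulator on driver unbind or probe failure */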
+static void ads7846_regulator_disable(void *regulator)
+{
+	regulator_disable(regulator);
+}
+
 static int ads7846_probe(struct spi_device *spi)
 {
 	const struct ads7846_platform_data *pdata;
 	struct ads7846 *ts;
+	struct device *dev = &spi->dev;
 	struct ads7846_packet *packet;
 	struct input_dev *input_dev;
 	unsigned long irq_flags;
 	int err;
 
 	if (!spi->irq) {
-		dev_dbg(&spi->dev, "no IRQ?\n");
+		dev_dbg(dev, "no IRQ?\n");
 		return -EINVAL;
 	}
 
 	/* don't exceed max specified sample rate */
 	if (spi->max_speed_hz > (125000 * SAMPLE_BITS)) {
-		dev_err(&spi->dev, "f(sample) %d KHz?\n",
-				(spi->max_speed_hz/SAMPLE_BITS)/1000);
+		dev_err(dev, "f(sample) %d KHz?\n",
+			(spi->max_speed_hz/SAMPLE_BITS)/1000);
 		return -EINVAL;
 	}
 
@@ -1245,13 +1243,17 @@
 	if (err < 0)
 		return err;
 
-	ts = kzalloc(sizeof(struct ads7846), GFP_KERNEL);
-	packet = kzalloc(sizeof(struct ads7846_packet), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts || !packet || !input_dev) {
-		err = -ENOMEM;
-		goto err_free_mem;
-	}
+	ts = devm_kzalloc(dev, sizeof(struct ads7846), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	packet = devm_kzalloc(dev, sizeof(struct ads7846_packet), GFP_KERNEL);
+	if (!packet)
+		return -ENOMEM;
+
+	input_dev = devm_input_allocate_device(dev);
+	if (!input_dev)
+		return -ENOMEM;
 
 	spi_set_drvdata(spi, ts);
 
@@ -1262,13 +1264,11 @@
 	mutex_init(&ts->lock);
 	init_waitqueue_head(&ts->wait);
 
-	pdata = dev_get_platdata(&spi->dev);
+	pdata = dev_get_platdata(dev);
 	if (!pdata) {
-		pdata = ads7846_probe_dt(&spi->dev);
-		if (IS_ERR(pdata)) {
-			err = PTR_ERR(pdata);
-			goto err_free_mem;
-		}
+		pdata = ads7846_probe_dt(dev);
+		if (IS_ERR(pdata))
+			return PTR_ERR(pdata);
 	}
 
 	ts->model = pdata->model ? : 7846;
@@ -1276,15 +1276,7 @@
 	ts->x_plate_ohms = pdata->x_plate_ohms ? : 400;
 	ts->vref_mv = pdata->vref_mv;
 
-	if (pdata->filter != NULL) {
-		if (pdata->filter_init != NULL) {
-			err = pdata->filter_init(pdata, &ts->filter_data);
-			if (err < 0)
-				goto err_free_mem;
-		}
-		ts->filter = pdata->filter;
-		ts->filter_cleanup = pdata->filter_cleanup;
-	} else if (pdata->debounce_max) {
+	if (pdata->debounce_max) {
 		ts->debounce_max = pdata->debounce_max;
 		if (ts->debounce_max < 2)
 			ts->debounce_max = 2;
@@ -1298,7 +1290,7 @@
 
 	err = ads7846_setup_pendown(spi, ts, pdata);
 	if (err)
-		goto err_cleanup_filter;
+		return err;
 
 	if (pdata->penirq_recheck_delay_usecs)
 		ts->penirq_recheck_delay_usecs =
@@ -1306,15 +1298,16 @@
 
 	ts->wait_for_sync = pdata->wait_for_sync ? : null_wait_for_sync;
 
-	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&spi->dev));
+	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(dev));
 	snprintf(ts->name, sizeof(ts->name), "ADS%d Touchscreen", ts->model);
 
 	input_dev->name = ts->name;
 	input_dev->phys = ts->phys;
-	input_dev->dev.parent = &spi->dev;
 
-	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+	input_dev->id.bustype = BUS_SPI;
+	input_dev->id.product = pdata->model;
+
+	input_set_capability(input_dev, EV_KEY, BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X,
 			pdata->x_min ? : 0,
 			pdata->x_max ? : MAX_12BIT,
@@ -1345,125 +1338,84 @@
 
 	ads7846_setup_spi_msg(ts, pdata);
 
-	ts->reg = regulator_get(&spi->dev, "vcc");
+	ts->reg = devm_regulator_get(dev, "vcc");
 	if (IS_ERR(ts->reg)) {
 		err = PTR_ERR(ts->reg);
-		dev_err(&spi->dev, "unable to get regulator: %d\n", err);
-		goto err_free_gpio;
+		dev_err(dev, "unable to get regulator: %d\n", err);
+		return err;
 	}
 
 	err = regulator_enable(ts->reg);
 	if (err) {
-		dev_err(&spi->dev, "unable to enable regulator: %d\n", err);
-		goto err_put_regulator;
+		dev_err(dev, "unable to enable regulator: %d\n", err);
+		return err;
 	}
 
+	err = devm_add_action_or_reset(dev, ads7846_regulator_disable, ts->reg);
+	if (err)
+		return err;
+
 	irq_flags = pdata->irq_flags ? : IRQF_TRIGGER_FALLING;
 	irq_flags |= IRQF_ONESHOT;
 
-	err = request_threaded_irq(spi->irq, ads7846_hard_irq, ads7846_irq,
-				   irq_flags, spi->dev.driver->name, ts);
-	if (err && !pdata->irq_flags) {
-		dev_info(&spi->dev,
+	err = devm_request_threaded_irq(dev, spi->irq,
+					ads7846_hard_irq, ads7846_irq,
+					irq_flags, dev->driver->name, ts);
+	if (err && err != -EPROBE_DEFER && !pdata->irq_flags) {
+		dev_info(dev,
 			"trying pin change workaround on irq %d\n", spi->irq);
 		irq_flags |= IRQF_TRIGGER_RISING;
-		err = request_threaded_irq(spi->irq,
-				  ads7846_hard_irq, ads7846_irq,
-				  irq_flags, spi->dev.driver->name, ts);
+		err = devm_request_threaded_irq(dev, spi->irq,
+						ads7846_hard_irq, ads7846_irq,
+						irq_flags, dev->driver->name,
+						ts);
 	}
 
 	if (err) {
-		dev_dbg(&spi->dev, "irq %d busy?\n", spi->irq);
-		goto err_disable_regulator;
+		dev_dbg(dev, "irq %d busy?\n", spi->irq);
+		return err;
 	}
 
 	err = ads784x_hwmon_register(spi, ts);
 	if (err)
-		goto err_free_irq;
+		return err;
 
-	dev_info(&spi->dev, "touchscreen, irq %d\n", spi->irq);
+	dev_info(dev, "touchscreen, irq %d\n", spi->irq);
 
 	/*
 	 * Take a first sample, leaving nPENIRQ active and vREF off; avoid
 	 * the touchscreen, in case it's not connected.
 	 */
 	if (ts->model == 7845)
-		ads7845_read12_ser(&spi->dev, PWRDOWN);
+		ads7845_read12_ser(dev, PWRDOWN);
 	else
-		(void) ads7846_read12_ser(&spi->dev, READ_12BIT_SER(vaux));
+		(void) ads7846_read12_ser(dev, READ_12BIT_SER(vaux));
 
-	err = sysfs_create_group(&spi->dev.kobj, &ads784x_attr_group);
+	err = devm_device_add_group(dev, &ads784x_attr_group);
 	if (err)
-		goto err_remove_hwmon;
+		return err;
 
 	err = input_register_device(input_dev);
 	if (err)
-		goto err_remove_attr_group;
+		return err;
 
-	device_init_wakeup(&spi->dev, pdata->wakeup);
+	device_init_wakeup(dev, pdata->wakeup);
 
 	/*
 	 * If device does not carry platform data we must have allocated it
 	 * when parsing DT data.
 	 */
-	if (!dev_get_platdata(&spi->dev))
-		devm_kfree(&spi->dev, (void *)pdata);
+	if (!dev_get_platdata(dev))
+		devm_kfree(dev, (void *)pdata);
 
 	return 0;
-
- err_remove_attr_group:
-	sysfs_remove_group(&spi->dev.kobj, &ads784x_attr_group);
- err_remove_hwmon:
-	ads784x_hwmon_unregister(spi, ts);
- err_free_irq:
-	free_irq(spi->irq, ts);
- err_disable_regulator:
-	regulator_disable(ts->reg);
- err_put_regulator:
-	regulator_put(ts->reg);
- err_free_gpio:
-	if (!ts->get_pendown_state)
-		gpio_free(ts->gpio_pendown);
- err_cleanup_filter:
-	if (ts->filter_cleanup)
-		ts->filter_cleanup(ts->filter_data);
- err_free_mem:
-	input_free_device(input_dev);
-	kfree(packet);
-	kfree(ts);
-	return err;
 }
 
 static int ads7846_remove(struct spi_device *spi)
 {
 	struct ads7846 *ts = spi_get_drvdata(spi);
 
-	sysfs_remove_group(&spi->dev.kobj, &ads784x_attr_group);
-
-	ads7846_disable(ts);
-	free_irq(ts->spi->irq, ts);
-
-	input_unregister_device(ts->input);
-
-	ads784x_hwmon_unregister(spi, ts);
-
-	regulator_put(ts->reg);
-
-	if (!ts->get_pendown_state) {
-		/*
-		 * If we are not using specialized pendown method we must
-		 * have been relying on gpio we set up ourselves.
-		 */
-		gpio_free(ts->gpio_pendown);
-	}
-
-	if (ts->filter_cleanup)
-		ts->filter_cleanup(ts->filter_data);
-
-	kfree(ts->packet);
-	kfree(ts);
-
-	dev_dbg(&spi->dev, "unregistered touchscreen\n");
+	ads7846_stop(ts);
 
 	return 0;
 }
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index 68f542b..7e13a66 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -1439,11 +1439,11 @@
 	if (error)
 		return error;
 
-	error = devm_add_action(&client->dev, elants_i2c_power_off, ts);
+	error = devm_add_action_or_reset(&client->dev,
+					 elants_i2c_power_off, ts);
 	if (error) {
 		dev_err(&client->dev,
 			"failed to install power off action: %d\n", error);
-		elants_i2c_power_off(ts);
 		return error;
 	}
 
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index 4f53d3c..b5cc917 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -14,20 +14,15 @@
 #include <linux/kernel.h>
 #include <linux/dmi.h>
 #include <linux/firmware.h>
-#include <linux/gpio/consumer.h>
-#include <linux/i2c.h>
-#include <linux/input.h>
-#include <linux/input/mt.h>
-#include <linux/input/touchscreen.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/of.h>
 #include <asm/unaligned.h>
+#include "goodix.h"
 
 #define GOODIX_GPIO_INT_NAME		"irq"
 #define GOODIX_GPIO_RST_NAME		"reset"
@@ -38,22 +33,11 @@
 #define GOODIX_CONTACT_SIZE		8
 #define GOODIX_MAX_CONTACT_SIZE		9
 #define GOODIX_MAX_CONTACTS		10
-#define GOODIX_MAX_KEYS			7
 
 #define GOODIX_CONFIG_MIN_LENGTH	186
 #define GOODIX_CONFIG_911_LENGTH	186
 #define GOODIX_CONFIG_967_LENGTH	228
 #define GOODIX_CONFIG_GT9X_LENGTH	240
-#define GOODIX_CONFIG_MAX_LENGTH	240
-
-/* Register defines */
-#define GOODIX_REG_COMMAND		0x8040
-#define GOODIX_CMD_SCREEN_OFF		0x05
-
-#define GOODIX_READ_COOR_ADDR		0x814E
-#define GOODIX_GT1X_REG_CONFIG_DATA	0x8050
-#define GOODIX_GT9X_REG_CONFIG_DATA	0x8047
-#define GOODIX_REG_ID			0x8140
 
 #define GOODIX_BUFFER_STATUS_READY	BIT(7)
 #define GOODIX_HAVE_KEY			BIT(4)
@@ -68,55 +52,11 @@
 #define ACPI_GPIO_SUPPORT
 #endif
 
-struct goodix_ts_data;
-
-enum goodix_irq_pin_access_method {
-	IRQ_PIN_ACCESS_NONE,
-	IRQ_PIN_ACCESS_GPIO,
-	IRQ_PIN_ACCESS_ACPI_GPIO,
-	IRQ_PIN_ACCESS_ACPI_METHOD,
-};
-
-struct goodix_chip_data {
-	u16 config_addr;
-	int config_len;
-	int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len);
-	void (*calc_config_checksum)(struct goodix_ts_data *ts);
-};
-
 struct goodix_chip_id {
 	const char *id;
 	const struct goodix_chip_data *data;
 };
 
-#define GOODIX_ID_MAX_LEN	4
-
-struct goodix_ts_data {
-	struct i2c_client *client;
-	struct input_dev *input_dev;
-	const struct goodix_chip_data *chip;
-	struct touchscreen_properties prop;
-	unsigned int max_touch_num;
-	unsigned int int_trigger_type;
-	struct regulator *avdd28;
-	struct regulator *vddio;
-	struct gpio_desc *gpiod_int;
-	struct gpio_desc *gpiod_rst;
-	int gpio_count;
-	int gpio_int_idx;
-	char id[GOODIX_ID_MAX_LEN + 1];
-	u16 version;
-	const char *cfg_name;
-	bool reset_controller_at_probe;
-	bool load_cfg_from_disk;
-	struct completion firmware_loading_complete;
-	unsigned long irq_flags;
-	enum goodix_irq_pin_access_method irq_pin_access_method;
-	unsigned int contact_size;
-	u8 config[GOODIX_CONFIG_MAX_LENGTH];
-	unsigned short keymap[GOODIX_MAX_KEYS];
-};
-
 static int goodix_check_cfg_8(struct goodix_ts_data *ts,
 			      const u8 *cfg, int len);
 static int goodix_check_cfg_16(struct goodix_ts_data *ts,
@@ -215,8 +155,7 @@
  * @buf: raw write data buffer.
  * @len: length of the buffer to write
  */
-static int goodix_i2c_read(struct i2c_client *client,
-			   u16 reg, u8 *buf, int len)
+int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len)
 {
 	struct i2c_msg msgs[2];
 	__be16 wbuf = cpu_to_be16(reg);
@@ -233,7 +172,13 @@
 	msgs[1].buf   = buf;
 
 	ret = i2c_transfer(client->adapter, msgs, 2);
-	return ret < 0 ? ret : (ret != ARRAY_SIZE(msgs) ? -EIO : 0);
+	if (ret >= 0)
+		ret = (ret == ARRAY_SIZE(msgs) ? 0 : -EIO);
+
+	if (ret)
+		dev_err(&client->dev, "Error reading %d bytes from 0x%04x: %d\n",
+			len, reg, ret);
+	return ret;
 }
 
 /**
@@ -244,8 +189,7 @@
  * @buf: raw data buffer to write.
  * @len: length of the buffer to write
  */
-static int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf,
-			    unsigned len)
+int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len)
 {
 	u8 *addr_buf;
 	struct i2c_msg msg;
@@ -265,11 +209,18 @@
 	msg.len = len + 2;
 
 	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret >= 0)
+		ret = (ret == 1 ? 0 : -EIO);
+
 	kfree(addr_buf);
-	return ret < 0 ? ret : (ret != 1 ? -EIO : 0);
+
+	if (ret)
+		dev_err(&client->dev, "Error writing %d bytes to 0x%04x: %d\n",
+			len, reg, ret);
+	return ret;
 }
 
-static int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value)
+int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value)
 {
 	return goodix_i2c_write(client, reg, &value, sizeof(value));
 }
@@ -308,11 +259,8 @@
 	do {
 		error = goodix_i2c_read(ts->client, addr, data,
 					header_contact_keycode_size);
-		if (error) {
-			dev_err(&ts->client->dev, "I2C transfer error: %d\n",
-					error);
+		if (error)
 			return error;
-		}
 
 		if (data[0] & GOODIX_BUFFER_STATUS_READY) {
 			touch_num = data[0] & 0x0f;
@@ -333,6 +281,11 @@
 			return touch_num;
 		}
 
+		if (data[0] == 0 && ts->firmware_name) {
+			if (goodix_handle_fw_request(ts))
+				return 0;
+		}
+
 		usleep_range(1000, 2000); /* Poll every 1 - 2 ms */
 	} while (time_before(jiffies, max_timeout));
 
@@ -435,9 +388,7 @@
 	struct goodix_ts_data *ts = dev_id;
 
 	goodix_process_events(ts);
-
-	if (goodix_i2c_write_u8(ts->client, GOODIX_READ_COOR_ADDR, 0) < 0)
-		dev_err(&ts->client->dev, "I2C write end_cmd error\n");
+	goodix_i2c_write_u8(ts->client, GOODIX_READ_COOR_ADDR, 0);
 
 	return IRQ_HANDLED;
 }
@@ -553,7 +504,7 @@
  * @cfg: config firmware to write to device
  * @len: config data length
  */
-static int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len)
+int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len)
 {
 	int error;
 
@@ -562,11 +513,9 @@
 		return error;
 
 	error = goodix_i2c_write(ts->client, ts->chip->config_addr, cfg, len);
-	if (error) {
-		dev_err(&ts->client->dev, "Failed to write config data: %d",
-			error);
+	if (error)
 		return error;
-	}
+
 	dev_dbg(&ts->client->dev, "Config sent successfully.");
 
 	/* Let the firmware reconfigure itself, so sleep for 10ms */
@@ -651,21 +600,66 @@
 	return -EINVAL; /* Never reached */
 }
 
-static int goodix_int_sync(struct goodix_ts_data *ts)
+int goodix_int_sync(struct goodix_ts_data *ts)
 {
 	int error;
 
 	error = goodix_irq_direction_output(ts, 0);
 	if (error)
-		return error;
+		goto error;
 
 	msleep(50);				/* T5: 50ms */
 
 	error = goodix_irq_direction_input(ts);
 	if (error)
-		return error;
+		goto error;
 
 	return 0;
+
+error:
+	dev_err(&ts->client->dev, "Controller irq sync failed.\n");
+	return error;
+}
+
+/**
+ * goodix_reset_no_int_sync - Reset device, leaving interrupt line in output mode
+ *
+ * @ts: goodix_ts_data pointer
+ */
+int goodix_reset_no_int_sync(struct goodix_ts_data *ts)
+{
+	int error;
+
+	/* begin select I2C slave addr */
+	error = gpiod_direction_output(ts->gpiod_rst, 0);
+	if (error)
+		goto error;
+
+	msleep(20);				/* T2: > 10ms */
+
+	/* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */
+	error = goodix_irq_direction_output(ts, ts->client->addr == 0x14);
+	if (error)
+		goto error;
+
+	usleep_range(100, 2000);		/* T3: > 100us */
+
+	error = gpiod_direction_output(ts->gpiod_rst, 1);
+	if (error)
+		goto error;
+
+	usleep_range(6000, 10000);		/* T4: > 5ms */
+
+	/* end select I2C slave addr */
+	error = gpiod_direction_input(ts->gpiod_rst);
+	if (error)
+		goto error;
+
+	return 0;
+
+error:
+	dev_err(&ts->client->dev, "Controller reset failed.\n");
+	return error;
 }
 
 /**
@@ -677,36 +671,11 @@
 {
 	int error;
 
-	/* begin select I2C slave addr */
-	error = gpiod_direction_output(ts->gpiod_rst, 0);
+	error = goodix_reset_no_int_sync(ts);
 	if (error)
 		return error;
 
-	msleep(20);				/* T2: > 10ms */
-
-	/* HIGH: 0x28/0x29, LOW: 0xBA/0xBB */
-	error = goodix_irq_direction_output(ts, ts->client->addr == 0x14);
-	if (error)
-		return error;
-
-	usleep_range(100, 2000);		/* T3: > 100us */
-
-	error = gpiod_direction_output(ts->gpiod_rst, 1);
-	if (error)
-		return error;
-
-	usleep_range(6000, 10000);		/* T4: > 5ms */
-
-	/* end select I2C slave addr */
-	error = gpiod_direction_input(ts->gpiod_rst);
-	if (error)
-		return error;
-
-	error = goodix_int_sync(ts);
-	if (error)
-		return error;
-
-	return 0;
+	return goodix_int_sync(ts);
 }
 
 #ifdef ACPI_GPIO_SUPPORT
@@ -931,14 +900,19 @@
 	int x_max, y_max;
 	int error;
 
-	error = goodix_i2c_read(ts->client, ts->chip->config_addr,
-				ts->config, ts->chip->config_len);
-	if (error) {
-		dev_warn(&ts->client->dev, "Error reading config: %d\n",
-			 error);
-		ts->int_trigger_type = GOODIX_INT_TRIGGER;
-		ts->max_touch_num = GOODIX_MAX_CONTACTS;
-		return;
+	/*
+	 * On controllers where we need to upload the firmware
+	 * (controllers without flash) ts->config already has the config
+	 * at this point and the controller itself does not have it yet!
+	 */
+	if (!ts->firmware_name) {
+		error = goodix_i2c_read(ts->client, ts->chip->config_addr,
+					ts->config, ts->chip->config_len);
+		if (error) {
+			ts->int_trigger_type = GOODIX_INT_TRIGGER;
+			ts->max_touch_num = GOODIX_MAX_CONTACTS;
+			return;
+		}
 	}
 
 	ts->int_trigger_type = ts->config[TRIGGER_LOC] & 0x03;
@@ -966,10 +940,8 @@
 	char id_str[GOODIX_ID_MAX_LEN + 1];
 
 	error = goodix_i2c_read(ts->client, GOODIX_REG_ID, buf, sizeof(buf));
-	if (error) {
-		dev_err(&ts->client->dev, "read version failed: %d\n", error);
+	if (error)
 		return error;
-	}
 
 	memcpy(id_str, buf, GOODIX_ID_MAX_LEN);
 	id_str[GOODIX_ID_MAX_LEN] = 0;
@@ -995,13 +967,10 @@
 	u8 test;
 
 	while (retry++ < 2) {
-		error = goodix_i2c_read(client, GOODIX_REG_ID,
-					&test, 1);
+		error = goodix_i2c_read(client, GOODIX_REG_ID, &test, 1);
 		if (!error)
 			return 0;
 
-		dev_err(&client->dev, "i2c test failed attempt %d: %d\n",
-			retry, error);
 		msleep(20);
 	}
 
@@ -1130,7 +1099,16 @@
 	struct goodix_ts_data *ts = ctx;
 	int error;
 
-	if (cfg) {
+	if (ts->firmware_name) {
+		if (!cfg)
+			goto err_release_cfg;
+
+		error = goodix_check_cfg(ts, cfg->data, cfg->size);
+		if (error)
+			goto err_release_cfg;
+
+		memcpy(ts->config, cfg->data, cfg->size);
+	} else if (cfg) {
 		/* send device configuration to the firmware */
 		error = goodix_send_cfg(ts, cfg->data, cfg->size);
 		if (error)
@@ -1156,6 +1134,7 @@
 			   const struct i2c_device_id *id)
 {
 	struct goodix_ts_data *ts;
+	const char *cfg_name;
 	int error;
 
 	dev_dbg(&client->dev, "I2C Address: 0x%02x\n", client->addr);
@@ -1205,10 +1184,8 @@
 	if (ts->reset_controller_at_probe) {
 		/* reset the controller */
 		error = goodix_reset(ts);
-		if (error) {
-			dev_err(&client->dev, "Controller reset failed.\n");
+		if (error)
 			return error;
-		}
 	}
 
 	error = goodix_i2c_test(client);
@@ -1223,20 +1200,27 @@
 		return error;
 	}
 
-	error = goodix_read_version(ts);
-	if (error) {
-		dev_err(&client->dev, "Read version failed.\n");
+	error = goodix_firmware_check(ts);
+	if (error)
 		return error;
-	}
+
+	error = goodix_read_version(ts);
+	if (error)
+		return error;
 
 	ts->chip = goodix_get_chip_data(ts->id);
 
 	if (ts->load_cfg_from_disk) {
 		/* update device config */
-		ts->cfg_name = devm_kasprintf(&client->dev, GFP_KERNEL,
-					      "goodix_%s_cfg.bin", ts->id);
-		if (!ts->cfg_name)
-			return -ENOMEM;
+		error = device_property_read_string(&client->dev,
+						    "goodix,config-name",
+						    &cfg_name);
+		if (!error)
+			snprintf(ts->cfg_name, sizeof(ts->cfg_name),
+				 "goodix/%s", cfg_name);
+		else
+			snprintf(ts->cfg_name, sizeof(ts->cfg_name),
+				 "goodix_%s_cfg.bin", ts->id);
 
 		error = request_firmware_nowait(THIS_MODULE, true, ts->cfg_name,
 						&client->dev, GFP_KERNEL, ts,
@@ -1286,6 +1270,9 @@
 	/* Free IRQ as IRQ pin is used as output in the suspend sequence */
 	goodix_free_irq(ts);
 
+	/* Save reference (calibration) info if necessary */
+	goodix_save_bak_ref(ts);
+
 	/* Output LOW on the INT pin for 5 ms */
 	error = goodix_irq_direction_output(ts, 0);
 	if (error) {
@@ -1298,7 +1285,6 @@
 	error = goodix_i2c_write_u8(ts->client, GOODIX_REG_COMMAND,
 				    GOODIX_CMD_SCREEN_OFF);
 	if (error) {
-		dev_err(&ts->client->dev, "Screen off command failed\n");
 		goodix_irq_direction_input(ts);
 		goodix_request_irq(ts);
 		return -EAGAIN;
@@ -1341,19 +1327,14 @@
 
 	error = goodix_i2c_read(ts->client, ts->chip->config_addr,
 				&config_ver, 1);
-	if (error)
-		dev_warn(dev, "Error reading config version: %d, resetting controller\n",
-			 error);
-	else if (config_ver != ts->config[0])
+	if (!error && config_ver != ts->config[0])
 		dev_info(dev, "Config version mismatch %d != %d, resetting controller\n",
 			 config_ver, ts->config[0]);
 
 	if (error != 0 || config_ver != ts->config[0]) {
 		error = goodix_reset(ts);
-		if (error) {
-			dev_err(dev, "Controller reset failed.\n");
+		if (error)
 			return error;
-		}
 
 		error = goodix_send_cfg(ts, ts->config, ts->chip->config_len);
 		if (error)
diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h
new file mode 100644
index 0000000..62138f9
--- /dev/null
+++ b/drivers/input/touchscreen/goodix.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __GOODIX_H__
+#define __GOODIX_H__
+
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/regulator/consumer.h>
+
+/* Register defines */
+#define GOODIX_REG_MISCTL_DSP_CTL		0x4010
+#define GOODIX_REG_MISCTL_SRAM_BANK		0x4048
+#define GOODIX_REG_MISCTL_MEM_CD_EN		0x4049
+#define GOODIX_REG_MISCTL_CACHE_EN		0x404B
+#define GOODIX_REG_MISCTL_TMR0_EN		0x40B0
+#define GOODIX_REG_MISCTL_SWRST			0x4180
+#define GOODIX_REG_MISCTL_CPU_SWRST_PULSE	0x4184
+#define GOODIX_REG_MISCTL_BOOTCTL		0x4190
+#define GOODIX_REG_MISCTL_BOOT_OPT		0x4218
+#define GOODIX_REG_MISCTL_BOOT_CTL		0x5094
+
+#define GOODIX_REG_FW_SIG			0x8000
+#define GOODIX_FW_SIG_LEN			10
+
+#define GOODIX_REG_MAIN_CLK			0x8020
+#define GOODIX_MAIN_CLK_LEN			6
+
+#define GOODIX_REG_COMMAND			0x8040
+#define GOODIX_CMD_SCREEN_OFF			0x05
+
+#define GOODIX_REG_SW_WDT			0x8041
+
+#define GOODIX_REG_REQUEST			0x8043
+#define GOODIX_RQST_RESPONDED			0x00
+#define GOODIX_RQST_CONFIG			0x01
+#define GOODIX_RQST_BAK_REF			0x02
+#define GOODIX_RQST_RESET			0x03
+#define GOODIX_RQST_MAIN_CLOCK			0x04
+/*
+ * Unknown request which gets sent by the controller approx.
+ * every 34 seconds once it is up and running.
+ */
+#define GOODIX_RQST_UNKNOWN			0x06
+#define GOODIX_RQST_IDLE			0xFF
+
+#define GOODIX_REG_STATUS			0x8044
+
+#define GOODIX_GT1X_REG_CONFIG_DATA		0x8050
+#define GOODIX_GT9X_REG_CONFIG_DATA		0x8047
+#define GOODIX_REG_ID				0x8140
+#define GOODIX_READ_COOR_ADDR			0x814E
+#define GOODIX_REG_BAK_REF			0x99D0
+
+#define GOODIX_ID_MAX_LEN			4
+#define GOODIX_CONFIG_MAX_LENGTH		240
+#define GOODIX_MAX_KEYS				7
+
+enum goodix_irq_pin_access_method {
+	IRQ_PIN_ACCESS_NONE,
+	IRQ_PIN_ACCESS_GPIO,
+	IRQ_PIN_ACCESS_ACPI_GPIO,
+	IRQ_PIN_ACCESS_ACPI_METHOD,
+};
+
+struct goodix_ts_data;
+
+struct goodix_chip_data {
+	u16 config_addr;
+	int config_len;
+	int (*check_config)(struct goodix_ts_data *ts, const u8 *cfg, int len);
+	void (*calc_config_checksum)(struct goodix_ts_data *ts);
+};
+
+struct goodix_ts_data {
+	struct i2c_client *client;
+	struct input_dev *input_dev;
+	const struct goodix_chip_data *chip;
+	const char *firmware_name;
+	struct touchscreen_properties prop;
+	unsigned int max_touch_num;
+	unsigned int int_trigger_type;
+	struct regulator *avdd28;
+	struct regulator *vddio;
+	struct gpio_desc *gpiod_int;
+	struct gpio_desc *gpiod_rst;
+	int gpio_count;
+	int gpio_int_idx;
+	char id[GOODIX_ID_MAX_LEN + 1];
+	char cfg_name[64];
+	u16 version;
+	bool reset_controller_at_probe;
+	bool load_cfg_from_disk;
+	struct completion firmware_loading_complete;
+	unsigned long irq_flags;
+	enum goodix_irq_pin_access_method irq_pin_access_method;
+	unsigned int contact_size;
+	u8 config[GOODIX_CONFIG_MAX_LENGTH];
+	unsigned short keymap[GOODIX_MAX_KEYS];
+	u8 main_clk[GOODIX_MAIN_CLK_LEN];
+	int bak_ref_len;
+	u8 *bak_ref;
+};
+
+int goodix_i2c_read(struct i2c_client *client, u16 reg, u8 *buf, int len);
+int goodix_i2c_write(struct i2c_client *client, u16 reg, const u8 *buf, int len);
+int goodix_i2c_write_u8(struct i2c_client *client, u16 reg, u8 value);
+int goodix_send_cfg(struct goodix_ts_data *ts, const u8 *cfg, int len);
+int goodix_int_sync(struct goodix_ts_data *ts);
+int goodix_reset_no_int_sync(struct goodix_ts_data *ts);
+
+int goodix_firmware_check(struct goodix_ts_data *ts);
+bool goodix_handle_fw_request(struct goodix_ts_data *ts);
+void goodix_save_bak_ref(struct goodix_ts_data *ts);
+
+#endif
diff --git a/drivers/input/touchscreen/goodix_fwupload.c b/drivers/input/touchscreen/goodix_fwupload.c
new file mode 100644
index 0000000..c1e7a24
--- /dev/null
+++ b/drivers/input/touchscreen/goodix_fwupload.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Goodix Touchscreen firmware upload support
+ *
+ * Copyright (c) 2021 Hans de Goede <hdegoede@redhat.com>
+ *
+ * This is a rewrite of gt9xx_update.c from the Allwinner H3 BSP which is:
+ * Copyright (c) 2010 - 2012 Goodix Technology.
+ * Author: andrew@goodix.com
+ */
+
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include "goodix.h"
+
+#define GOODIX_FW_HEADER_LENGTH		sizeof(struct goodix_fw_header)
+#define GOODIX_FW_SECTION_LENGTH	0x2000
+#define GOODIX_FW_DSP_LENGTH		0x1000
+#define GOODIX_FW_UPLOAD_ADDRESS	0xc000
+
+#define GOODIX_CFG_LOC_HAVE_KEY		 7
+#define GOODIX_CFG_LOC_DRVA_NUM		27
+#define GOODIX_CFG_LOC_DRVB_NUM		28
+#define GOODIX_CFG_LOC_SENS_NUM		29
+
+struct goodix_fw_header {
+	u8 hw_info[4];
+	u8 pid[8];
+	u8 vid[2];
+} __packed;
+
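+/*
+ * Firmware checksums are over 16-bit big-endian words; a valid image
+ * section, including its stored checksum, sums to 0.
+ */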
+static u16 goodix_firmware_checksum(const u8 *data, int size)
+{
+	u16 checksum = 0;
+	int i;
+
+	for (i = 0; i < size; i += 2)
+		checksum += (data[i] << 8) + data[i + 1];
+
+	return checksum;
+}
+
+static int goodix_firmware_verify(struct device *dev, const struct firmware *fw)
+{
+	const struct goodix_fw_header *fw_header;
+	size_t expected_size;
+	const u8 *data;
+	u16 checksum;
+	char buf[9];
+
+	expected_size = GOODIX_FW_HEADER_LENGTH + 4 * GOODIX_FW_SECTION_LENGTH +
+			GOODIX_FW_DSP_LENGTH;
+	if (fw->size != expected_size) {
+		dev_err(dev, "Firmware has wrong size, expected %zu got %zu\n",
+			expected_size, fw->size);
+		return -EINVAL;
+	}
+
+	data = fw->data + GOODIX_FW_HEADER_LENGTH;
+	checksum = goodix_firmware_checksum(data, 4 * GOODIX_FW_SECTION_LENGTH);
+	if (checksum) {
+		dev_err(dev, "Main firmware checksum error\n");
+		return -EINVAL;
+	}
+
+	data += 4 * GOODIX_FW_SECTION_LENGTH;
+	checksum = goodix_firmware_checksum(data, GOODIX_FW_DSP_LENGTH);
+	if (checksum) {
+		dev_err(dev, "DSP firmware checksum error\n");
+		return -EINVAL;
+	}
+
+	fw_header = (const struct goodix_fw_header *)fw->data;
+	dev_info(dev, "Firmware hardware info %02x%02x%02x%02x\n",
+		 fw_header->hw_info[0], fw_header->hw_info[1],
+		 fw_header->hw_info[2], fw_header->hw_info[3]);
+	/* pid is an 8 byte buffer containing a string, weird I know */
+	memcpy(buf, fw_header->pid, 8);
+	buf[8] = 0;
+	dev_info(dev, "Firmware PID: %s VID: %02x%02x\n", buf,
+		 fw_header->vid[0], fw_header->vid[1]);
+	return 0;
+}
+
+static int goodix_enter_upload_mode(struct i2c_client *client)
+{
+	int tries, error;
+	u8 val;
+
+	tries = 200;
+	do {
+		error = goodix_i2c_write_u8(client,
+					    GOODIX_REG_MISCTL_SWRST, 0x0c);
+		if (error)
+			return error;
+
+		error = goodix_i2c_read(client,
+					GOODIX_REG_MISCTL_SWRST, &val, 1);
+		if (error)
+			return error;
+
+		if (val == 0x0c)
+			break;
+	} while (--tries);
+
+	if (!tries) {
+		dev_err(&client->dev, "Error could not hold ss51 & dsp\n");
+		return -EIO;
+	}
+
+	/* DSP_CK and DSP_ALU_CK PowerOn */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_DSP_CTL, 0x00);
+	if (error)
+		return error;
+
+	/* Disable watchdog */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_TMR0_EN, 0x00);
+	if (error)
+		return error;
+
+	/* Clear cache enable */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_CACHE_EN, 0x00);
+	if (error)
+		return error;
+
+	/* Set boot from SRAM */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOTCTL, 0x02);
+	if (error)
+		return error;
+
+	/* Software reboot */
+	error = goodix_i2c_write_u8(client,
+				    GOODIX_REG_MISCTL_CPU_SWRST_PULSE, 0x01);
+	if (error)
+		return error;
+
+	/* Clear control flag */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOTCTL, 0x00);
+	if (error)
+		return error;
+
+	/* Set scramble */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_BOOT_OPT, 0x00);
+	if (error)
+		return error;
+
+	/* Enable accessing code */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_MEM_CD_EN, 0x01);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+static int goodix_start_firmware(struct i2c_client *client)
+{
+	int error;
+	u8 val;
+
+	/* Init software watchdog */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_SW_WDT, 0xaa);
+	if (error)
+		return error;
+
+	/* Release SS51 & DSP */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_MISCTL_SWRST, 0x00);
+	if (error)
+		return error;
+
+	error = goodix_i2c_read(client, GOODIX_REG_SW_WDT, &val, 1);
+	if (error)
+		return error;
+
+	/* The value we've written to SW_WDT should have been cleared now */
+	if (val == 0xaa) {
+		dev_err(&client->dev, "Error SW_WDT reg not cleared on fw startup\n");
+		return -EIO;
+	}
+
+	/* Re-init software watchdog */
+	error = goodix_i2c_write_u8(client, GOODIX_REG_SW_WDT, 0xaa);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+static int goodix_firmware_upload(struct goodix_ts_data *ts)
+{
+	const struct firmware *fw;
+	char fw_name[64];
+	const u8 *data;
+	int error;
+
+	snprintf(fw_name, sizeof(fw_name), "goodix/%s", ts->firmware_name);
+
+	error = request_firmware(&fw, fw_name, &ts->client->dev);
+	if (error) {
+		dev_err(&ts->client->dev, "Firmware request error %d\n", error);
+		return error;
+	}
+
+	error = goodix_firmware_verify(&ts->client->dev, fw);
+	if (error)
+		goto release;
+
+	error = goodix_reset_no_int_sync(ts);
+	if (error)
+		goto release;
+
+	error = goodix_enter_upload_mode(ts->client);
+	if (error)
+		goto release;
+
+	/* Select SRAM bank 0 and upload section 1 & 2 */
+	error = goodix_i2c_write_u8(ts->client,
+				    GOODIX_REG_MISCTL_SRAM_BANK, 0x00);
+	if (error)
+		goto release;
+
+	data = fw->data + GOODIX_FW_HEADER_LENGTH;
+	error = goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS,
+				 data, 2 * GOODIX_FW_SECTION_LENGTH);
+	if (error)
+		goto release;
+
+	/* Select SRAM bank 1 and upload section 3 & 4 */
+	error = goodix_i2c_write_u8(ts->client,
+				    GOODIX_REG_MISCTL_SRAM_BANK, 0x01);
+	if (error)
+		goto release;
+
+	data += 2 * GOODIX_FW_SECTION_LENGTH;
+	error = goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS,
+				 data, 2 * GOODIX_FW_SECTION_LENGTH);
+	if (error)
+		goto release;
+
+	/* Select SRAM bank 2 and upload the DSP firmware */
+	error = goodix_i2c_write_u8(ts->client,
+				    GOODIX_REG_MISCTL_SRAM_BANK, 0x02);
+	if (error)
+		goto release;
+
+	data += 2 * GOODIX_FW_SECTION_LENGTH;
+	error = goodix_i2c_write(ts->client, GOODIX_FW_UPLOAD_ADDRESS,
+				 data, GOODIX_FW_DSP_LENGTH);
+	if (error)
+		goto release;
+
+	error = goodix_start_firmware(ts->client);
+	if (error)
+		goto release;
+
+	error = goodix_int_sync(ts);
+release:
+	release_firmware(fw);
+	return error;
+}
+
+static int goodix_prepare_bak_ref(struct goodix_ts_data *ts)
+{
+	u8 have_key, driver_num, sensor_num;
+
+	if (ts->bak_ref)
+		return 0; /* Already done */
+
+	have_key = (ts->config[GOODIX_CFG_LOC_HAVE_KEY] & 0x01);
+
+	driver_num = (ts->config[GOODIX_CFG_LOC_DRVA_NUM] & 0x1f) +
+		     (ts->config[GOODIX_CFG_LOC_DRVB_NUM] & 0x1f);
+	if (have_key)
+		driver_num--;
+
+	sensor_num = (ts->config[GOODIX_CFG_LOC_SENS_NUM] & 0x0f) +
+		     ((ts->config[GOODIX_CFG_LOC_SENS_NUM] >> 4) & 0x0f);
+
+	dev_dbg(&ts->client->dev, "Drv %d Sen %d Key %d\n",
+		driver_num, sensor_num, have_key);
+
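+	/* The array holds 16-bit values, hence the final multiplication by 2 */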
+	ts->bak_ref_len = (driver_num * (sensor_num - 2) + 2) * 2;
+
+	ts->bak_ref = devm_kzalloc(&ts->client->dev,
+				   ts->bak_ref_len, GFP_KERNEL);
+	if (!ts->bak_ref)
+		return -ENOMEM;
+
+	/*
+	 * The bak_ref array contains the backup of an array of (self/auto)
+	 * calibration related values which the Android version of the driver
+	 * stores on the filesystem so that it can be restored after reboot.
+	 * The mainline kernel never writes directly to the filesystem like
+	 * this; we always start with all the values, which give a correction
+	 * factor in approx. the -20 - +20 range (in 2s complement), set to 0.
+	 *
+	 * Note the touchscreen works fine without restoring the reference
+	 * values after a reboot / power-cycle.
+	 *
+	 * The last 2 bytes are a 16-bit unsigned checksum which is expected
+	 * to make the addition of all 16-bit unsigned values in the array add
+	 * up to 1 (rather than the usual 0), so we must set the last byte to 1.
+	 */
+	ts->bak_ref[ts->bak_ref_len - 1] = 1;
+
+	return 0;
+}
+
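+/* Send the main-clock data a flashless controller may request */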
+static int goodix_send_main_clock(struct goodix_ts_data *ts)
+{
+	u32 main_clk = 54; /* Default main clock */
+	u8 checksum = 0;
+	int i;
+
+	device_property_read_u32(&ts->client->dev,
+				 "goodix,main-clk", &main_clk);
+
+	for (i = 0; i < (GOODIX_MAIN_CLK_LEN - 1); i++) {
+		ts->main_clk[i] = main_clk;
+		checksum += main_clk;
+	}
+
+	/* The values of all bytes combined must sum to 0 (mod 256) */
+	ts->main_clk[GOODIX_MAIN_CLK_LEN - 1] = 256 - checksum;
+
+	return goodix_i2c_write(ts->client, GOODIX_REG_MAIN_CLK,
+				ts->main_clk, GOODIX_MAIN_CLK_LEN);
+}
+
+int goodix_firmware_check(struct goodix_ts_data *ts)
+{
+	device_property_read_string(&ts->client->dev,
+				    "firmware-name", &ts->firmware_name);
+	if (!ts->firmware_name)
+		return 0;
+
+	if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_NONE) {
+		dev_err(&ts->client->dev, "Error no IRQ-pin access method, cannot upload fw.\n");
+		return -EINVAL;
+	}
+
+	dev_info(&ts->client->dev, "Touchscreen controller needs fw-upload\n");
+	ts->load_cfg_from_disk = true;
+
+	return goodix_firmware_upload(ts);
+}
+
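+/*
+ * A flashless controller signals requests (new config, backup-reference
+ * data, a fresh firmware upload, main-clock data) through
+ * GOODIX_REG_REQUEST. Returns true if a request was handled and acked,
+ * false if this was not a request IRQ (or handling it failed).
+ */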
+bool goodix_handle_fw_request(struct goodix_ts_data *ts)
+{
+	int error;
+	u8 val;
+
+	error = goodix_i2c_read(ts->client, GOODIX_REG_REQUEST, &val, 1);
+	if (error)
+		return false;
+
+	switch (val) {
+	case GOODIX_RQST_RESPONDED:
+		/*
+		 * If we read back our own last ack the IRQ was not for
+		 * a request.
+		 */
+		return false;
+	case GOODIX_RQST_CONFIG:
+		error = goodix_send_cfg(ts, ts->config, ts->chip->config_len);
+		if (error)
+			return false;
+
+		break;
+	case GOODIX_RQST_BAK_REF:
+		error = goodix_prepare_bak_ref(ts);
+		if (error)
+			return false;
+
+		error = goodix_i2c_write(ts->client, GOODIX_REG_BAK_REF,
+					 ts->bak_ref, ts->bak_ref_len);
+		if (error)
+			return false;
+
+		break;
+	case GOODIX_RQST_RESET:
+		error = goodix_firmware_upload(ts);
+		if (error)
+			return false;
+
+		break;
+	case GOODIX_RQST_MAIN_CLOCK:
+		error = goodix_send_main_clock(ts);
+		if (error)
+			return false;
+
+		break;
+	case GOODIX_RQST_UNKNOWN:
+	case GOODIX_RQST_IDLE:
+		break;
+	default:
+		dev_err_ratelimited(&ts->client->dev, "Unknown Request: 0x%02x\n", val);
+	}
+
+	/* Ack the request */
+	goodix_i2c_write_u8(ts->client,
+			    GOODIX_REG_REQUEST, GOODIX_RQST_RESPONDED);
+	return true;
+}
+
+void goodix_save_bak_ref(struct goodix_ts_data *ts)
+{
+	int error;
+	u8 val;
+
+	if (!ts->firmware_name)
+		return;
+
+	error = goodix_i2c_read(ts->client, GOODIX_REG_STATUS, &val, 1);
+	if (error)
+		return;
+
+	if (!(val & 0x80))
+		return;
+
+	error = goodix_i2c_read(ts->client, GOODIX_REG_BAK_REF,
+				ts->bak_ref, ts->bak_ref_len);
+	if (error) {
+		memset(ts->bak_ref, 0, ts->bak_ref_len);
+		ts->bak_ref[ts->bak_ref_len - 1] = 1;
+	}
+}
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 30576a5..2bd407d 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <linux/crc-ccitt.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
+#include <linux/ihex.h>
 #include <linux/input.h>
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
@@ -12,7 +14,7 @@
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
-#define ILI2XXX_POLL_PERIOD	20
+#define ILI2XXX_POLL_PERIOD	15
 
 #define ILI210X_DATA_SIZE	64
 #define ILI211X_DATA_SIZE	43
@@ -22,8 +24,23 @@
 /* Touchscreen commands */
 #define REG_TOUCHDATA		0x10
 #define REG_PANEL_INFO		0x20
+#define REG_FIRMWARE_VERSION	0x40
+#define REG_PROTOCOL_VERSION	0x42
+#define REG_KERNEL_VERSION	0x61
+#define REG_IC_BUSY		0x80
+#define REG_IC_BUSY_NOT_BUSY	0x50
+#define REG_GET_MODE		0xc0
+#define REG_GET_MODE_AP		0x5a
+#define REG_GET_MODE_BL		0x55
+#define REG_SET_MODE_AP		0xc1
+#define REG_SET_MODE_BL		0xc2
+#define REG_WRITE_DATA		0xc3
+#define REG_WRITE_ENABLE	0xc4
+#define REG_READ_DATA_CRC	0xc7
 #define REG_CALIBRATE		0xcc
 
+#define ILI251X_FW_FILENAME	"ilitek/ili251x.bin"
+
 struct ili2xxx_chip {
 	int (*read_reg)(struct i2c_client *client, u8 reg,
 			void *buf, size_t len);
@@ -35,6 +52,7 @@
 	unsigned int max_touches;
 	unsigned int resolution;
 	bool has_calibrate_reg;
+	bool has_firmware_proto;
 	bool has_pressure_reg;
 };
 
@@ -44,6 +62,10 @@
 	struct gpio_desc *reset_gpio;
 	struct touchscreen_properties prop;
 	const struct ili2xxx_chip *chip;
+	u8 version_firmware[8];
+	u8 version_kernel[5];
+	u8 version_proto[2];
+	u8 ic_mode[2];
 	bool stop;
 };
 
@@ -202,15 +224,17 @@
 	.has_calibrate_reg	= true,
 };
 
-static int ili251x_read_reg(struct i2c_client *client,
-			    u8 reg, void *buf, size_t len)
+static int ili251x_read_reg_common(struct i2c_client *client,
+				   u8 reg, void *buf, size_t len,
+				   unsigned int delay)
 {
 	int error;
 	int ret;
 
 	ret = i2c_master_send(client, &reg, 1);
 	if (ret == 1) {
-		usleep_range(5000, 5500);
+		if (delay)
+			usleep_range(delay, delay + 500);
 
 		ret = i2c_master_recv(client, buf, len);
 		if (ret == len)
@@ -222,12 +246,18 @@
 	return ret;
 }
 
+static int ili251x_read_reg(struct i2c_client *client,
+			    u8 reg, void *buf, size_t len)
+{
+	return ili251x_read_reg_common(client, reg, buf, len, 5000);
+}
+
 static int ili251x_read_touch_data(struct i2c_client *client, u8 *data)
 {
 	int error;
 
-	error = ili251x_read_reg(client, REG_TOUCHDATA,
-				 data, ILI251X_DATA_SIZE1);
+	error = ili251x_read_reg_common(client, REG_TOUCHDATA,
+					data, ILI251X_DATA_SIZE1, 0);
 	if (!error && data[0] == 2) {
 		error = i2c_master_recv(client, data + ILI251X_DATA_SIZE1,
 					ILI251X_DATA_SIZE2);
@@ -268,6 +298,7 @@
 	.continue_polling	= ili251x_check_continue_polling,
 	.max_touches		= 10,
 	.has_calibrate_reg	= true,
+	.has_firmware_proto	= true,
 	.has_pressure_reg	= true,
 };
 
@@ -303,10 +334,13 @@
 	const struct ili2xxx_chip *chip = priv->chip;
 	u8 touchdata[ILI210X_DATA_SIZE] = { 0 };
 	bool keep_polling;
+	ktime_t time_next;
+	s64 time_delta;
 	bool touch;
 	int error;
 
 	do {
+		time_next = ktime_add_ms(ktime_get(), ILI2XXX_POLL_PERIOD);
 		error = chip->get_touch_data(client, touchdata);
 		if (error) {
 			dev_err(&client->dev,
@@ -316,13 +350,201 @@
 
 		touch = ili210x_report_events(priv, touchdata);
 		keep_polling = chip->continue_polling(touchdata, touch);
-		if (keep_polling)
-			msleep(ILI2XXX_POLL_PERIOD);
+		if (keep_polling) {
+			time_delta = ktime_us_delta(time_next, ktime_get());
+			if (time_delta > 0)
+				usleep_range(time_delta, time_delta + 1000);
+		}
 	} while (!priv->stop && keep_polling);
 
 	return IRQ_HANDLED;
 }
 
+static int ili251x_firmware_update_resolution(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u16 resx, resy;
+	u8 rs[10];
+	int error;
+
+	/* The firmware update blob might have changed the resolution. */
+	error = priv->chip->read_reg(client, REG_PANEL_INFO, &rs, sizeof(rs));
+	if (error)
+		return error;
+
+	resx = le16_to_cpup((__le16 *)rs);
+	resy = le16_to_cpup((__le16 *)(rs + 2));
+
+	/* The value reported by the firmware is invalid. */
+	if (!resx || resx == 0xffff || !resy || resy == 0xffff)
+		return -EINVAL;
+
+	input_abs_set_max(priv->input, ABS_X, resx - 1);
+	input_abs_set_max(priv->input, ABS_Y, resy - 1);
+	input_abs_set_max(priv->input, ABS_MT_POSITION_X, resx - 1);
+	input_abs_set_max(priv->input, ABS_MT_POSITION_Y, resy - 1);
+
+	return 0;
+}
+
+static ssize_t ili251x_firmware_update_firmware_version(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error;
+	u8 fw[8];
+
+	/* Get firmware version */
+	error = priv->chip->read_reg(client, REG_FIRMWARE_VERSION,
+				     &fw, sizeof(fw));
+	if (!error)
+		memcpy(priv->version_firmware, fw, sizeof(fw));
+
+	return error;
+}
+
+static ssize_t ili251x_firmware_update_kernel_version(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error;
+	u8 kv[5];
+
+	/* Get kernel version */
+	error = priv->chip->read_reg(client, REG_KERNEL_VERSION,
+				     &kv, sizeof(kv));
+	if (!error)
+		memcpy(priv->version_kernel, kv, sizeof(kv));
+
+	return error;
+}
+
+static ssize_t ili251x_firmware_update_protocol_version(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error;
+	u8 pv[2];
+
+	/* Get protocol version */
+	error = priv->chip->read_reg(client, REG_PROTOCOL_VERSION,
+				     &pv, sizeof(pv));
+	if (!error)
+		memcpy(priv->version_proto, pv, sizeof(pv));
+
+	return error;
+}
+
+static ssize_t ili251x_firmware_update_ic_mode(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error;
+	u8 md[2];
+
+	/* Get chip boot mode */
+	error = priv->chip->read_reg(client, REG_GET_MODE, &md, sizeof(md));
+	if (!error)
+		memcpy(priv->ic_mode, md, sizeof(md));
+
+	return error;
+}
+
+static int ili251x_firmware_update_cached_state(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error;
+
+	if (!priv->chip->has_firmware_proto)
+		return 0;
+
+	/* Wait for firmware to boot and stabilize itself. */
+	msleep(200);
+
+	/* Firmware does report valid information. */
+	error = ili251x_firmware_update_resolution(dev);
+	if (error)
+		return error;
+
+	error = ili251x_firmware_update_firmware_version(dev);
+	if (error)
+		return error;
+
+	error = ili251x_firmware_update_kernel_version(dev);
+	if (error)
+		return error;
+
+	error = ili251x_firmware_update_protocol_version(dev);
+	if (error)
+		return error;
+
+	error = ili251x_firmware_update_ic_mode(dev);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+static ssize_t ili251x_firmware_version_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 *fw = priv->version_firmware;
+
+	return sysfs_emit(buf, "%02x%02x.%02x%02x.%02x%02x.%02x%02x\n",
+			  fw[0], fw[1], fw[2], fw[3],
+			  fw[4], fw[5], fw[6], fw[7]);
+}
+static DEVICE_ATTR(firmware_version, 0444, ili251x_firmware_version_show, NULL);
+
+static ssize_t ili251x_kernel_version_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 *kv = priv->version_kernel;
+
+	return sysfs_emit(buf, "%02x.%02x.%02x.%02x.%02x\n",
+			  kv[0], kv[1], kv[2], kv[3], kv[4]);
+}
+static DEVICE_ATTR(kernel_version, 0444, ili251x_kernel_version_show, NULL);
+
+static ssize_t ili251x_protocol_version_show(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 *pv = priv->version_proto;
+
+	return sysfs_emit(buf, "%02x.%02x\n", pv[0], pv[1]);
+}
+static DEVICE_ATTR(protocol_version, 0444, ili251x_protocol_version_show, NULL);
+
+static ssize_t ili251x_mode_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 *md = priv->ic_mode;
+	char *mode = "AP";
+
+	if (md[0] == REG_GET_MODE_AP)		/* Application Mode */
+		mode = "AP";
+	else if (md[0] == REG_GET_MODE_BL)	/* BootLoader Mode */
+		mode = "BL";
+	else					/* Unknown Mode */
+		mode = "??";
+
+	return sysfs_emit(buf, "%02x.%02x:%s\n", md[0], md[1], mode);
+}
+static DEVICE_ATTR(mode, 0444, ili251x_mode_show, NULL);
+
 static ssize_t ili210x_calibrate(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
@@ -349,24 +571,333 @@
 }
 static DEVICE_ATTR(calibrate, S_IWUSR, NULL, ili210x_calibrate);
 
+static int ili251x_firmware_to_buffer(const struct firmware *fw,
+				      u8 **buf, u16 *ac_end, u16 *df_end)
+{
+	const struct ihex_binrec *rec;
+	u32 fw_addr, fw_last_addr = 0;
+	u16 fw_len;
+	u8 *fw_buf;
+	int error;
+
+	/*
+	 * The firmware ihex blob can never be bigger than 64 kiB, so make this
+	 * simple -- allocate a 64 kiB buffer, iterate over the ihex blob records
+	 * once, copy them all into this buffer at the right locations, and then
+	 * do all operations on this linear buffer.
+	 */
+	fw_buf = kzalloc(SZ_64K, GFP_KERNEL);
+	if (!fw_buf)
+		return -ENOMEM;
+
+	rec = (const struct ihex_binrec *)fw->data;
+	while (rec) {
+		fw_addr = be32_to_cpu(rec->addr);
+		fw_len = be16_to_cpu(rec->len);
+
+		/* The last 32-byte firmware block can start at 0xffe0 */
+		if (fw_addr + fw_len > SZ_64K || fw_addr > SZ_64K - 32) {
+			error = -EFBIG;
+			goto err_big;
+		}
+
+		/* Find the last address before the DF start address, that is the AC end */
+		if (fw_addr == 0xf000)
+			*ac_end = fw_last_addr;
+		fw_last_addr = fw_addr + fw_len;
+
+		memcpy(fw_buf + fw_addr, rec->data, fw_len);
+		rec = ihex_next_binrec(rec);
+	}
+
+	/* DF end address is the last address in the firmware blob */
+	*df_end = fw_addr + fw_len;
+	*buf = fw_buf;
+	return 0;
+
+err_big:
+	kfree(fw_buf);
+	return error;
+}
+
+/* Switch mode between Application and BootLoader */
+static int ili251x_switch_ic_mode(struct i2c_client *client, u8 cmd_mode)
+{
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 cmd_wren[3] = { REG_WRITE_ENABLE, 0x5a, 0xa5 };
+	u8 md[2];
+	int error;
+
+	error = priv->chip->read_reg(client, REG_GET_MODE, md, sizeof(md));
+	if (error)
+		return error;
+	/* Mode already set */
+	if ((cmd_mode == REG_SET_MODE_AP && md[0] == REG_GET_MODE_AP) ||
+	    (cmd_mode == REG_SET_MODE_BL && md[0] == REG_GET_MODE_BL))
+		return 0;
+
+	/* Unlock writes */
+	error = i2c_master_send(client, cmd_wren, sizeof(cmd_wren));
+	if (error != sizeof(cmd_wren))
+		return -EINVAL;
+
+	mdelay(20);
+
+	/* Select mode (BootLoader or Application) */
+	error = i2c_master_send(client, &cmd_mode, 1);
+	if (error != 1)
+		return -EINVAL;
+
+	mdelay(200);	/* Rebooting into the bootloader takes a lot of time ... */
+
+	/* Read back mode */
+	error = priv->chip->read_reg(client, REG_GET_MODE, md, sizeof(md));
+	if (error)
+		return error;
+	/* Check if mode is correct now. */
+	if ((cmd_mode == REG_SET_MODE_AP && md[0] == REG_GET_MODE_AP) ||
+	    (cmd_mode == REG_SET_MODE_BL && md[0] == REG_GET_MODE_BL))
+		return 0;
+
+	return -EINVAL;
+}
+
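+/* Poll the busy register until the controller reports it is idle again */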
+static int ili251x_firmware_busy(struct i2c_client *client)
+{
+	struct ili210x *priv = i2c_get_clientdata(client);
+	int error, i = 0;
+	u8 data;
+
+	do {
+		/* The read_reg already contains a suitable delay */
+		error = priv->chip->read_reg(client, REG_IC_BUSY, &data, 1);
+		if (error)
+			return error;
+		if (i++ == 100000)
+			return -ETIMEDOUT;
+	} while (data != REG_IC_BUSY_NOT_BUSY);
+
+	return 0;
+}
+
+static int ili251x_firmware_write_to_ic(struct device *dev, u8 *fwbuf,
+					u16 start, u16 end, u8 dataflash)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+	u8 cmd_crc = REG_READ_DATA_CRC;
+	u8 crcrb[4] = { 0 };
+	u8 fw_data[33];
+	u16 fw_addr;
+	int error;
+
+	/*
+	 * The DF (dataflash) needs a 2 byte offset for unknown reasons,
+	 * while the AC (application) has a 2 byte CRC16-CCITT at the end.
+	 */
+	u16 crc = crc_ccitt(0, fwbuf + start + (dataflash ? 2 : 0),
+			    end - start - 2);
+
+	/* Unlock write to either AC (application) or DF (dataflash) area */
+	u8 cmd_wr[10] = {
+		REG_WRITE_ENABLE, 0x5a, 0xa5, dataflash,
+		(end >> 16) & 0xff, (end >> 8) & 0xff, end & 0xff,
+		(crc >> 16) & 0xff, (crc >> 8) & 0xff, crc & 0xff
+	};
+
+	error = i2c_master_send(client, cmd_wr, sizeof(cmd_wr));
+	if (error != sizeof(cmd_wr))
+		return -EINVAL;
+
+	error = ili251x_firmware_busy(client);
+	if (error)
+		return error;
+
+	for (fw_addr = start; fw_addr < end; fw_addr += 32) {
+		fw_data[0] = REG_WRITE_DATA;
+		memcpy(&(fw_data[1]), fwbuf + fw_addr, 32);
+		error = i2c_master_send(client, fw_data, 33);
+		if (error != sizeof(fw_data))
+			return error;
+		error = ili251x_firmware_busy(client);
+		if (error)
+			return error;
+	}
+
+	error = i2c_master_send(client, &cmd_crc, 1);
+	if (error != 1)
+		return -EINVAL;
+
+	error = ili251x_firmware_busy(client);
+	if (error)
+		return error;
+
+	error = priv->chip->read_reg(client, REG_READ_DATA_CRC,
+				   &crcrb, sizeof(crcrb));
+	if (error)
+		return error;
+
+	/* Check CRC readback */
+	if ((crcrb[0] != (crc & 0xff)) || crcrb[1] != ((crc >> 8) & 0xff))
+		return -EINVAL;
+
+	return 0;
+}
+
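+/* Issue the 0xf2 reset command and wait for the controller to go idle */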
+static int ili251x_firmware_reset(struct i2c_client *client)
+{
+	u8 cmd_reset[2] = { 0xf2, 0x01 };
+	int error;
+
+	error = i2c_master_send(client, cmd_reset, sizeof(cmd_reset));
+	if (error != sizeof(cmd_reset))
+		return -EINVAL;
+
+	return ili251x_firmware_busy(client);
+}
+
+static void ili251x_hardware_reset(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ili210x *priv = i2c_get_clientdata(client);
+
+	/* Reset the controller */
+	gpiod_set_value_cansleep(priv->reset_gpio, 1);
+	usleep_range(10000, 15000);
+	gpiod_set_value_cansleep(priv->reset_gpio, 0);
+	msleep(300);
+}
+
+static ssize_t ili210x_firmware_update_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	const char *fwname = ILI251X_FW_FILENAME;
+	const struct firmware *fw;
+	u16 ac_end, df_end;
+	u8 *fwbuf;
+	int error;
+	int i;
+
+	error = request_ihex_firmware(&fw, fwname, dev);
+	if (error) {
+		dev_err(dev, "Failed to request firmware %s, error=%d\n",
+			fwname, error);
+		return error;
+	}
+
+	error = ili251x_firmware_to_buffer(fw, &fwbuf, &ac_end, &df_end);
+	release_firmware(fw);
+	if (error)
+		return error;
+
+	/*
+	 * Disable the touchscreen IRQ, so that we do not get a spurious touch
+	 * interrupt during the firmware update and the IRQ handler won't
+	 * trigger and interfere with the update. There is no bit in the
+	 * touch controller to disable the IRQs during the update, so we
+	 * have to do it this way here.
+	 */
+	disable_irq(client->irq);
+
+	dev_dbg(dev, "Firmware update started, firmware=%s\n", fwname);
+
+	ili251x_hardware_reset(dev);
+
+	error = ili251x_firmware_reset(client);
+	if (error)
+		goto exit;
+
+	/* This may not succeed on the first try, so retry a few times. */
+	for (i = 0; i < 5; i++) {
+		error = ili251x_switch_ic_mode(client, REG_SET_MODE_BL);
+		if (!error)
+			break;
+	}
+
+	if (error)
+		goto exit;
+
+	dev_dbg(dev, "IC is now in BootLoader mode\n");
+
+	msleep(200);	/* The bootloader seems to need some time too. */
+
+	error = ili251x_firmware_write_to_ic(dev, fwbuf, 0xf000, df_end, 1);
+	if (error) {
+		dev_err(dev, "DF firmware update failed, error=%d\n", error);
+		goto exit;
+	}
+
+	dev_dbg(dev, "DataFlash firmware written\n");
+
+	error = ili251x_firmware_write_to_ic(dev, fwbuf, 0x2000, ac_end, 0);
+	if (error) {
+		dev_err(dev, "AC firmware update failed, error=%d\n", error);
+		goto exit;
+	}
+
+	dev_dbg(dev, "Application firmware written\n");
+
+	/* This may not succeed on the first try, so retry a few times. */
+	for (i = 0; i < 5; i++) {
+		error = ili251x_switch_ic_mode(client, REG_SET_MODE_AP);
+		if (!error)
+			break;
+	}
+
+	if (error)
+		goto exit;
+
+	dev_dbg(dev, "IC is now in Application mode\n");
+
+	error = ili251x_firmware_update_cached_state(dev);
+	if (error)
+		goto exit;
+
+	error = count;
+
+exit:
+	ili251x_hardware_reset(dev);
+	dev_dbg(dev, "Firmware update ended, error=%i\n", error);
+	enable_irq(client->irq);
+	kfree(fwbuf);
+	return error;
+}
+
+static DEVICE_ATTR(firmware_update, 0200, NULL, ili210x_firmware_update_store);
+
 static struct attribute *ili210x_attributes[] = {
 	&dev_attr_calibrate.attr,
+	&dev_attr_firmware_update.attr,
+	&dev_attr_firmware_version.attr,
+	&dev_attr_kernel_version.attr,
+	&dev_attr_protocol_version.attr,
+	&dev_attr_mode.attr,
 	NULL,
 };
 
-static umode_t ili210x_calibrate_visible(struct kobject *kobj,
+static umode_t ili210x_attributes_visible(struct kobject *kobj,
 					  struct attribute *attr, int index)
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct i2c_client *client = to_i2c_client(dev);
 	struct ili210x *priv = i2c_get_clientdata(client);
 
-	return priv->chip->has_calibrate_reg ? attr->mode : 0;
+	/* Calibrate is present on all ILI2xxx which have a calibrate register */
+	if (attr == &dev_attr_calibrate.attr)
+		return priv->chip->has_calibrate_reg ? attr->mode : 0;
+
+	/* Firmware/Kernel/Protocol/BootMode is implemented only for ILI251x */
+	if (!priv->chip->has_firmware_proto)
+		return 0;
+
+	return attr->mode;
 }
 
 static const struct attribute_group ili210x_attr_group = {
 	.attrs = ili210x_attributes,
-	.is_visible = ili210x_calibrate_visible,
+	.is_visible = ili210x_attributes_visible,
 };
 
 static void ili210x_power_down(void *data)
@@ -449,6 +980,12 @@
 	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_xy, 0, 0);
 	if (priv->chip->has_pressure_reg)
 		input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xa, 0, 0);
+	error = ili251x_firmware_update_cached_state(dev);
+	if (error) {
+		dev_err(dev, "Unable to cache firmware information, err: %d\n",
+			error);
+		return error;
+	}
 	touchscreen_parse_properties(input, true, &priv->prop);
 
 	error = input_mt_init_slots(input, priv->chip->max_touches,
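
For reference, the firmware CRC check performed by ili251x_firmware_write_to_ic() above uses the kernel's crc_ccitt() (LSB-first CRC16, polynomial 0x8408, seeded with 0) and compares the readback low byte first. A minimal userspace sketch of that check; the firmware bytes below are made up:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* bitwise equivalent of the kernel's crc_ccitt() (poly 0x8408, LSB-first) */
	static uint16_t crc_ccitt(uint16_t crc, const uint8_t *buf, size_t len)
	{
		while (len--) {
			crc ^= *buf++;
			for (int i = 0; i < 8; i++)
				crc = (crc & 1) ? (crc >> 1) ^ 0x8408 : crc >> 1;
		}
		return crc;
	}

	int main(void)
	{
		/* made-up application area; the last two bytes hold the CRC */
		uint8_t fw[8] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x00, 0x00 };
		uint16_t crc = crc_ccitt(0, fw, sizeof(fw) - 2);

		/* the controller reads the CRC back low byte first */
		printf("crc=%#06x readback=[%#04x %#04x]\n",
		       crc, crc & 0xff, (crc >> 8) & 0xff);
		return 0;
	}
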
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 4d2d22a..3a49529 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -37,6 +37,7 @@
 #define RM_CMD_BOOT_READ	0x44		/* send wait bl data ready*/
 
 #define RM_BOOT_RDY		0xFF		/* bl data ready */
+#define RM_BOOT_CMD_READHWID	0x0E		/* read hwid */
 
 /* I2C main commands */
 #define RM_CMD_QUERY_BANK	0x2B
@@ -290,6 +291,44 @@
 	return 0;
 }
 
+static int raydium_i2c_query_ts_bootloader_info(struct raydium_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	static const u8 get_hwid[] = { RM_BOOT_CMD_READHWID,
+				       0x10, 0xc0, 0x01, 0x00, 0x04, 0x00 };
+	u8 rbuf[5] = { 0 };
+	u32 hw_ver;
+	int error;
+
+	error = raydium_i2c_send(client, RM_CMD_BOOT_WRT,
+				 get_hwid, sizeof(get_hwid));
+	if (error) {
+		dev_err(&client->dev, "WRT HWID command failed: %d\n", error);
+		return error;
+	}
+
+	error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, rbuf, 1);
+	if (error) {
+		dev_err(&client->dev, "Ack HWID command failed: %d\n", error);
+		return error;
+	}
+
+	error = raydium_i2c_read(client, RM_CMD_BOOT_CHK, rbuf, sizeof(rbuf));
+	if (error) {
+		dev_err(&client->dev, "Read HWID command failed: %d (%4ph)\n",
+			error, rbuf + 1);
+		hw_ver = 0xffffffffUL;
+	} else {
+		hw_ver = get_unaligned_be32(rbuf + 1);
+	}
+
+	ts->info.hw_ver = cpu_to_le32(hw_ver);
+	ts->info.main_ver = 0xff;
+	ts->info.sub_ver = 0xff;
+
+	return error;
+}
+
 static int raydium_i2c_query_ts_info(struct raydium_data *ts)
 {
 	struct i2c_client *client = ts->client;
@@ -388,13 +427,10 @@
 	if (error)
 		ts->boot_mode = RAYDIUM_TS_BLDR;
 
-	if (ts->boot_mode == RAYDIUM_TS_BLDR) {
-		ts->info.hw_ver = cpu_to_le32(0xffffffffUL);
-		ts->info.main_ver = 0xff;
-		ts->info.sub_ver = 0xff;
-	} else {
+	if (ts->boot_mode == RAYDIUM_TS_BLDR)
+		raydium_i2c_query_ts_bootloader_info(ts);
+	else
 		raydium_i2c_query_ts_info(ts);
-	}
 
 	return error;
 }
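
The bootloader reply parsed above is an ACK byte followed by a big-endian 32-bit hardware ID, hence the get_unaligned_be32(rbuf + 1). A standalone sketch of that parsing; the reply bytes are made up:

	#include <stdint.h>
	#include <stdio.h>

	/* open-coded equivalent of the kernel's get_unaligned_be32() */
	static uint32_t get_unaligned_be32(const uint8_t *p)
	{
		return (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
	}

	int main(void)
	{
		/* hypothetical RM_CMD_BOOT_CHK reply: ACK byte, then BE32 HWID */
		uint8_t rbuf[5] = { 0xff, 0x12, 0x34, 0x56, 0x78 };

		printf("hw_ver: %#x\n", get_unaligned_be32(rbuf + 1));
		return 0;
	}
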
@@ -1082,11 +1118,11 @@
 	if (error)
 		return error;
 
-	error = devm_add_action(&client->dev, raydium_i2c_power_off, ts);
+	error = devm_add_action_or_reset(&client->dev,
+					 raydium_i2c_power_off, ts);
 	if (error) {
 		dev_err(&client->dev,
 			"failed to install power off action: %d\n", error);
-		raydium_i2c_power_off(ts);
 		return error;
 	}
 
@@ -1218,7 +1254,7 @@
 			 raydium_i2c_suspend, raydium_i2c_resume);
 
 static const struct i2c_device_id raydium_i2c_id[] = {
-	{ "raydium_i2c" , 0 },
+	{ "raydium_i2c", 0 },
 	{ "rm32380", 0 },
 	{ /* sentinel */ }
 };
diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
index 6abae66..e38ba3e 100644
--- a/drivers/input/touchscreen/st1232.c
+++ b/drivers/input/touchscreen/st1232.c
@@ -92,7 +92,7 @@
 	unsigned int retries;
 	int error;
 
-	for (retries = 10; retries; retries--) {
+	for (retries = 100; retries; retries--) {
 		error = st1232_ts_read_data(ts, REG_STATUS, 1);
 		if (!error) {
 			switch (ts->read_buf[0]) {
@@ -389,6 +389,7 @@
 	.driver = {
 		.name	= ST1232_TS_NAME,
 		.of_match_table = st1232_ts_dt_ids,
+		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
 		.pm	= &st1232_ts_pm_ops,
 	},
 };
diff --git a/drivers/input/touchscreen/tsc2004.c b/drivers/input/touchscreen/tsc2004.c
index 0272ced..9fdd870 100644
--- a/drivers/input/touchscreen/tsc2004.c
+++ b/drivers/input/touchscreen/tsc2004.c
@@ -45,7 +45,9 @@
 
 static int tsc2004_remove(struct i2c_client *i2c)
 {
-	return tsc200x_remove(&i2c->dev);
+	tsc200x_remove(&i2c->dev);
+
+	return 0;
 }
 
 static const struct i2c_device_id tsc2004_idtable[] = {
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index 923496bb..a2f5592 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -66,7 +66,9 @@
 
 static int tsc2005_remove(struct spi_device *spi)
 {
-	return tsc200x_remove(&spi->dev);
+	tsc200x_remove(&spi->dev);
+
+	return 0;
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index b8d720d..27810f6 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -577,15 +577,13 @@
 }
 EXPORT_SYMBOL_GPL(tsc200x_probe);
 
-int tsc200x_remove(struct device *dev)
+void tsc200x_remove(struct device *dev)
 {
 	struct tsc200x *ts = dev_get_drvdata(dev);
 
 	sysfs_remove_group(&dev->kobj, &tsc200x_attr_group);
 
 	regulator_disable(ts->vio);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(tsc200x_remove);
 
diff --git a/drivers/input/touchscreen/tsc200x-core.h b/drivers/input/touchscreen/tsc200x-core.h
index a43c08c..4ded344 100644
--- a/drivers/input/touchscreen/tsc200x-core.h
+++ b/drivers/input/touchscreen/tsc200x-core.h
@@ -74,6 +74,6 @@
 int tsc200x_probe(struct device *dev, int irq, const struct input_id *tsc_id,
 		  struct regmap *regmap,
 		  int (*tsc200x_cmd)(struct device *dev, u8 cmd));
-int tsc200x_remove(struct device *dev);
+void tsc200x_remove(struct device *dev);
 
 #endif
diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c
index 22826c3..fe4ea62 100644
--- a/drivers/input/touchscreen/wacom_i2c.c
+++ b/drivers/input/touchscreen/wacom_i2c.c
@@ -6,6 +6,7 @@
  * <tobita.tatsunosuke@wacom.co.jp>
  */
 
+#include <linux/bits.h>
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/i2c.h>
@@ -14,6 +15,15 @@
 #include <linux/interrupt.h>
 #include <asm/unaligned.h>
 
+/* Bitmasks (for data[3]) */
+#define WACOM_TIP_SWITCH	BIT(0)
+#define WACOM_BARREL_SWITCH	BIT(1)
+#define WACOM_ERASER		BIT(2)
+#define WACOM_INVERT		BIT(3)
+#define WACOM_BARREL_SWITCH_2	BIT(4)
+#define WACOM_IN_PROXIMITY	BIT(5)
+
+/* Registers */
 #define WACOM_CMD_QUERY0	0x04
 #define WACOM_CMD_QUERY1	0x00
 #define WACOM_CMD_QUERY2	0x33
@@ -99,19 +109,19 @@
 	if (error < 0)
 		goto out;
 
-	tsw = data[3] & 0x01;
-	ers = data[3] & 0x04;
-	f1 = data[3] & 0x02;
-	f2 = data[3] & 0x10;
+	tsw = data[3] & WACOM_TIP_SWITCH;
+	ers = data[3] & WACOM_ERASER;
+	f1 = data[3] & WACOM_BARREL_SWITCH;
+	f2 = data[3] & WACOM_BARREL_SWITCH_2;
 	x = le16_to_cpup((__le16 *)&data[4]);
 	y = le16_to_cpup((__le16 *)&data[6]);
 	pressure = le16_to_cpup((__le16 *)&data[8]);
 
 	if (!wac_i2c->prox)
-		wac_i2c->tool = (data[3] & 0x0c) ?
+		wac_i2c->tool = (data[3] & (WACOM_ERASER | WACOM_INVERT)) ?
 			BTN_TOOL_RUBBER : BTN_TOOL_PEN;
 
-	wac_i2c->prox = data[3] & 0x20;
+	wac_i2c->prox = data[3] & WACOM_IN_PROXIMITY;
 
 	input_report_key(input, BTN_TOUCH, tsw || ers);
 	input_report_key(input, wac_i2c->tool, wac_i2c->prox);
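
The named BIT() masks above replace the bare hex constants one for one, so the report decode now reads off the bit names directly. A small standalone sketch of the same decode; the report byte is made up:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n)			(1u << (n))
	#define WACOM_TIP_SWITCH	BIT(0)
	#define WACOM_BARREL_SWITCH	BIT(1)
	#define WACOM_ERASER		BIT(2)
	#define WACOM_INVERT		BIT(3)
	#define WACOM_IN_PROXIMITY	BIT(5)

	int main(void)
	{
		uint8_t d3 = 0x25;	/* made-up report: tip + eraser + proximity */
		bool tsw = d3 & WACOM_TIP_SWITCH;
		bool ers = d3 & WACOM_ERASER;
		bool rubber = d3 & (WACOM_ERASER | WACOM_INVERT);
		bool prox = d3 & WACOM_IN_PROXIMITY;

		printf("touch=%d tool=%s prox=%d\n",
		       tsw || ers, rubber ? "rubber" : "pen", prox);
		return 0;
	}
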
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 13cbeb9..58da08c 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -929,10 +929,8 @@
 {
 	int ret;
 
-	pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n");
-
 	if (!amd_iommu_v2_supported()) {
-		pr_info("AMD IOMMUv2 functionality not available on this system\n");
+		pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
 		/*
 		 * Load anyway to provide the symbols to other modules
 		 * which may use AMD IOMMUv2 optionally.
@@ -947,6 +945,8 @@
 
 	amd_iommu_register_ppr_notifier(&ppr_nb);
 
+	pr_info("AMD IOMMUv2 loaded and initialized\n");
+
 	return 0;
 
 out:
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index b39d223..71596fc 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -144,6 +144,7 @@
 {
 	struct dmar_drhd_unit *d;
 	struct intel_iommu *i;
+	int rc = 0;
 
 	rcu_read_lock();
 	if (list_empty(&dmar_drhd_units))
@@ -169,11 +170,11 @@
 	 */
 	if (intel_cap_smts_sanity() &&
 	    !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
-		return -EOPNOTSUPP;
+		rc = -EOPNOTSUPP;
 
 out:
 	rcu_read_unlock();
-	return 0;
+	return rc;
 }
 
 int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 0bde0c8..b6a8f32 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1339,13 +1339,11 @@
 	pte = &pte[pfn_level_offset(pfn, level)];
 
 	do {
-		unsigned long level_pfn;
+		unsigned long level_pfn = pfn & level_mask(level);
 
 		if (!dma_pte_present(pte))
 			goto next;
 
-		level_pfn = pfn & level_mask(level);
-
 		/* If range covers entire pagetable, free it */
 		if (start_pfn <= level_pfn &&
 		    last_pfn >= level_pfn + level_size(level) - 1) {
@@ -1366,7 +1364,7 @@
 						       freelist);
 		}
 next:
-		pfn += level_size(level);
+		pfn = level_pfn + level_size(level);
 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
 
 	if (first_pte)
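
The change to the loop advance restarts pfn from the level-aligned level_pfn instead of the possibly-unaligned incoming pfn, so every iteration steps over exactly one page-table slot. A toy illustration of the aligned stepping, assuming 512-entry levels as on VT-d and a made-up starting pfn:

	#include <stdio.h>

	#define LEVEL_SHIFT	9	/* 512 entries per level */

	int main(void)
	{
		unsigned long level_size = 1UL << LEVEL_SHIFT;
		unsigned long level_mask = ~(level_size - 1);
		unsigned long pfn = 100;	/* deliberately unaligned start */

		for (int i = 0; i < 3; i++) {
			unsigned long level_pfn = pfn & level_mask;

			printf("slot covers pfn %lu..%lu\n",
			       level_pfn, level_pfn + level_size - 1);
			pfn = level_pfn + level_size;	/* the fixed advance */
		}
		return 0;
	}
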
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 5cb2608..7f23ad6 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -200,8 +200,8 @@
 #define DTE_HI_MASK2	GENMASK(7, 4)
 #define DTE_HI_SHIFT1	24 /* shift bit 8 to bit 32 */
 #define DTE_HI_SHIFT2	32 /* shift bit 4 to bit 36 */
-#define PAGE_DESC_HI_MASK1	GENMASK_ULL(39, 36)
-#define PAGE_DESC_HI_MASK2	GENMASK_ULL(35, 32)
+#define PAGE_DESC_HI_MASK1	GENMASK_ULL(35, 32)
+#define PAGE_DESC_HI_MASK2	GENMASK_ULL(39, 36)
 
 static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
 {
diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c
index cb403c9..4aebd67 100644
--- a/drivers/irqchip/irq-csky-mpintc.c
+++ b/drivers/irqchip/irq-csky-mpintc.c
@@ -78,7 +78,7 @@
 		readl_relaxed(reg_base + INTCL_RDYIR));
 }
 
-static void csky_mpintc_enable(struct irq_data *d)
+static void csky_mpintc_unmask(struct irq_data *d)
 {
 	void __iomem *reg_base = this_cpu_read(intcl_reg);
 
@@ -87,7 +87,7 @@
 	writel_relaxed(d->hwirq, reg_base + INTCL_SENR);
 }
 
-static void csky_mpintc_disable(struct irq_data *d)
+static void csky_mpintc_mask(struct irq_data *d)
 {
 	void __iomem *reg_base = this_cpu_read(intcl_reg);
 
@@ -164,8 +164,8 @@
 static struct irq_chip csky_irq_chip = {
 	.name           = "C-SKY SMP Intc",
 	.irq_eoi	= csky_mpintc_eoi,
-	.irq_enable	= csky_mpintc_enable,
-	.irq_disable	= csky_mpintc_disable,
+	.irq_unmask	= csky_mpintc_unmask,
+	.irq_mask	= csky_mpintc_mask,
 	.irq_set_type	= csky_mpintc_set_type,
 #ifdef CONFIG_SMP
 	.irq_set_affinity = csky_irq_set_affinity,
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index cf74cfa..259065d 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -163,7 +163,13 @@
 {
 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
 
-	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+	if (irqd_irq_masked(d)) {
+		plic_irq_unmask(d);
+		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+		plic_irq_mask(d);
+	} else {
+		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+	}
 }
 
 static struct irq_chip plic_chip = {
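
The PLIC ignores a completion write for a source whose enable bit is clear, which is why the EOI path above transiently unmasks a masked irq around the CONTEXT_CLAIM write. A userspace sketch of that control flow, with stub functions standing in for the register accessors:

	#include <stdbool.h>
	#include <stdio.h>

	static bool masked = true;	/* models irqd_irq_masked(d) */

	static void plic_irq_unmask(void) { masked = false; puts("unmask"); }
	static void plic_irq_mask(void)   { masked = true;  puts("mask"); }

	/* stand-in for the CONTEXT_CLAIM register write */
	static void write_claim(int hwirq)
	{
		printf("complete hwirq %d (enable bit %s)\n",
		       hwirq, masked ? "clear: write ignored!" : "set: accepted");
	}

	int main(void)
	{
		int hwirq = 7;

		if (masked) {
			plic_irq_unmask();
			write_claim(hwirq);
			plic_irq_mask();
		} else {
			write_claim(hwirq);
		}
		return 0;
	}
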
diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
index 79fa36d..cd9cb35 100644
--- a/drivers/media/cec/core/cec-adap.c
+++ b/drivers/media/cec/core/cec-adap.c
@@ -1199,6 +1199,7 @@
 			if (abort)
 				dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
 			msg->flags = dst->flags;
+			msg->sequence = dst->sequence;
 			/* Remove it from the wait_queue */
 			list_del_init(&data->list);
 
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 1094575..90acafd 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -241,6 +241,7 @@
 	buf->offset = vaddr & ~PAGE_MASK;
 	buf->size = size;
 	buf->dma_sgt = &buf->sg_table;
+	buf->vb = vb;
 	vec = vb2_create_framevec(vaddr, size);
 	if (IS_ERR(vec))
 		goto userptr_fail_pfnvec;
@@ -642,6 +643,7 @@
 	buf->dma_dir = vb->vb2_queue->dma_dir;
 	buf->size = size;
 	buf->db_attach = dba;
+	buf->vb = vb;
 
 	return buf;
 }
diff --git a/drivers/media/i2c/hi846.c b/drivers/media/i2c/hi846.c
index 822ce30..48909fa 100644
--- a/drivers/media/i2c/hi846.c
+++ b/drivers/media/i2c/hi846.c
@@ -7,9 +7,9 @@
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_graph.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
@@ -2176,7 +2176,7 @@
 	.driver = {
 		.name = "hi846",
 		.pm = &hi846_pm_ops,
-		.of_match_table = of_match_ptr(hi846_of_match),
+		.of_match_table = hi846_of_match,
 	},
 	.probe_new = hi846_probe,
 	.remove = hi846_remove,
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 8176769..0f3d6b5 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -751,10 +751,6 @@
 /*
  * x86 is the only compat architecture with different struct alignment
  * between 32-bit and 64-bit tasks.
- *
- * On all other architectures, v4l2_event32 and v4l2_event32_time32 are
- * the same as v4l2_event and v4l2_event_time32, so we can use the native
- * handlers, converting v4l2_event to v4l2_event_time32 if necessary.
  */
 struct v4l2_event32 {
 	__u32				type;
@@ -772,21 +768,6 @@
 	__u32				reserved[8];
 };
 
-#ifdef CONFIG_COMPAT_32BIT_TIME
-struct v4l2_event32_time32 {
-	__u32				type;
-	union {
-		compat_s64		value64;
-		__u8			data[64];
-	} u;
-	__u32				pending;
-	__u32				sequence;
-	struct old_timespec32		timestamp;
-	__u32				id;
-	__u32				reserved[8];
-};
-#endif
-
 static int put_v4l2_event32(struct v4l2_event *p64,
 			    struct v4l2_event32 __user *p32)
 {
@@ -802,7 +783,22 @@
 	return 0;
 }
 
+#endif
+
 #ifdef CONFIG_COMPAT_32BIT_TIME
+struct v4l2_event32_time32 {
+	__u32				type;
+	union {
+		compat_s64		value64;
+		__u8			data[64];
+	} u;
+	__u32				pending;
+	__u32				sequence;
+	struct old_timespec32		timestamp;
+	__u32				id;
+	__u32				reserved[8];
+};
+
 static int put_v4l2_event32_time32(struct v4l2_event *p64,
 				   struct v4l2_event32_time32 __user *p32)
 {
@@ -818,7 +814,6 @@
 	return 0;
 }
 #endif
-#endif
 
 struct v4l2_edid32 {
 	__u32 pad;
@@ -880,9 +875,7 @@
 #define VIDIOC_QUERYBUF32_TIME32	_IOWR('V',  9, struct v4l2_buffer32_time32)
 #define VIDIOC_QBUF32_TIME32		_IOWR('V', 15, struct v4l2_buffer32_time32)
 #define VIDIOC_DQBUF32_TIME32		_IOWR('V', 17, struct v4l2_buffer32_time32)
-#ifdef CONFIG_X86_64
 #define	VIDIOC_DQEVENT32_TIME32		_IOR ('V', 89, struct v4l2_event32_time32)
-#endif
 #define VIDIOC_PREPARE_BUF32_TIME32	_IOWR('V', 93, struct v4l2_buffer32_time32)
 #endif
 
@@ -936,11 +929,11 @@
 #ifdef CONFIG_X86_64
 	case VIDIOC_DQEVENT32:
 		return VIDIOC_DQEVENT;
+#endif
 #ifdef CONFIG_COMPAT_32BIT_TIME
 	case VIDIOC_DQEVENT32_TIME32:
 		return VIDIOC_DQEVENT;
 #endif
-#endif
 	}
 	return cmd;
 }
@@ -1032,11 +1025,11 @@
 #ifdef CONFIG_X86_64
 	case VIDIOC_DQEVENT32:
 		return put_v4l2_event32(parg, arg);
+#endif
 #ifdef CONFIG_COMPAT_32BIT_TIME
 	case VIDIOC_DQEVENT32_TIME32:
 		return put_v4l2_event32_time32(parg, arg);
 #endif
-#endif
 	}
 	return 0;
 }
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index b883dcc..e201e59 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -241,7 +241,7 @@
 {
 	struct mtk_smi_larb *larb = dev_get_drvdata(dev);
 	u32 reg, flags_general = larb->larb_gen->flags_general;
-	const u8 *larbostd = larb->larb_gen->ostd[larb->larbid];
+	const u8 *larbostd = larb->larb_gen->ostd ? larb->larb_gen->ostd[larb->larbid] : NULL;
 	int i;
 
 	if (BIT(larb->larbid) & larb->larb_gen->larb_direct_to_common_mask)
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index f4c8e1a..b431cdd 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1514,6 +1514,12 @@
 	return 0;
 }
 
+static const struct spi_device_id mmc_spi_dev_ids[] = {
+	{ "mmc-spi-slot"},
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, mmc_spi_dev_ids);
+
 static const struct of_device_id mmc_spi_of_match_table[] = {
 	{ .compatible = "mmc-spi-slot", },
 	{},
@@ -1525,6 +1531,7 @@
 		.name =		"mmc_spi",
 		.of_match_table = mmc_spi_of_match_table,
 	},
+	.id_table =	mmc_spi_dev_ids,
 	.probe =	mmc_spi_probe,
 	.remove =	mmc_spi_remove,
 };
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index afaf337..764ee1b 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -310,7 +310,6 @@
 	.flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
 			| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
 			| ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
-			| ESDHC_FLAG_CQHCI
 			| ESDHC_FLAG_STATE_LOST_IN_LPMODE
 			| ESDHC_FLAG_CLK_RATE_LOST_IN_PM_RUNTIME,
 };
@@ -319,7 +318,6 @@
 	.flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
 			| ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
 			| ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
-			| ESDHC_FLAG_CQHCI
 			| ESDHC_FLAG_STATE_LOST_IN_LPMODE,
 };
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 269c8656..07c6da1 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -771,7 +771,19 @@
 			len -= offset;
 		}
 
-		BUG_ON(len > 65536);
+		/*
+		 * The block layer forces a minimum segment size of PAGE_SIZE,
+		 * so 'len' can be too big here if PAGE_SIZE >= 64KiB. Write
+		 * multiple descriptors, noting that the ADMA table is sized
+		 * for 4KiB chunks anyway, so it will be big enough.
+		 */
+		while (len > host->max_adma) {
+			int n = 32 * 1024; /* 32 KiB */
+
+			__sdhci_adma_write_desc(host, &desc, addr, n, ADMA2_TRAN_VALID);
+			addr += n;
+			len -= n;
+		}
 
 		/* tran, valid */
 		if (len)
@@ -3968,6 +3980,7 @@
 	 * descriptor for each segment, plus 1 for a nop end descriptor.
 	 */
 	host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1;
+	host->max_adma = 65536;
 
 	host->max_timeout_count = 0xE;
 
@@ -4633,10 +4646,12 @@
 	 * be larger than 64 KiB though.
 	 */
 	if (host->flags & SDHCI_USE_ADMA) {
-		if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
+		if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) {
+			host->max_adma = 65532; /* 32-bit alignment */
 			mmc->max_seg_size = 65535;
-		else
+		} else {
 			mmc->max_seg_size = 65536;
+		}
 	} else {
 		mmc->max_seg_size = mmc->max_req_size;
 	}
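
Because the block layer can hand the driver segments as large as PAGE_SIZE, a single segment on a 64KiB-page kernel can exceed what one ADMA2 descriptor can express; the new loop peels off fixed 32KiB chunks and leaves the remainder to the existing tail descriptor. A standalone sketch of that splitting, with made-up address and length values:

	#include <stdio.h>

	#define MAX_ADMA	65536
	#define CHUNK		(32 * 1024)

	/* stand-in for __sdhci_adma_write_desc() */
	static void write_desc(unsigned long addr, int len)
	{
		printf("desc: addr=%#lx len=%d\n", addr, len);
	}

	int main(void)
	{
		unsigned long addr = 0x100000;	/* made-up DMA address */
		int len = 3 * 65536 + 1234;	/* made-up oversized segment */

		while (len > MAX_ADMA) {
			write_desc(addr, CHUNK);
			addr += CHUNK;
			len -= CHUNK;
		}
		if (len)
			write_desc(addr, len);	/* the existing tail descriptor */
		return 0;
	}
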
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index bb88355..d7929d7 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -340,7 +340,8 @@
 
 /*
  * Maximum segments assuming a 512KiB maximum requisition size and a minimum
- * 4KiB page size.
+ * 4KiB page size. Note this also allows enough for multiple descriptors in
+ * case of PAGE_SIZE >= 64KiB.
  */
 #define SDHCI_MAX_SEGS		128
 
@@ -543,6 +544,7 @@
 	unsigned int blocks;	/* remaining PIO blocks */
 
 	int sg_count;		/* Mapped sg entries */
+	int max_adma;		/* Max. length in ADMA descriptor */
 
 	void *adma_table;	/* ADMA descriptor table */
 	void *align_buffer;	/* Bounce buffer */
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 10506a4..6cccc3d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -567,9 +567,7 @@
 config VMXNET3
 	tristate "VMware VMXNET3 ethernet driver"
 	depends on PCI && INET
-	depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \
-		     IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \
-		     PPC_64K_PAGES)
+	depends on PAGE_SIZE_LESS_THAN_64KB
 	help
 	  This driver supports VMware's vmxnet3 virtual ethernet NIC.
 	  To compile this driver as a module, choose M here: the
diff --git a/drivers/net/amt.c b/drivers/net/amt.c
index 47a04c3..b732ee9 100644
--- a/drivers/net/amt.c
+++ b/drivers/net/amt.c
@@ -3286,7 +3286,7 @@
 {
 	rtnl_link_unregister(&amt_link_ops);
 	unregister_netdevice_notifier(&amt_notifier_block);
-	cancel_delayed_work(&source_gc_wq);
+	cancel_delayed_work_sync(&source_gc_wq);
 	__amt_source_gc_work();
 	destroy_workqueue(amt_wq);
 }
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 43fc308..013e9c0 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -1002,57 +1002,32 @@
 	data &= ~PORT_VLAN_MEMBERSHIP;
 	data |= (member & dev->port_mask);
 	ksz_pwrite8(dev, port, P_MIRROR_CTRL, data);
-	dev->ports[port].member = member;
 }
 
 static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
 	struct ksz_device *dev = ds->priv;
-	int forward = dev->member;
 	struct ksz_port *p;
-	int member = -1;
 	u8 data;
 
-	p = &dev->ports[port];
-
 	ksz_pread8(dev, port, P_STP_CTRL, &data);
 	data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
 
 	switch (state) {
 	case BR_STATE_DISABLED:
 		data |= PORT_LEARN_DISABLE;
-		if (port < dev->phy_port_cnt)
-			member = 0;
 		break;
 	case BR_STATE_LISTENING:
 		data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
-		if (port < dev->phy_port_cnt &&
-		    p->stp_state == BR_STATE_DISABLED)
-			member = dev->host_mask | p->vid_member;
 		break;
 	case BR_STATE_LEARNING:
 		data |= PORT_RX_ENABLE;
 		break;
 	case BR_STATE_FORWARDING:
 		data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
-
-		/* This function is also used internally. */
-		if (port == dev->cpu_port)
-			break;
-
-		/* Port is a member of a bridge. */
-		if (dev->br_member & BIT(port)) {
-			dev->member |= BIT(port);
-			member = dev->member;
-		} else {
-			member = dev->host_mask | p->vid_member;
-		}
 		break;
 	case BR_STATE_BLOCKING:
 		data |= PORT_LEARN_DISABLE;
-		if (port < dev->phy_port_cnt &&
-		    p->stp_state == BR_STATE_DISABLED)
-			member = dev->host_mask | p->vid_member;
 		break;
 	default:
 		dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -1060,22 +1035,11 @@
 	}
 
 	ksz_pwrite8(dev, port, P_STP_CTRL, data);
+
+	p = &dev->ports[port];
 	p->stp_state = state;
-	/* Port membership may share register with STP state. */
-	if (member >= 0 && member != p->member)
-		ksz8_cfg_port_member(dev, port, (u8)member);
 
-	/* Check if forwarding needs to be updated. */
-	if (state != BR_STATE_FORWARDING) {
-		if (dev->br_member & BIT(port))
-			dev->member &= ~BIT(port);
-	}
-
-	/* When topology has changed the function ksz_update_port_member
-	 * should be called to modify port forwarding behavior.
-	 */
-	if (forward != dev->member)
-		ksz_update_port_member(dev, port);
+	ksz_update_port_member(dev, port);
 }
 
 static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
@@ -1341,7 +1305,7 @@
 
 static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
-	struct ksz_port *p = &dev->ports[port];
+	struct dsa_switch *ds = dev->ds;
 	struct ksz8 *ksz8 = dev->priv;
 	const u32 *masks;
 	u8 member;
@@ -1368,10 +1332,11 @@
 		if (!ksz_is_ksz88x3(dev))
 			ksz8795_cpu_interface_select(dev, port);
 
-		member = dev->port_mask;
+		member = dsa_user_ports(ds);
 	} else {
-		member = dev->host_mask | p->vid_member;
+		member = BIT(dsa_upstream_port(ds, port));
 	}
+
 	ksz8_cfg_port_member(dev, port, member);
 }
 
@@ -1392,20 +1357,13 @@
 	ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true);
 
 	p = &dev->ports[dev->cpu_port];
-	p->vid_member = dev->port_mask;
 	p->on = 1;
 
 	ksz8_port_setup(dev, dev->cpu_port, true);
-	dev->member = dev->host_mask;
 
 	for (i = 0; i < dev->phy_port_cnt; i++) {
 		p = &dev->ports[i];
 
-		/* Initialize to non-zero so that ksz_cfg_port_member() will
-		 * be called.
-		 */
-		p->vid_member = BIT(i);
-		p->member = dev->port_mask;
 		ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED);
 
 		/* Last port may be disabled. */
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 854e25f..353b5f9 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -391,7 +391,6 @@
 				    u8 member)
 {
 	ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member);
-	dev->ports[port].member = member;
 }
 
 static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
@@ -400,8 +399,6 @@
 	struct ksz_device *dev = ds->priv;
 	struct ksz_port *p = &dev->ports[port];
 	u8 data;
-	int member = -1;
-	int forward = dev->member;
 
 	ksz_pread8(dev, port, P_STP_CTRL, &data);
 	data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
@@ -409,40 +406,18 @@
 	switch (state) {
 	case BR_STATE_DISABLED:
 		data |= PORT_LEARN_DISABLE;
-		if (port != dev->cpu_port)
-			member = 0;
 		break;
 	case BR_STATE_LISTENING:
 		data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
-		if (port != dev->cpu_port &&
-		    p->stp_state == BR_STATE_DISABLED)
-			member = dev->host_mask | p->vid_member;
 		break;
 	case BR_STATE_LEARNING:
 		data |= PORT_RX_ENABLE;
 		break;
 	case BR_STATE_FORWARDING:
 		data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
-
-		/* This function is also used internally. */
-		if (port == dev->cpu_port)
-			break;
-
-		member = dev->host_mask | p->vid_member;
-		mutex_lock(&dev->dev_mutex);
-
-		/* Port is a member of a bridge. */
-		if (dev->br_member & (1 << port)) {
-			dev->member |= (1 << port);
-			member = dev->member;
-		}
-		mutex_unlock(&dev->dev_mutex);
 		break;
 	case BR_STATE_BLOCKING:
 		data |= PORT_LEARN_DISABLE;
-		if (port != dev->cpu_port &&
-		    p->stp_state == BR_STATE_DISABLED)
-			member = dev->host_mask | p->vid_member;
 		break;
 	default:
 		dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -451,23 +426,8 @@
 
 	ksz_pwrite8(dev, port, P_STP_CTRL, data);
 	p->stp_state = state;
-	mutex_lock(&dev->dev_mutex);
-	/* Port membership may share register with STP state. */
-	if (member >= 0 && member != p->member)
-		ksz9477_cfg_port_member(dev, port, (u8)member);
 
-	/* Check if forwarding needs to be updated. */
-	if (state != BR_STATE_FORWARDING) {
-		if (dev->br_member & (1 << port))
-			dev->member &= ~(1 << port);
-	}
-
-	/* When topology has changed the function ksz_update_port_member
-	 * should be called to modify port forwarding behavior.
-	 */
-	if (forward != dev->member)
-		ksz_update_port_member(dev, port);
-	mutex_unlock(&dev->dev_mutex);
+	ksz_update_port_member(dev, port);
 }
 
 static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
@@ -1168,10 +1128,10 @@
 
 static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
-	u8 data8;
-	u8 member;
-	u16 data16;
 	struct ksz_port *p = &dev->ports[port];
+	struct dsa_switch *ds = dev->ds;
+	u8 data8, member;
+	u16 data16;
 
 	/* enable tag tail for host port */
 	if (cpu_port)
@@ -1250,12 +1210,12 @@
 		ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8);
 		p->phydev.duplex = 1;
 	}
-	mutex_lock(&dev->dev_mutex);
+
 	if (cpu_port)
-		member = dev->port_mask;
+		member = dsa_user_ports(ds);
 	else
-		member = dev->host_mask | p->vid_member;
-	mutex_unlock(&dev->dev_mutex);
+		member = BIT(dsa_upstream_port(ds, port));
+
 	ksz9477_cfg_port_member(dev, port, member);
 
 	/* clear pending interrupts */
@@ -1276,8 +1236,6 @@
 			const char *prev_mode;
 
 			dev->cpu_port = i;
-			dev->host_mask = (1 << dev->cpu_port);
-			dev->port_mask |= dev->host_mask;
 			p = &dev->ports[i];
 
 			/* Read from XMII register to determine host port
@@ -1312,23 +1270,15 @@
 
 			/* enable cpu port */
 			ksz9477_port_setup(dev, i, true);
-			p->vid_member = dev->port_mask;
 			p->on = 1;
 		}
 	}
 
-	dev->member = dev->host_mask;
-
 	for (i = 0; i < dev->port_cnt; i++) {
 		if (i == dev->cpu_port)
 			continue;
 		p = &dev->ports[i];
 
-		/* Initialize to non-zero so that ksz_cfg_port_member() will
-		 * be called.
-		 */
-		p->vid_member = (1 << i);
-		p->member = dev->port_mask;
 		ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED);
 		p->on = 1;
 		if (i < dev->phy_port_cnt)
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 7c2968a..8a04302 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -22,21 +22,40 @@
 
 void ksz_update_port_member(struct ksz_device *dev, int port)
 {
-	struct ksz_port *p;
+	struct ksz_port *p = &dev->ports[port];
+	struct dsa_switch *ds = dev->ds;
+	u8 port_member = 0, cpu_port;
+	const struct dsa_port *dp;
 	int i;
 
-	for (i = 0; i < dev->port_cnt; i++) {
-		if (i == port || i == dev->cpu_port)
+	if (!dsa_is_user_port(ds, port))
+		return;
+
+	dp = dsa_to_port(ds, port);
+	cpu_port = BIT(dsa_upstream_port(ds, port));
+
+	for (i = 0; i < ds->num_ports; i++) {
+		const struct dsa_port *other_dp = dsa_to_port(ds, i);
+		struct ksz_port *other_p = &dev->ports[i];
+		u8 val = 0;
+
+		if (!dsa_is_user_port(ds, i))
 			continue;
-		p = &dev->ports[i];
-		if (!(dev->member & (1 << i)))
+		if (port == i)
+			continue;
+		if (!dp->bridge_dev || dp->bridge_dev != other_dp->bridge_dev)
 			continue;
 
-		/* Port is a member of the bridge and is forwarding. */
-		if (p->stp_state == BR_STATE_FORWARDING &&
-		    p->member != dev->member)
-			dev->dev_ops->cfg_port_member(dev, i, dev->member);
+		if (other_p->stp_state == BR_STATE_FORWARDING &&
+		    p->stp_state == BR_STATE_FORWARDING) {
+			val |= BIT(port);
+			port_member |= BIT(i);
+		}
+
+		dev->dev_ops->cfg_port_member(dev, i, val | cpu_port);
 	}
+
+	dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port);
 }
 EXPORT_SYMBOL_GPL(ksz_update_port_member);
 
@@ -175,12 +194,6 @@
 int ksz_port_bridge_join(struct dsa_switch *ds, int port,
 			 struct net_device *br)
 {
-	struct ksz_device *dev = ds->priv;
-
-	mutex_lock(&dev->dev_mutex);
-	dev->br_member |= (1 << port);
-	mutex_unlock(&dev->dev_mutex);
-
 	/* port_stp_state_set() will be called after to put the port in
 	 * appropriate state so there is no need to do anything.
 	 */
@@ -192,13 +205,6 @@
 void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
 			   struct net_device *br)
 {
-	struct ksz_device *dev = ds->priv;
-
-	mutex_lock(&dev->dev_mutex);
-	dev->br_member &= ~(1 << port);
-	dev->member &= ~(1 << port);
-	mutex_unlock(&dev->dev_mutex);
-
 	/* port_stp_state_set() will be called after to put the port in
 	 * forwarding state so there is no need to do anything.
 	 */
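
With the per-chip member/vid_member bookkeeping removed, port membership is now derived on the fly: two user ports forward to each other only when they sit in the same bridge and both are in the forwarding STP state, and every user port always forwards to its upstream CPU port. A userspace sketch of that computation over a made-up port layout:

	#include <stdio.h>

	#define NPORTS		5
	#define CPU_PORT	4
	#define FORWARDING	1

	struct port {
		int bridge;	/* 0 = not bridged; same value = same bridge */
		int stp_state;
	};

	int main(void)
	{
		struct port p[NPORTS] = {
			{ 1, FORWARDING },	/* port 0 */
			{ 1, FORWARDING },	/* port 1, same bridge as 0 */
			{ 2, FORWARDING },	/* port 2, different bridge */
			{ 1, 0 },		/* port 3, same bridge, blocking */
		};

		for (int i = 0; i < CPU_PORT; i++) {
			unsigned int member = 1u << CPU_PORT;

			for (int j = 0; j < CPU_PORT; j++) {
				if (j == i)
					continue;
				if (p[i].bridge && p[i].bridge == p[j].bridge &&
				    p[i].stp_state == FORWARDING &&
				    p[j].stp_state == FORWARDING)
					member |= 1u << j;
			}
			printf("port %d member mask: %#x\n", i, member);
		}
		return 0;
	}
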
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 1597c63..54b456b 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -25,8 +25,6 @@
 };
 
 struct ksz_port {
-	u16 member;
-	u16 vid_member;
 	bool remove_tag;		/* Remove Tag flag set, for ksz8795 only */
 	int stp_state;
 	struct phy_device phydev;
@@ -83,8 +81,6 @@
 	struct ksz_port *ports;
 	struct delayed_work mib_read;
 	unsigned long mib_read_interval;
-	u16 br_member;
-	u16 member;
 	u16 mirror_rx;
 	u16 mirror_tx;
 	u32 features;			/* chip specific features */
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index a429c97..147ca39 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1256,8 +1256,12 @@
 		/* Set initial MTU for every port.
 		 * We have only have a general MTU setting. So track
 		 * every port and set the max across all port.
+		 * Set per port MTU to 1500 as the MTU change function
+		 * will add the overhead and if it's set to 1518 then it
+		 * will apply the overhead again and we will end up with
+		 * MTU of 1536 instead of 1518.
 		 */
-		priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
+		priv->port_mtu[i] = ETH_DATA_LEN;
 	}
 
 	/* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */
@@ -1433,6 +1437,12 @@
 
 		qca8k_write(priv, QCA8K_REG_SGMII_CTRL, val);
 
+		/* The original code reports port instability when SGMII is used
+		 * without delays set. Apply advised values here or take them from DT.
+		 */
+		if (state->interface == PHY_INTERFACE_MODE_SGMII)
+			qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
+
 		/* For qca8327/qca8328/qca8334/qca8338 sgmii is unique and
 		 * falling edge is set writing in the PORT0 PAD reg
 		 */
@@ -1455,12 +1465,6 @@
 					QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE,
 					val);
 
-		/* From original code is reported port instability as SGMII also
-		 * require delay set. Apply advised values here or take them from DT.
-		 */
-		if (state->interface == PHY_INTERFACE_MODE_SGMII)
-			qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
-
 		break;
 	default:
 		dev_err(ds->dev, "xMII mode %s not supported for port %d\n",
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 24122cc..81b3756 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -298,13 +298,14 @@
 			}
 		}
 
-		if (unlikely(buff->is_eop)) {
+		if (unlikely(buff->is_eop && buff->skb)) {
 			u64_stats_update_begin(&self->stats.tx.syncp);
 			++self->stats.tx.packets;
 			self->stats.tx.bytes += buff->skb->len;
 			u64_stats_update_end(&self->stats.tx.syncp);
 
 			dev_kfree_skb_any(buff->skb);
+			buff->skb = NULL;
 		}
 		buff->pa = 0U;
 		buff->eop_index = 0xffffU;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index fc0e660..3f1704c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -559,6 +559,11 @@
 			goto err_exit;
 
 		if (fw.len == 0xFFFFU) {
+			if (sw.len > sizeof(self->rpc)) {
+				printk(KERN_INFO "Invalid sw len: %x\n", sw.len);
+				err = -EINVAL;
+				goto err_exit;
+			}
 			err = hw_atl_utils_fw_rpc_call(self, sw.len);
 			if (err < 0)
 				goto err_exit;
@@ -567,6 +572,11 @@
 
 	if (rpc) {
 		if (fw.len) {
+			if (fw.len > sizeof(self->rpc)) {
+				printk(KERN_INFO "Invalid fw len: %x\n", fw.len);
+				err = -EINVAL;
+				goto err_exit;
+			}
 			err =
 			hw_atl_utils_fw_downld_dwords(self,
 						      self->rpc_addr,
diff --git a/drivers/net/ethernet/asix/ax88796c_main.h b/drivers/net/ethernet/asix/ax88796c_main.h
index 80263c3..4a83c99 100644
--- a/drivers/net/ethernet/asix/ax88796c_main.h
+++ b/drivers/net/ethernet/asix/ax88796c_main.h
@@ -127,9 +127,9 @@
 		#define AX_PRIV_FLAGS_MASK	(AX_CAP_COMP)
 
 	unsigned long		flags;
-		#define EVENT_INTR		BIT(0)
-		#define EVENT_TX		BIT(1)
-		#define EVENT_SET_MULTI		BIT(2)
+		#define EVENT_INTR		0
+		#define EVENT_TX		1
+		#define EVENT_SET_MULTI		2
 
 };
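
The EVENT_* values change from BIT() masks to plain bit numbers because they are passed to the kernel's set_bit()/test_bit()/clear_bit() helpers, which take a bit index rather than a mask; passing BIT(1) == 2 would silently operate on bit 2. A userspace model of the mix-up:

	#include <stdio.h>

	#define BIT(n)	(1UL << (n))

	/* models the kernel's set_bit(nr, addr): nr is an index, not a mask */
	static void set_bit(unsigned long nr, unsigned long *addr)
	{
		*addr |= 1UL << nr;
	}

	int main(void)
	{
		unsigned long flags = 0;

		set_bit(BIT(1), &flags);	/* old, buggy: BIT(1) == 2 sets bit 2 */
		printf("with BIT(1): %#lx\n", flags);	/* prints 0x4 */

		flags = 0;
		set_bit(1, &flags);		/* fixed: sets bit 1 */
		printf("with 1:      %#lx\n", flags);	/* prints 0x2 */
		return 0;
	}
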
 
diff --git a/drivers/net/ethernet/asix/ax88796c_spi.c b/drivers/net/ethernet/asix/ax88796c_spi.c
index 94df4f9..0710e71 100644
--- a/drivers/net/ethernet/asix/ax88796c_spi.c
+++ b/drivers/net/ethernet/asix/ax88796c_spi.c
@@ -34,7 +34,7 @@
 
 	/* OP */
 	ax_spi->cmd_buf[0] = AX_SPICMD_READ_STATUS;
-	ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3);
+	ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)status, 3);
 	if (ret)
 		dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret);
 	else
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
index 1835d2e..fc7fce6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h
@@ -635,11 +635,13 @@
 {
 	int i, rc;
 	struct bnx2x_ilt *ilt = BP_ILT(bp);
-	struct ilt_client_info *ilt_cli = &ilt->clients[cli_num];
+	struct ilt_client_info *ilt_cli;
 
 	if (!ilt || !ilt->lines)
 		return -1;
 
+	ilt_cli = &ilt->clients[cli_num];
+
 	if (ilt_cli->flags & (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM))
 		return 0;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index d0d5da9..4c9507d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2258,6 +2258,16 @@
 	}
 }
 
+/* Must hold rtnl_lock */
+static inline bool bnxt_sriov_cfg(struct bnxt *bp)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	return BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg);
+#else
+	return false;
+#endif
+}
+
 extern const u16 bnxt_lhint_arr[];
 
 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 5c464ea..951c4c5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -360,7 +360,7 @@
 		NL_SET_ERR_MSG_MOD(extack, "Live patch already applied");
 		break;
 	default:
-		netdev_err(bp->dev, "Unexpected live patch error: %hhd\n", err);
+		netdev_err(bp->dev, "Unexpected live patch error: %d\n", err);
 		NL_SET_ERR_MSG_MOD(extack, "Failed to activate live patch");
 		break;
 	}
@@ -441,12 +441,13 @@
 
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: {
-		if (BNXT_PF(bp) && bp->pf.active_vfs) {
+		rtnl_lock();
+		if (bnxt_sriov_cfg(bp)) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "reload is unsupported when VFs are allocated");
+					   "reload is unsupported while VFs are allocated or being configured");
+			rtnl_unlock();
 			return -EOPNOTSUPP;
 		}
-		rtnl_lock();
 		if (bp->dev->reg_state == NETREG_UNREGISTERED) {
 			rtnl_unlock();
 			return -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index e6a4a76..1471b61 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1868,7 +1868,7 @@
 	struct flow_cls_offload *flower = type_data;
 	struct bnxt *bp = priv->bp;
 
-	if (flower->common.chain_index)
+	if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
 		return -EOPNOTSUPP;
 
 	switch (type) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 64479c4..ae9cca7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -3196,6 +3196,7 @@
 	}
 	if (adapter->registered_device_map == 0) {
 		dev_err(&pdev->dev, "could not register any net devices\n");
+		err = -EINVAL;
 		goto err_disable_interrupts;
 	}
 
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 13121c4..71730ef 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -4709,6 +4709,10 @@
         lp->ibn = 3;
         lp->active = *p++;
 	if (MOTO_SROM_BUG) lp->active = 0;
+	/* The if (MOTO_SROM_BUG) statement above indicates lp->active
+	 * could be 8, i.e. the size of the lp->phy array. */
+	if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy)))
+		return -EINVAL;
 	lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1);
 	lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1);
 	lp->phy[lp->active].mc  = get_unaligned_le16(p); p += 2;
@@ -5000,19 +5004,23 @@
 	}
 	if ((j == limit) && (i < DE4X5_MAX_MII)) {
 	    for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++);
-	    lp->phy[k].addr = i;
-	    lp->phy[k].id = id;
-	    lp->phy[k].spd.reg = GENERIC_REG;      /* ANLPA register         */
-	    lp->phy[k].spd.mask = GENERIC_MASK;    /* 100Mb/s technologies   */
-	    lp->phy[k].spd.value = GENERIC_VALUE;  /* TX & T4, H/F Duplex    */
-	    lp->mii_cnt++;
-	    lp->active++;
-	    printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
-	    j = de4x5_debug;
-	    de4x5_debug |= DEBUG_MII;
-	    de4x5_dbg_mii(dev, k);
-	    de4x5_debug = j;
-	    printk("\n");
+	    if (k < DE4X5_MAX_PHY) {
+		lp->phy[k].addr = i;
+		lp->phy[k].id = id;
+		lp->phy[k].spd.reg = GENERIC_REG;      /* ANLPA register         */
+		lp->phy[k].spd.mask = GENERIC_MASK;    /* 100Mb/s technologies   */
+		lp->phy[k].spd.value = GENERIC_VALUE;  /* TX & T4, H/F Duplex    */
+		lp->mii_cnt++;
+		lp->active++;
+		printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
+		j = de4x5_debug;
+		de4x5_debug |= DEBUG_MII;
+		de4x5_dbg_mii(dev, k);
+		de4x5_debug = j;
+		printk("\n");
+	    } else {
+		goto purgatory;
+	    }
 	}
     }
   purgatory:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 714e961..6451c83 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -4550,10 +4550,10 @@
 
 	fsl_mc_portal_free(priv->mc_io);
 
-	free_netdev(net_dev);
-
 	dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name);
 
+	free_netdev(net_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 23d9cbf..740850b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -400,6 +400,10 @@
 		return;
 
 	if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
+		/* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8.
+		 * Check the port index to prevent array overflow. */
+		if (port >= DSAF_MAX_PORT_NUM)
+			return;
 		reg_val_1  = 0x1 << port;
 		port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
 		/* there is difference between V1 and V2 in register.*/
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 67364ab..081295b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -1081,7 +1081,8 @@
 	u32 j = 0;
 
 	sprintf(result[j++], "%u", index);
-	sprintf(result[j++], "%u", ring->page_pool->pages_state_hold_cnt);
+	sprintf(result[j++], "%u",
+		READ_ONCE(ring->page_pool->pages_state_hold_cnt));
 	sprintf(result[j++], "%u",
 		atomic_read(&ring->page_pool->pages_state_release_cnt));
 	sprintf(result[j++], "%u", ring->page_pool->p.pool_size);
@@ -1106,6 +1107,11 @@
 		return -EFAULT;
 	}
 
+	if (!priv->ring[h->kinfo.num_tqps].page_pool) {
+		dev_err(&h->pdev->dev, "page pool is not initialized\n");
+		return -EFAULT;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(page_pool_info_items); i++)
 		result[i] = &data_str[i][0];
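
pages_state_hold_cnt is updated by the page-pool code without any lock shared with this debugfs reader, so the READ_ONCE() above forces a single untorn load instead of letting the compiler cache or re-read the value. A sketch of the idea; for word-sized types the kernel macro is essentially the volatile cast used here:

	#include <stdint.h>
	#include <stdio.h>

	#define READ_ONCE(x)	(*(const volatile typeof(x) *)&(x))

	static uint32_t pages_state_hold_cnt;	/* written concurrently elsewhere */

	int main(void)
	{
		uint32_t snap = READ_ONCE(pages_state_hold_cnt);

		printf("hold_cnt snapshot: %u\n", snap);
		return 0;
	}
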
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index c8442b8..c9b4568 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -987,6 +987,7 @@
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
 	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
 	const struct hns3_reset_type_map *rst_type_map;
+	enum ethtool_reset_flags rst_flags;
 	u32 i, size;
 
 	if (ops->ae_dev_resetting && ops->ae_dev_resetting(h))
@@ -1006,6 +1007,7 @@
 	for (i = 0; i < size; i++) {
 		if (rst_type_map[i].rst_flags == *flags) {
 			rst_type = rst_type_map[i].rst_type;
+			rst_flags = rst_type_map[i].rst_flags;
 			break;
 		}
 	}
@@ -1021,6 +1023,8 @@
 
 	ops->reset_event(h->pdev, h);
 
+	*flags &= ~rst_flags;
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 25c419d..41afaee 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -703,9 +703,9 @@
 	roundup_size = ilog2(roundup_size);
 
 	for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
-		tc_valid[i] = !!(hdev->hw_tc_map & BIT(i));
+		tc_valid[i] = 1;
 		tc_size[i] = roundup_size;
-		tc_offset[i] = rss_size * i;
+		tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0;
 	}
 
 	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 5039a25..0bf3d47b 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -3003,9 +3003,10 @@
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct nic *nic = netdev_priv(netdev);
 
+	netif_device_detach(netdev);
+
 	if (netif_running(netdev))
 		e100_down(nic);
-	netif_device_detach(netdev);
 
 	if ((nic->flags & wol_magic) | e100_asf(nic)) {
 		/* enable reverse auto-negotiation */
@@ -3022,7 +3023,7 @@
 		*enable_wake = false;
 	}
 
-	pci_clear_master(pdev);
+	pci_disable_device(pdev);
 }
 
 static int __e100_power_off(struct pci_dev *pdev, bool wake)
@@ -3042,8 +3043,6 @@
 
 	__e100_shutdown(to_pci_dev(dev_d), &wake);
 
-	device_wakeup_disable(dev_d);
-
 	return 0;
 }
 
@@ -3051,6 +3050,14 @@
 {
 	struct net_device *netdev = dev_get_drvdata(dev_d);
 	struct nic *nic = netdev_priv(netdev);
+	int err;
+
+	err = pci_enable_device(to_pci_dev(dev_d));
+	if (err) {
+		netdev_err(netdev, "Resume cannot enable PCI device, aborting\n");
+		return err;
+	}
+	pci_set_master(to_pci_dev(dev_d));
 
 	/* disable reverse auto-negotiation */
 	if (nic->phy == phy_82552_v) {
@@ -3062,10 +3069,11 @@
 		           smartspeed & ~(E100_82552_REV_ANEG));
 	}
 
-	netif_device_attach(netdev);
 	if (netif_running(netdev))
 		e100_up(nic);
 
+	netif_device_attach(netdev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 3d528fb..4d939af 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -161,6 +161,7 @@
 	__I40E_VSI_OVERFLOW_PROMISC,
 	__I40E_VSI_REINIT_REQUESTED,
 	__I40E_VSI_DOWN_REQUESTED,
+	__I40E_VSI_RELEASING,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_VSI_STATE_SIZE__,
 };
@@ -1247,6 +1248,7 @@
 void i40e_ptp_init(struct i40e_pf *pf);
 void i40e_ptp_stop(struct i40e_pf *pf);
 int i40e_ptp_alloc_pins(struct i40e_pf *pf);
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
 int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
 i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ba86213..e118cf9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1790,6 +1790,7 @@
 				     bool is_add)
 {
 	struct i40e_pf *pf = vsi->back;
+	u16 num_tc_qps = 0;
 	u16 sections = 0;
 	u8 netdev_tc = 0;
 	u16 numtc = 1;
@@ -1797,13 +1798,33 @@
 	u8 offset;
 	u16 qmap;
 	int i;
-	u16 num_tc_qps = 0;
 
 	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
 	offset = 0;
+	/* zero out queue mapping, it will get updated at the end of the function */
+	memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		/* This code helps to add more queues to the VSI if we have
+		 * more cores than RSS can support; the higher cores will
+		 * be served by ATR or other filters. Furthermore, a
+		 * non-zero req_queue_pairs says that the user requested a
+		 * new queue count via ethtool's set_channels, so use this
+		 * value for queue distribution across traffic classes.
+		 */
+		if (vsi->req_queue_pairs > 0)
+			vsi->num_queue_pairs = vsi->req_queue_pairs;
+		else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+			vsi->num_queue_pairs = pf->num_lan_msix;
+	}
 
 	/* Number of queues per enabled TC */
-	num_tc_qps = vsi->alloc_queue_pairs;
+	if (vsi->type == I40E_VSI_MAIN ||
+	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
+		num_tc_qps = vsi->num_queue_pairs;
+	else
+		num_tc_qps = vsi->alloc_queue_pairs;
+
 	if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
 		/* Find numtc from enabled TC bitmap */
 		for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
@@ -1881,15 +1902,11 @@
 		}
 		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
 	}
-
-	/* Set actual Tx/Rx queue pairs */
-	vsi->num_queue_pairs = offset;
-	if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) {
-		if (vsi->req_queue_pairs > 0)
-			vsi->num_queue_pairs = vsi->req_queue_pairs;
-		else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
-			vsi->num_queue_pairs = pf->num_lan_msix;
-	}
+	/* Do not change previously set num_queue_pairs for PFs and VFs */
+	if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
+	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
+	    (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
+		vsi->num_queue_pairs = offset;
 
 	/* Scheduler section valid can only be set for ADD VSI */
 	if (is_add) {
@@ -2623,7 +2640,8 @@
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
-		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) {
+		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
+		    !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
 			int ret = i40e_sync_vsi_filters(pf->vsi[v]);
 
 			if (ret) {
@@ -5427,6 +5445,58 @@
 }
 
 /**
+ * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
+ * @vsi: the VSI being reconfigured
+ * @vsi_offset: offset from main VF VSI
+ */
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
+{
+	struct i40e_vsi_context ctxt = {};
+	struct i40e_pf *pf;
+	struct i40e_hw *hw;
+	int ret;
+
+	if (!vsi)
+		return I40E_ERR_PARAM;
+	pf = vsi->back;
+	hw = &pf->hw;
+
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = hw->pf_id;
+	ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+	ctxt.flags = I40E_AQ_VSI_TYPE_VF;
+	ctxt.info = vsi->info;
+
+	i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
+				 false);
+	if (vsi->reconfig_rss) {
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+		ret = i40e_vsi_config_rss(vsi);
+		if (ret) {
+			dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
+			return ret;
+		}
+		vsi->reconfig_rss = false;
+	}
+
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n",
+			 i40e_stat_str(hw, ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	return ret;
+}
+
+/**
  * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
  * @vsi: VSI to be configured
  * @enabled_tc: TC bitmap
@@ -5717,24 +5787,6 @@
 }
 
 /**
- * i40e_is_any_channel - channel exist or not
- * @vsi: ptr to VSI to which channels are associated with
- *
- * Returns true or false if channel(s) exist for associated VSI or not
- **/
-static bool i40e_is_any_channel(struct i40e_vsi *vsi)
-{
-	struct i40e_channel *ch, *ch_tmp;
-
-	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
-		if (ch->initialized)
-			return true;
-	}
-
-	return false;
-}
-
-/**
  * i40e_get_max_queues_for_channel
  * @vsi: ptr to VSI to which channels are associated with
  *
@@ -6240,26 +6292,15 @@
 	/* By default we are in VEPA mode, if this is the first VF/VMDq
 	 * VSI to be added switch to VEB mode.
 	 */
-	if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
-	    (!i40e_is_any_channel(vsi))) {
-		if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
-			dev_dbg(&pf->pdev->dev,
-				"Failed to create channel. Override queues (%u) not power of 2\n",
-				vsi->tc_config.tc_info[0].qcount);
-			return -EINVAL;
-		}
 
-		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
-			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+		pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
 
-			if (vsi->type == I40E_VSI_MAIN) {
-				if (pf->flags & I40E_FLAG_TC_MQPRIO)
-					i40e_do_reset(pf, I40E_PF_RESET_FLAG,
-						      true);
-				else
-					i40e_do_reset_safe(pf,
-							   I40E_PF_RESET_FLAG);
-			}
+		if (vsi->type == I40E_VSI_MAIN) {
+			if (pf->flags & I40E_FLAG_TC_MQPRIO)
+				i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+			else
+				i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 		}
 		/* now onwards for main VSI, number of queues will be value
 		 * of TC0's queue count
@@ -7912,12 +7953,20 @@
 			    vsi->seid);
 		need_reset = true;
 		goto exit;
-	} else {
-		dev_info(&vsi->back->pdev->dev,
-			 "Setup channel (id:%u) utilizing num_queues %d\n",
-			 vsi->seid, vsi->tc_config.tc_info[0].qcount);
+	} else if (enabled_tc &&
+		   (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
+		netdev_info(netdev,
+			    "Failed to create channel. Override queues (%u) not power of 2\n",
+			    vsi->tc_config.tc_info[0].qcount);
+		ret = -EINVAL;
+		need_reset = true;
+		goto exit;
 	}
 
+	dev_info(&vsi->back->pdev->dev,
+		 "Setup channel (id:%u) utilizing num_queues %d\n",
+		 vsi->seid, vsi->tc_config.tc_info[0].qcount);
+
 	if (pf->flags & I40E_FLAG_TC_MQPRIO) {
 		if (vsi->mqprio_qopt.max_rate[0]) {
 			u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
@@ -8482,9 +8531,8 @@
 		err = i40e_add_del_cloud_filter(vsi, filter, true);
 
 	if (err) {
-		dev_err(&pf->pdev->dev,
-			"Failed to add cloud filter, err %s\n",
-			i40e_stat_str(&pf->hw, err));
+		dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
+			err);
 		goto err;
 	}
 
@@ -13771,7 +13819,7 @@
 		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
 		return -ENODEV;
 	}
-
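+	/* flag the VSI as being released so concurrent paths can skip it */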
+	set_bit(__I40E_VSI_RELEASING, vsi->state);
 	uplink_seid = vsi->uplink_seid;
 	if (vsi->type != I40E_VSI_SRIOV) {
 		if (vsi->netdev_registered) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 472f56b..80ae264 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -183,17 +183,18 @@
 /***********************misc routines*****************************/
 
 /**
- * i40e_vc_disable_vf
+ * i40e_vc_reset_vf
  * @vf: pointer to the VF info
- *
- * Disable the VF through a SW reset.
+ * @notify_vf: whether to notify the VF about the reset
+ *
+ * Reset VF handler.
  **/
-static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
+static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
 {
 	struct i40e_pf *pf = vf->pf;
 	int i;
 
-	i40e_vc_notify_vf_reset(vf);
+	if (notify_vf)
+		i40e_vc_notify_vf_reset(vf);
 
 	/* We want to ensure that an actual reset occurs initiated after this
 	 * function was called. However, we do not want to wait forever, so
@@ -211,9 +212,14 @@
 		usleep_range(10000, 20000);
 	}
 
-	dev_warn(&vf->pf->pdev->dev,
-		 "Failed to initiate reset for VF %d after 200 milliseconds\n",
-		 vf->vf_id);
+	if (notify_vf)
+		dev_warn(&vf->pf->pdev->dev,
+			 "Failed to initiate reset for VF %d after 200 milliseconds\n",
+			 vf->vf_id);
+	else
+		dev_dbg(&vf->pf->pdev->dev,
+			"Failed to initiate reset for VF %d after 200 milliseconds\n",
+			vf->vf_id);
 }
 
 /**
@@ -674,14 +680,13 @@
 				    u16 vsi_queue_id,
 				    struct virtchnl_rxq_info *info)
 {
+	u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
 	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_hmc_obj_rxq rx_ctx;
-	u16 pf_queue_id;
 	int ret = 0;
 
-	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
-
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
 
@@ -719,6 +724,10 @@
 	}
 	rx_ctx.rxmax = info->max_pkt_size;
 
+	/* if port VLAN is configured increase the max packet size */
+	if (vsi->info.pvid)
+		rx_ctx.rxmax += VLAN_HLEN;
+
 	/* enable 32bytes desc always */
 	rx_ctx.dsize = 1;
 
@@ -2106,20 +2115,6 @@
 }
 
 /**
- * i40e_vc_reset_vf_msg
- * @vf: pointer to the VF info
- *
- * called from the VF to reset itself,
- * unlike other virtchnl messages, PF driver
- * doesn't send the response back to the VF
- **/
-static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
-{
-	if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
-		i40e_reset_vf(vf, false);
-}
-
-/**
  * i40e_vc_config_promiscuous_mode_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -2217,11 +2212,12 @@
 	struct virtchnl_vsi_queue_config_info *qci =
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
-	struct i40e_pf *pf = vf->pf;
 	u16 vsi_id, vsi_queue_id = 0;
-	u16 num_qps_all = 0;
+	struct i40e_pf *pf = vf->pf;
 	i40e_status aq_ret = 0;
 	int i, j = 0, idx = 0;
+	struct i40e_vsi *vsi;
+	u16 num_qps_all = 0;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
@@ -2310,9 +2306,15 @@
 		pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
 			qci->num_queue_pairs;
 	} else {
-		for (i = 0; i < vf->num_tc; i++)
-			pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
-			       vf->ch[i].num_qps;
+		for (i = 0; i < vf->num_tc; i++) {
+			vsi = pf->vsi[vf->ch[i].vsi_idx];
+			vsi->num_queue_pairs = vf->ch[i].num_qps;
+
+			if (i40e_update_adq_vsi_queues(vsi, i)) {
+				aq_ret = I40E_ERR_CONFIG;
+				goto error_param;
+			}
+		}
 	}
 
 error_param:
@@ -2607,8 +2609,7 @@
 	} else {
 		/* successful request */
 		vf->num_req_queues = req_pairs;
-		i40e_vc_notify_vf_reset(vf);
-		i40e_reset_vf(vf, false);
+		i40e_vc_reset_vf(vf, true);
 		return 0;
 	}
 
@@ -3803,8 +3804,7 @@
 	vf->num_req_queues = 0;
 
 	/* reset the VF in order to allocate resources */
-	i40e_vc_notify_vf_reset(vf);
-	i40e_reset_vf(vf, false);
+	i40e_vc_reset_vf(vf, true);
 
 	return I40E_SUCCESS;
 
@@ -3844,8 +3844,7 @@
 	}
 
 	/* reset the VF in order to allocate resources */
-	i40e_vc_notify_vf_reset(vf);
-	i40e_reset_vf(vf, false);
+	i40e_vc_reset_vf(vf, true);
 
 	return I40E_SUCCESS;
 
@@ -3907,7 +3906,7 @@
 		i40e_vc_notify_vf_link_state(vf);
 		break;
 	case VIRTCHNL_OP_RESET_VF:
-		i40e_vc_reset_vf_msg(vf);
+		i40e_vc_reset_vf(vf, false);
 		ret = 0;
 		break;
 	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
@@ -4161,7 +4160,7 @@
 	/* Force the VF interface down so it has to bring up with new MAC
 	 * address
 	 */
-	i40e_vc_disable_vf(vf);
+	i40e_vc_reset_vf(vf, true);
 	dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
 
 error_param:
@@ -4170,34 +4169,6 @@
 }
 
 /**
- * i40e_vsi_has_vlans - True if VSI has configured VLANs
- * @vsi: pointer to the vsi
- *
- * Check if a VSI has configured any VLANs. False if we have a port VLAN or if
- * we have no configured VLANs. Do not call while holding the
- * mac_filter_hash_lock.
- */
-static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi)
-{
-	bool have_vlans;
-
-	/* If we have a port VLAN, then the VSI cannot have any VLANs
-	 * configured, as all MAC/VLAN filters will be assigned to the PVID.
-	 */
-	if (vsi->info.pvid)
-		return false;
-
-	/* Since we don't have a PVID, we know that if the device is in VLAN
-	 * mode it must be because of a VLAN filter configured on this VSI.
-	 */
-	spin_lock_bh(&vsi->mac_filter_hash_lock);
-	have_vlans = i40e_is_vsi_in_vlan(vsi);
-	spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
-	return have_vlans;
-}
-
-/**
  * i40e_ndo_set_vf_port_vlan
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -4253,19 +4224,9 @@
 		/* duplicate request, so just return success */
 		goto error_pvid;
 
-	if (i40e_vsi_has_vlans(vsi)) {
-		dev_err(&pf->pdev->dev,
-			"VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
-			vf_id);
-		/* Administrator Error - knock the VF offline until he does
-		 * the right thing by reconfiguring his network correctly
-		 * and then reloading the VF driver.
-		 */
-		i40e_vc_disable_vf(vf);
-		/* During reset the VF got a new VSI, so refresh the pointer. */
-		vsi = pf->vsi[vf->lan_vsi_idx];
-	}
-
+	i40e_vc_reset_vf(vf, true);
+	/* During reset the VF got a new VSI, so refresh the pointer. */
+	vsi = pf->vsi[vf->lan_vsi_idx];
 	/* Locked once because multiple functions below iterate list */
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
@@ -4641,7 +4602,7 @@
 		goto out;
 
 	vf->trusted = setting;
-	i40e_vc_disable_vf(vf);
+	i40e_vc_reset_vf(vf, true);
 	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
 		 vf_id, setting ? "" : "un");
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index e6e7c1d..3789269 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -39,6 +39,7 @@
 #include "iavf_txrx.h"
 #include "iavf_fdir.h"
 #include "iavf_adv_rss.h"
+#include <linux/bitmap.h>
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 #define PFX "iavf: "
@@ -304,6 +305,7 @@
 #define IAVF_FLAG_AQ_DEL_FDIR_FILTER		BIT(26)
 #define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG		BIT(27)
 #define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG		BIT(28)
+#define IAVF_FLAG_AQ_REQUEST_STATS		BIT(29)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -443,6 +445,7 @@
 void iavf_down(struct iavf_adapter *adapter);
 int iavf_process_config(struct iavf_adapter *adapter);
 void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_schedule_request_stats(struct iavf_adapter *adapter);
 void iavf_reset(struct iavf_adapter *adapter);
 void iavf_set_ethtool_ops(struct net_device *netdev);
 void iavf_update_stats(struct iavf_adapter *adapter);
@@ -500,4 +503,5 @@
 void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
 					const u8 *macaddr);
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
 #endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 5a359a0..0cecaff 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -354,6 +354,9 @@
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 	unsigned int i;
 
+	/* Explicitly request stats refresh */
+	iavf_schedule_request_stats(adapter);
+
 	iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
 
 	rcu_read_lock();
@@ -723,12 +726,31 @@
  *
  * Change the ITR settings for a specific queue.
  **/
-static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
-				   struct ethtool_coalesce *ec, int queue)
+static int iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+				  struct ethtool_coalesce *ec, int queue)
 {
 	struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
 	struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
 	struct iavf_q_vector *q_vector;
+	u16 itr_setting;
+
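+	/* compare only the programmed usec value; mask off the adaptive flag */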
+	itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	if (ec->rx_coalesce_usecs != itr_setting &&
+	    ec->use_adaptive_rx_coalesce) {
+		netif_info(adapter, drv, adapter->netdev,
+			   "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
+		return -EINVAL;
+	}
+
+	itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	if (ec->tx_coalesce_usecs != itr_setting &&
+	    ec->use_adaptive_tx_coalesce) {
+		netif_info(adapter, drv, adapter->netdev,
+			   "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
+		return -EINVAL;
+	}
 
 	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
 	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
@@ -751,6 +773,7 @@
 	 * the Tx and Rx ITR values based on the values we have entered
 	 * into the q_vector, no need to write the values now.
 	 */
+	return 0;
 }
 
 /**
@@ -792,9 +815,11 @@
 	 */
 	if (queue < 0) {
 		for (i = 0; i < adapter->num_active_queues; i++)
-			iavf_set_itr_per_queue(adapter, ec, i);
+			if (iavf_set_itr_per_queue(adapter, ec, i))
+				return -EINVAL;
 	} else if (queue < adapter->num_active_queues) {
-		iavf_set_itr_per_queue(adapter, ec, queue);
+		if (iavf_set_itr_per_queue(adapter, ec, queue))
+			return -EINVAL;
 	} else {
 		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
 			   adapter->num_active_queues - 1);
@@ -1776,6 +1801,7 @@
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 	u32 num_req = ch->combined_count;
+	int i;
 
 	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
 	    adapter->num_tc) {
@@ -1786,7 +1812,7 @@
 	/* All of these should have already been checked by ethtool before this
 	 * even gets to us, but just to be sure.
 	 */
-	if (num_req > adapter->vsi_res->num_queue_pairs)
+	if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs)
 		return -EINVAL;
 
 	if (num_req == adapter->num_active_queues)
@@ -1798,6 +1824,20 @@
 	adapter->num_req_queues = num_req;
 	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
 	iavf_schedule_reset(adapter);
+
+	/* wait until the reset is done */
+	for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
+		msleep(IAVF_RESET_WAIT_MS);
+		if (adapter->flags & IAVF_FLAG_RESET_PENDING)
+			continue;
+		break;
+	}
+	if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+		adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+		adapter->num_active_queues = num_req;
+		return -EOPNOTSUPP;
+	}
+
 	return 0;
 }
 
@@ -1844,14 +1884,13 @@
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
-	if (!indir)
-		return 0;
+	if (key)
+		memcpy(key, adapter->rss_key, adapter->rss_key_size);
 
-	memcpy(key, adapter->rss_key, adapter->rss_key_size);
-
-	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
-	for (i = 0; i < adapter->rss_lut_size; i++)
-		indir[i] = (u32)adapter->rss_lut[i];
+	if (indir)
+		/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+		for (i = 0; i < adapter->rss_lut_size; i++)
+			indir[i] = (u32)adapter->rss_lut[i];
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 847d67e..14934a7 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -147,7 +147,7 @@
  *
  * Returns 0 on success, negative on failure
  **/
-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
 {
 	unsigned int wait, delay = 10;
 
@@ -175,6 +175,19 @@
 }
 
 /**
+ * iavf_schedule_request_stats - Set the flags and schedule statistics request
+ * @adapter: board private structure
+ *
+ * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly
+ * request and refresh ethtool stats
+ **/
+void iavf_schedule_request_stats(struct iavf_adapter *adapter)
+{
+	adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+}
+
+/**
  * iavf_tx_timeout - Respond to a Tx Hang
  * @netdev: network interface device structure
  * @txqueue: queue number that is timing out
@@ -697,6 +710,21 @@
 }
 
 /**
+ * iavf_restore_filters
+ * @adapter: board private structure
+ *
+ * Restore existing non-MAC filters when the VF netdev comes back up
+ **/
+static void iavf_restore_filters(struct iavf_adapter *adapter)
+{
+	u16 vid;
+
+	/* re-add all VLAN filters */
+	for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
+		iavf_add_vlan(adapter, vid);
+}
+
+/**
  * iavf_vlan_rx_add_vid - Add a VLAN filter to a device
  * @netdev: network device struct
  * @proto: unused protocol data
@@ -709,8 +737,11 @@
 
 	if (!VLAN_ALLOWED(adapter))
 		return -EIO;
+
 	if (iavf_add_vlan(adapter, vid) == NULL)
 		return -ENOMEM;
+
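+	/* remember the VID so it can be restored when the netdev comes back up */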
+	set_bit(vid, adapter->vsi.active_vlans);
 	return 0;
 }
 
@@ -725,11 +756,10 @@
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 
-	if (VLAN_ALLOWED(adapter)) {
-		iavf_del_vlan(adapter, vid);
-		return 0;
-	}
-	return -EIO;
+	iavf_del_vlan(adapter, vid);
+	clear_bit(vid, adapter->vsi.active_vlans);
+
+	return 0;
 }
 
 /**
@@ -1639,8 +1669,7 @@
 		iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
 		return 0;
 	}
-
-	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) &&
+	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) ||
 	    (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
 		iavf_set_promiscuous(adapter, 0);
 		return 0;
@@ -1688,6 +1717,11 @@
 		iavf_del_adv_rss_cfg(adapter);
 		return 0;
 	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
+		iavf_request_stats(adapter);
+		return 0;
+	}
+
 	return -EAGAIN;
 }
 
@@ -2123,8 +2157,8 @@
 
 	iavf_free_misc_irq(adapter);
 	iavf_reset_interrupt_capability(adapter);
-	iavf_free_queues(adapter);
 	iavf_free_q_vectors(adapter);
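+	/* q_vectors reference the rings, so free them before the queues */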
+	iavf_free_queues(adapter);
 	memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
 	iavf_shutdown_adminq(&adapter->hw);
 	adapter->netdev->flags &= ~IFF_UP;
@@ -2152,7 +2186,6 @@
 	struct net_device *netdev = adapter->netdev;
 	struct iavf_hw *hw = &adapter->hw;
 	struct iavf_mac_filter *f, *ftmp;
-	struct iavf_vlan_filter *vlf;
 	struct iavf_cloud_filter *cf;
 	u32 reg_val;
 	int i = 0, err;
@@ -2233,6 +2266,7 @@
 		   (adapter->state == __IAVF_RESETTING));
 
 	if (running) {
+		netdev->flags &= ~IFF_UP;
 		netif_carrier_off(netdev);
 		netif_tx_stop_all_queues(netdev);
 		adapter->link_up = false;
@@ -2292,11 +2326,6 @@
 	list_for_each_entry(f, &adapter->mac_filter_list, list) {
 		f->add = true;
 	}
-	/* re-add all VLAN filters */
-	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-		vlf->add = true;
-	}
-
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
 	/* check if TCs are running and re-add all cloud filters */
@@ -2310,7 +2339,6 @@
 	spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
 	adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
-	adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
 	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
 	iavf_misc_irq_enable(adapter);
 
@@ -2344,7 +2372,7 @@
 		 * to __IAVF_RUNNING
 		 */
 		iavf_up_complete(adapter);
-
+		netdev->flags |= IFF_UP;
 		iavf_irq_enable(adapter, true);
 	} else {
 		iavf_change_state(adapter, __IAVF_DOWN);
@@ -2357,8 +2385,10 @@
 reset_err:
 	mutex_unlock(&adapter->client_lock);
 	mutex_unlock(&adapter->crit_lock);
-	if (running)
+	if (running) {
 		iavf_change_state(adapter, __IAVF_RUNNING);
+		netdev->flags |= IFF_UP;
+	}
 	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
 	iavf_close(netdev);
 }
@@ -2410,7 +2440,7 @@
 
 	/* check for error indications */
 	val = rd32(hw, hw->aq.arq.len);
-	if (val == 0xdeadbeef) /* indicates device in reset */
+	if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */
 		goto freedom;
 	oldval = val;
 	if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) {
@@ -3095,8 +3125,10 @@
 		return -ENOMEM;
 
 	while (!mutex_trylock(&adapter->crit_lock)) {
-		if (--count == 0)
-			goto err;
+		if (--count == 0) {
+			kfree(filter);
+			return err;
+		}
 		udelay(1);
 	}
 
@@ -3107,11 +3139,11 @@
 	/* start out with flow type and eth type IPv4 to begin with */
 	filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
 	err = iavf_parse_cls_flower(adapter, cls_flower, filter);
-	if (err < 0)
+	if (err)
 		goto err;
 
 	err = iavf_handle_tclass(adapter, tc, filter);
-	if (err < 0)
+	if (err)
 		goto err;
 
 	/* add filter to the list */
@@ -3308,6 +3340,9 @@
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+	/* Restore VLAN filters that were removed with IFF_DOWN */
+	iavf_restore_filters(adapter);
+
 	iavf_configure(adapter);
 
 	iavf_up_complete(adapter);
@@ -3415,11 +3450,16 @@
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 
-	/* Don't allow changing VLAN_RX flag when adapter is not capable
-	 * of VLAN offload
+	/* Don't allow enabling VLAN features when adapter is not capable
+	 * of VLAN offload/filtering
 	 */
 	if (!VLAN_ALLOWED(adapter)) {
-		if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+		netdev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
+					 NETIF_F_HW_VLAN_CTAG_TX |
+					 NETIF_F_HW_VLAN_CTAG_FILTER);
+		if (features & (NETIF_F_HW_VLAN_CTAG_RX |
+				NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_CTAG_FILTER))
 			return -EINVAL;
 	} else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
 		if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -3503,7 +3543,8 @@
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 
-	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+	if (adapter->vf_res &&
+	    !(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
 		features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
 			      NETIF_F_HW_VLAN_CTAG_RX |
 			      NETIF_F_HW_VLAN_CTAG_FILTER);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 8c3f0f7..d60bf7c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -607,7 +607,7 @@
 		if (f->add)
 			count++;
 	}
-	if (!count) {
+	if (!count || !VLAN_ALLOWED(adapter)) {
 		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
@@ -673,9 +673,19 @@
 
 	spin_lock_bh(&adapter->mac_vlan_list_lock);
 
-	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
-		if (f->remove)
+	list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+		/* since VLAN capabilities are not allowed, we don't want to
+		 * send a VLAN delete request because it will most likely fail
+		 * and create unnecessary errors/noise, so just free the VLAN
+		 * filters marked for removal so we can bail out before
+		 * sending a virtchnl message
+		 */
+		if (f->remove && !VLAN_ALLOWED(adapter)) {
+			list_del(&f->list);
+			kfree(f);
+		} else if (f->remove) {
 			count++;
+		}
 	}
 	if (!count) {
 		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
@@ -784,6 +794,8 @@
 		/* no error message, this isn't crucial */
 		return;
 	}
+
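+	/* the stats request is going out now, so clear the pending flag */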
+	adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS;
 	adapter->current_op = VIRTCHNL_OP_GET_STATS;
 	vqs.vsi_id = adapter->vsi_res->vsi_id;
 	/* queue maps are ignored for this message - only the vsi is used */
@@ -1722,8 +1734,37 @@
 		}
 		spin_lock_bh(&adapter->mac_vlan_list_lock);
 		iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+		if (VLAN_ALLOWED(adapter)) {
+			if (!list_empty(&adapter->vlan_filter_list)) {
+				struct iavf_vlan_filter *vlf;
+
+				/* re-add all VLAN filters over virtchnl */
+				list_for_each_entry(vlf,
+						    &adapter->vlan_filter_list,
+						    list)
+					vlf->add = true;
+
+				adapter->aq_required |=
+					IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+			}
+		}
+
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		iavf_process_config(adapter);
+
+		/* unlock crit_lock before acquiring rtnl_lock as other
+		 * processes holding rtnl_lock could be waiting for the same
+		 * crit_lock
+		 */
+		mutex_unlock(&adapter->crit_lock);
+		rtnl_lock();
+		netdev_update_features(adapter->netdev);
+		rtnl_unlock();
+		if (iavf_lock_timeout(&adapter->crit_lock, 10000))
+			dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n",
+				 __func__);
+
 		}
 		break;
 	case VIRTCHNL_OP_ENABLE_QUEUES:
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 40562600..09a3297 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -89,8 +89,13 @@
 	if (!vsi->rx_rings)
 		goto err_rings;
 
-	/* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
-	vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
+	/* txq_map needs to have enough space to track both Tx (stack) rings
+	 * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+	 * so use num_possible_cpus() as we want to always provide an XDP
+	 * ring per CPU, regardless of the queue count settings from the
+	 * user that might have come from ethtool's set_channels() callback
+	 */
+	vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
 				    sizeof(*vsi->txq_map), GFP_KERNEL);
 
 	if (!vsi->txq_map)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f099797..4d1fc48 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2609,7 +2609,18 @@
 			ice_stat_str(status));
 		goto clear_xdp_rings;
 	}
-	ice_vsi_assign_bpf_prog(vsi, prog);
+
+	/* assign the prog only when it's not already present on the VSI;
+	 * this path is exercised by both the ethtool -L and ndo_bpf flows;
+	 * a VSI rebuild under ethtool -L can expose us to bpf_prog refcount
+	 * issues, as we would be swapping the same bpf_prog pointer out of
+	 * vsi->xdp_prog and calling bpf_prog_put on it as if it were an
+	 * 'old_prog'; for ndo_bpf this is not harmful, as dev_xdp_install
+	 * bumps the refcount before calling the op exposed by the driver
+	 */
+	if (!ice_is_xdp_ena_vsi(vsi))
+		ice_vsi_assign_bpf_prog(vsi, prog);
 
 	return 0;
 clear_xdp_rings:
@@ -2785,6 +2796,11 @@
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
 	} else {
+		/* safe to call even when prog == vsi->xdp_prog as
+		 * dev_xdp_install in net/core/dev.c incremented prog's
+		 * refcount so corresponding bpf_prog_put won't cause
+		 * underflow
+		 */
 		ice_vsi_assign_bpf_prog(vsi, prog);
 	}
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 836be0d..fd54d3e 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8026,7 +8026,7 @@
 	if (likely(napi_complete_done(napi, work_done)))
 		igb_ring_irq_enable(q_vector);
 
-	return min(work_done, budget - 1);
+	return work_done;
 }
 
 /**
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 6433c90..072391c 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -25,6 +25,7 @@
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
+#include <linux/property.h>
 
 #include <asm/checksum.h>
 
@@ -239,6 +240,7 @@
 {
 	struct ltq_etop_priv *priv = netdev_priv(dev);
 	int i;
+	int err;
 
 	ltq_pmu_enable(PMU_PPE);
 
@@ -273,7 +275,13 @@
 
 		if (IS_TX(i)) {
 			ltq_dma_alloc_tx(&ch->dma);
-			request_irq(irq, ltq_etop_dma_irq, 0, "etop_tx", priv);
+			err = request_irq(irq, ltq_etop_dma_irq, 0, "etop_tx", priv);
+			if (err) {
+				netdev_err(dev,
+					   "Unable to get Tx DMA IRQ %d\n",
+					   irq);
+				return err;
+			}
 		} else if (IS_RX(i)) {
 			ltq_dma_alloc_rx(&ch->dma);
 			for (ch->dma.desc = 0; ch->dma.desc < LTQ_DESC_NUM;
@@ -281,7 +289,13 @@
 				if (ltq_etop_alloc_skb(ch))
 					return -ENOMEM;
 			ch->dma.desc = 0;
-			request_irq(irq, ltq_etop_dma_irq, 0, "etop_rx", priv);
+			err = request_irq(irq, ltq_etop_dma_irq, 0, "etop_rx", priv);
+			if (err) {
+				netdev_err(dev,
+					   "Unable to get Rx DMA IRQ %d\n",
+					   irq);
+				return err;
+			}
 		}
 		ch->dma.irq = irq;
 	}
@@ -726,7 +740,7 @@
 	},
 };
 
-int __init
+static int __init
 init_ltq_etop(void)
 {
 	int ret = platform_driver_probe(&ltq_mii_driver, ltq_etop_probe);
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 62a97c4..ef87897 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -429,12 +429,14 @@
 };
 MODULE_DEVICE_TABLE(of, orion_mdio_match);
 
+#ifdef CONFIG_ACPI
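+/* guarded so that !CONFIG_ACPI builds do not warn about an unused table */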
 static const struct acpi_device_id orion_mdio_acpi_match[] = {
 	{ "MRVL0100", BUS_TYPE_SMI },
 	{ "MRVL0101", BUS_TYPE_XSMI },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
+#endif
 
 static struct platform_driver orion_mdio_driver = {
 	.probe = orion_mdio_probe,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 2b18d89..ce486e1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -5017,11 +5017,13 @@
 		mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
 	}
 
+	if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) {
+		netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n",
+			   mtu, (int)MVPP2_MAX_RX_BUF_SIZE);
+		return -EINVAL;
+	}
+
 	if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
-		if (port->xdp_prog) {
-			netdev_err(dev, "Jumbo frames are not supported with XDP\n");
-			return -EINVAL;
-		}
 		if (priv->percpu_pools) {
 			netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
 			mvpp2_bm_switch_buffers(priv, false);
@@ -5307,8 +5309,8 @@
 	bool running = netif_running(port->dev);
 	bool reset = !prog != !port->xdp_prog;
 
-	if (port->dev->mtu > ETH_DATA_LEN) {
-		NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled");
+	if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) {
+		NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP");
 		return -EOPNOTSUPP;
 	}
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index c7fd466..a09a507 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -236,10 +236,11 @@
 	u64 lmt_addr, val, tbl_base;
 	int pf, vf, num_vfs, hw_vfs;
 	void __iomem *lmt_map_base;
-	int index = 0, off = 0;
-	int bytes_not_copied;
 	int buf_size = 10240;
+	size_t off = 0;
+	int index = 0;
 	char *buf;
+	int ret;
 
 	/* don't allow partial reads */
 	if (*ppos != 0)
@@ -303,15 +304,17 @@
 	}
 	off +=	scnprintf(&buf[off], buf_size - 1 - off, "\n");
 
-	bytes_not_copied = copy_to_user(buffer, buf, off);
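+	/* never copy back more than the caller asked for */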
+	ret = min(off, count);
+	if (copy_to_user(buffer, buf, ret))
+		ret = -EFAULT;
 	kfree(buf);
 
 	iounmap(lmt_map_base);
-	if (bytes_not_copied)
-		return -EFAULT;
+	if (ret < 0)
+		return ret;
 
-	*ppos = off;
-	return off;
+	*ppos = ret;
+	return ret;
 }
 
 RVU_DEBUG_FOPS(lmtst_map_table, lmtst_map_table_display, NULL);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 3ce6ccd0f..b4599fe 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -497,8 +497,8 @@
 
 	br_port = prestera_bridge_port_add(bridge, port->dev);
 	if (IS_ERR(br_port)) {
-		err = PTR_ERR(br_port);
-		goto err_brport_create;
+		prestera_bridge_put(bridge);
+		return PTR_ERR(br_port);
 	}
 
 	err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
@@ -519,8 +519,6 @@
 	switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
 err_switchdev_offload:
 	prestera_bridge_port_put(br_port);
-err_brport_create:
-	prestera_bridge_put(bridge);
 	return err;
 }
 
@@ -1124,7 +1122,7 @@
 						     prestera_port_obj_attr_set);
 		break;
 	default:
-		err = -EOPNOTSUPP;
+		return NOTIFY_DONE;
 	}
 
 	return notifier_from_errno(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index f71ec4d..8eaa24d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -339,6 +339,8 @@
 	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
 	case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
 	case MLX5_CMD_OP_DEALLOC_SF:
+	case MLX5_CMD_OP_DESTROY_UCTX:
+	case MLX5_CMD_OP_DESTROY_UMEM:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -464,9 +466,7 @@
 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_CREATE_UCTX:
-	case MLX5_CMD_OP_DESTROY_UCTX:
 	case MLX5_CMD_OP_CREATE_UMEM:
-	case MLX5_CMD_OP_DESTROY_UMEM:
 	case MLX5_CMD_OP_ALLOC_MEMIC:
 	case MLX5_CMD_OP_MODIFY_XRQ:
 	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 02e77ff..5371ad0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -164,13 +164,14 @@
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
 	MLX5_SET(destroy_cq_in, in, uid, cq->uid);
 	err = mlx5_cmd_exec_in(dev, destroy_cq, in);
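+	/* if the destroy command failed, the CQ may still be in use, so do
+	 * not drop the reference or wait for its release
+	 */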
+	if (err)
+		return err;
 
 	synchronize_irq(cq->irqn);
-
 	mlx5_cq_put(cq);
 	wait_for_completion(&cq->free);
 
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 07c8d981..10d1950 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -507,6 +507,8 @@
 	if (!mlx5_debugfs_root)
 		return;
 
-	if (cq->dbg)
+	if (cq->dbg) {
 		rem_res_tree(cq->dbg);
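+		/* clear the stale pointer so a repeated call cannot double free */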
+		cq->dbg = NULL;
+	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index c1c6e74..2445e2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -1356,9 +1356,13 @@
 int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 			struct mlx5_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_acts,
 			const struct flow_action_entry *act,
 			struct netlink_ext_ack *extack)
 {
+	bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+	int err;
+
 	if (!priv) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "offload of ct action isn't available");
@@ -1369,6 +1373,17 @@
 	attr->ct_attr.ct_action = act->ct.action;
 	attr->ct_attr.nf_ft = act->ct.flow_table;
 
+	if (!clear_action)
+		goto out;
+
+	err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
+		return err;
+	}
+	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+out:
 	return 0;
 }
 
@@ -1898,23 +1913,16 @@
 
 	memcpy(pre_ct_attr, attr, attr_sz);
 
-	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
-	if (err) {
-		ct_dbg("Failed to set register for ct clear");
-		goto err_set_registers;
-	}
-
 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
 					   mod_acts->num_actions,
 					   mod_acts->actions);
 	if (IS_ERR(mod_hdr)) {
 		err = PTR_ERR(mod_hdr);
 		ct_dbg("Failed to add create ct clear mod hdr");
-		goto err_set_registers;
+		goto err_mod_hdr;
 	}
 
 	pre_ct_attr->modify_hdr = mod_hdr;
-	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
 	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
 	if (IS_ERR(rule)) {
@@ -1930,7 +1938,7 @@
 
 err_insert:
 	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_set_registers:
+err_mod_hdr:
 	netdev_warn(priv->netdev,
 		    "Failed to offload ct clear flow, err %d\n", err);
 	kfree(pre_ct_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 363329f..99662af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -110,6 +110,7 @@
 int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 			struct mlx5_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_acts,
 			const struct flow_action_entry *act,
 			struct netlink_ext_ack *extack);
 
@@ -172,6 +173,7 @@
 static inline int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 			struct mlx5_flow_attr *attr,
+			struct mlx5e_tc_mod_hdr_acts *mod_acts,
 			const struct flow_action_entry *act,
 			struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index 8f64f2c..b689701 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -102,6 +102,7 @@
 	refcount_t refcnt;
 	struct rcu_head rcu_head;
 	struct completion init_done;
+	struct completion del_hw_done;
 	int tunnel_id; /* the mapped tunnel id of this flow */
 	struct mlx5_flow_attr *attr;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 660cca7..042b1ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -245,8 +245,14 @@
 				struct list_head *flow_list,
 				int index)
 {
-	if (IS_ERR(mlx5e_flow_get(flow)))
+	if (IS_ERR(mlx5e_flow_get(flow))) {
+		/* Flow is being deleted concurrently. Wait for it to be
+		 * unoffloaded from hardware, otherwise deleting encap will
+		 * fail.
+		 */
+		wait_for_completion(&flow->del_hw_done);
 		return;
+	}
 	wait_for_completion(&flow->init_done);
 
 	flow->tmp_entry_index = index;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 62abce0..a2a9f685 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -55,6 +55,7 @@
 	DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
 
 	/* resync */
+	spinlock_t lock; /* protects resync fields */
 	struct mlx5e_ktls_rx_resync_ctx resync;
 	struct list_head list;
 };
@@ -386,14 +387,18 @@
 	struct mlx5e_icosq *sq;
 	bool trigger_poll;
 
-	memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
-
 	sq = &c->async_icosq;
 	ktls_resync = sq->ktls_resync;
+	trigger_poll = false;
 
 	spin_lock_bh(&ktls_resync->lock);
-	list_add_tail(&priv_rx->list, &ktls_resync->list);
-	trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+	spin_lock_bh(&priv_rx->lock);
+	memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
+	if (list_empty(&priv_rx->list)) {
+		list_add_tail(&priv_rx->list, &ktls_resync->list);
+		trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+	}
+	spin_unlock_bh(&priv_rx->lock);
 	spin_unlock_bh(&ktls_resync->lock);
 
 	if (!trigger_poll)
@@ -617,6 +622,8 @@
 	if (err)
 		goto err_create_key;
 
+	INIT_LIST_HEAD(&priv_rx->list);
+	spin_lock_init(&priv_rx->lock);
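+	/* the list starts empty; the resync path relies on list_empty() to
+	 * avoid double-adding the context to the ktls_resync list
+	 */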
 	priv_rx->crypto_info  =
 		*(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
 
@@ -730,10 +737,14 @@
 		priv_rx = list_first_entry(&local_list,
 					   struct mlx5e_ktls_offload_context_rx,
 					   list);
+		spin_lock(&priv_rx->lock);
 		cseg = post_static_params(sq, priv_rx);
-		if (IS_ERR(cseg))
+		if (IS_ERR(cseg)) {
+			spin_unlock(&priv_rx->lock);
 			break;
-		list_del(&priv_rx->list);
+		}
+		list_del_init(&priv_rx->list);
+		spin_unlock(&priv_rx->lock);
 		db_cseg = cseg;
 	}
 	if (db_cseg)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 835caa1c..3d45f4a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1600,6 +1600,7 @@
 		else
 			mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
 	}
+	complete_all(&flow->del_hw_done);
 
 	if (mlx5_flow_has_geneve_opt(flow))
 		mlx5_geneve_tlv_option_del(priv->mdev->geneve);
@@ -3607,7 +3608,9 @@
 			attr->dest_chain = act->chain_index;
 			break;
 		case FLOW_ACTION_CT:
-			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
+			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
+						      &parse_attr->mod_hdr_acts,
+						      act, extack);
 			if (err)
 				return err;
 
@@ -4276,7 +4279,9 @@
 				NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
 				return -EOPNOTSUPP;
 			}
-			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
+			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
+						      &parse_attr->mod_hdr_acts,
+						      act, extack);
 			if (err)
 				return err;
 
@@ -4465,6 +4470,7 @@
 	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
 	refcount_set(&flow->refcnt, 1);
 	init_completion(&flow->init_done);
+	init_completion(&flow->del_hw_done);
 
 	*__flow = flow;
 	*__parse_attr = parse_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ec136b4..51a8cec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1305,12 +1305,17 @@
  */
 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 {
+	bool toggle_lag;
 	int ret;
 
 	if (!mlx5_esw_allowed(esw))
 		return 0;
 
-	mlx5_lag_disable_change(esw->dev);
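+	/* only disable/re-enable LAG changes when the eswitch is actually
+	 * transitioning out of MLX5_ESWITCH_NONE
+	 */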
+	toggle_lag = esw->mode == MLX5_ESWITCH_NONE;
+
+	if (toggle_lag)
+		mlx5_lag_disable_change(esw->dev);
+
 	down_write(&esw->mode_lock);
 	if (esw->mode == MLX5_ESWITCH_NONE) {
 		ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1324,7 +1329,10 @@
 			esw->esw_funcs.num_vfs = num_vfs;
 	}
 	up_write(&esw->mode_lock);
-	mlx5_lag_enable_change(esw->dev);
+
+	if (toggle_lag)
+		mlx5_lag_enable_change(esw->dev);
+
 	return ret;
 }
 
@@ -1572,6 +1580,11 @@
 	esw->enabled_vports = 0;
 	esw->mode = MLX5_ESWITCH_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
+	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+	else
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
@@ -1934,7 +1947,7 @@
 	return err;
 }
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
@@ -1948,7 +1961,7 @@
 	struct mlx5_eswitch *esw;
 
 	esw = dev->priv.eswitch;
-	return mlx5_esw_allowed(esw) ? esw->offloads.encap :
+	return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap :
 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f4eaa58..a464556 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3183,12 +3183,6 @@
 	u64 mapping_id;
 	int err;
 
-	if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
-	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
-	else
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-
 	mutex_init(&esw->offloads.termtbl_mutex);
 	mlx5_rdma_enable_roce(esw->dev);
 
@@ -3286,7 +3280,6 @@
 	esw_offloads_metadata_uninit(esw);
 	mlx5_rdma_disable_roce(esw->dev);
 	mutex_destroy(&esw->offloads.termtbl_mutex);
-	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
@@ -3630,7 +3623,7 @@
 	*encap = esw->offloads.encap;
 unlock:
 	up_write(&esw->mode_lock);
-	return 0;
+	return err;
 }
 
 static bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 31c99d5..7e0e04c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -40,7 +40,7 @@
 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
-#define MLX5_SF_NUM_COUNTERS_BULK 6
+#define MLX5_SF_NUM_COUNTERS_BULK 8
 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
 #define MLX5_FC_POOL_USED_BUFF_RATIO 10
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 48d2ea6..4ddf6b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -615,6 +615,7 @@
 	bool is_bonded, is_in_lag, mode_supported;
 	int bond_status = 0;
 	int num_slaves = 0;
+	int changed = 0;
 	int idx;
 
 	if (!netif_is_lag_master(upper))
@@ -653,27 +654,27 @@
 	 */
 	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
 
-	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
-		NL_SET_ERR_MSG_MOD(info->info.extack,
-				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
-		return 0;
-	}
-
 	/* Lag mode must be activebackup or hash. */
 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
 
-	if (is_in_lag && !mode_supported)
-		NL_SET_ERR_MSG_MOD(info->info.extack,
-				   "Can't activate LAG offload, TX type isn't supported");
-
 	is_bonded = is_in_lag && mode_supported;
 	if (tracker->is_bonded != is_bonded) {
 		tracker->is_bonded = is_bonded;
-		return 1;
+		changed = 1;
 	}
 
-	return 0;
+	if (!is_in_lag)
+		return changed;
+
+	if (!mlx5_lag_is_ready(ldev))
+		NL_SET_ERR_MSG_MOD(info->info.extack,
+				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
+	else if (!mode_supported)
+		NL_SET_ERR_MSG_MOD(info->info.extack,
+				   "Can't activate LAG offload, TX type isn't supported");
+
+	return changed;
 }
 
 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
@@ -716,9 +717,6 @@
 
 	ldev    = container_of(this, struct mlx5_lag, nb);
 
-	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
-		return NOTIFY_DONE;
-
 	tracker = ldev->tracker;
 
 	switch (event) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 49089cb..8cbd36c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -135,25 +135,14 @@
 
 static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
 				 u16 vport_number,
+				 bool other_vport,
 				 struct mlx5dr_cmd_vport_cap *vport_caps)
 {
-	u16 cmd_vport = vport_number;
-	bool other_vport = true;
 	int ret;
 
-	if (vport_number == MLX5_VPORT_UPLINK) {
-		dr_domain_fill_uplink_caps(dmn, vport_caps);
-		return 0;
-	}
-
-	if (dmn->info.caps.is_ecpf && vport_number == MLX5_VPORT_ECPF) {
-		other_vport = false;
-		cmd_vport = 0;
-	}
-
 	ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
 						 other_vport,
-						 cmd_vport,
+						 vport_number,
 						 &vport_caps->icm_address_rx,
 						 &vport_caps->icm_address_tx);
 	if (ret)
@@ -161,7 +150,7 @@
 
 	ret = mlx5dr_cmd_query_gvmi(dmn->mdev,
 				    other_vport,
-				    cmd_vport,
+				    vport_number,
 				    &vport_caps->vport_gvmi);
 	if (ret)
 		return ret;
@@ -176,9 +165,15 @@
 {
 	return dr_domain_query_vport(dmn,
 				     dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0,
+				     false,
 				     &dmn->info.caps.vports.esw_manager_caps);
 }
 
+static void dr_domain_query_uplink(struct mlx5dr_domain *dmn)
+{
+	dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps);
+}
+
 static struct mlx5dr_cmd_vport_cap *
 dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
 {
@@ -190,7 +185,7 @@
 	if (!vport_caps)
 		return NULL;
 
-	ret = dr_domain_query_vport(dmn, vport, vport_caps);
+	ret = dr_domain_query_vport(dmn, vport, true, vport_caps);
 	if (ret) {
 		kvfree(vport_caps);
 		return NULL;
@@ -207,16 +202,26 @@
 	return vport_caps;
 }
 
+static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport)
+{
+	struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+	return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
+	       (!caps->is_ecpf && vport == 0);
+}
+
 struct mlx5dr_cmd_vport_cap *
 mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
 {
 	struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
 	struct mlx5dr_cmd_vport_cap *vport_caps;
 
-	if ((caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
-	    (!caps->is_ecpf && vport == 0))
+	if (dr_domain_is_esw_mgr_vport(dmn, vport))
 		return &caps->vports.esw_manager_caps;
 
+	if (vport == MLX5_VPORT_UPLINK)
+		return &caps->vports.uplink_caps;
+
 vport_load:
 	vport_caps = xa_load(&caps->vports.vports_caps_xa, vport);
 	if (vport_caps)
@@ -241,17 +246,6 @@
 	}
 }
 
-static int dr_domain_query_uplink(struct mlx5dr_domain *dmn)
-{
-	struct mlx5dr_cmd_vport_cap *vport_caps;
-
-	vport_caps = mlx5dr_domain_get_vport_cap(dmn, MLX5_VPORT_UPLINK);
-	if (!vport_caps)
-		return -EINVAL;
-
-	return 0;
-}
-
 static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
 				    struct mlx5dr_domain *dmn)
 {
@@ -281,11 +275,7 @@
 		goto free_vports_caps_xa;
 	}
 
-	ret = dr_domain_query_uplink(dmn);
-	if (ret) {
-		mlx5dr_err(dmn, "Failed to query uplink vport caps (err: %d)", ret);
-		goto free_vports_caps_xa;
-	}
+	dr_domain_query_uplink(dmn);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 75c775b..7933652 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -924,11 +924,12 @@
 
 	/* Check that all mask data was consumed */
 	for (i = 0; i < consumed_mask.match_sz; i++) {
-		if (consumed_mask.match_buf[i]) {
-			mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n");
-			ret = -EOPNOTSUPP;
-			goto free_consumed_mask;
-		}
+		if (!((u8 *)consumed_mask.match_buf)[i])
+			continue;
+
+		mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n");
+		ret = -EOPNOTSUPP;
+		goto free_consumed_mask;
 	}
 
 	ret =  0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 3028b776..2333c24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -764,6 +764,7 @@
 
 struct mlx5dr_vports {
 	struct mlx5dr_cmd_vport_cap esw_manager_caps;
+	struct mlx5dr_cmd_vport_cap uplink_caps;
 	struct xarray vports_caps_xa;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5925db3..03e5bad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2153,7 +2153,7 @@
 	max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
 	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
 
-	if (WARN_ON_ONCE(local_port >= max_ports))
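+	/* local port 0 is the CPU port and never has an mlxsw_sp_port */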
+	if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
 		return;
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port)
@@ -3290,10 +3290,10 @@
 	u8 max_rif_mac_profiles;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES))
-		return -EIO;
-
-	max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
-						  MAX_RIF_MAC_PROFILES);
+		max_rif_mac_profiles = 1;
+	else
+		max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
+							  MAX_RIF_MAC_PROFILES);
 	devlink_resource_size_params_init(&size_params, max_rif_mac_profiles,
 					  max_rif_mac_profiles, 1,
 					  DEVLINK_RESOURCE_UNIT_ENTRY);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 4fc9782..7d76474 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -914,8 +914,7 @@
 }
 
 static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
-					   u8 duplex, u16 local_adv,
-					   u16 remote_adv)
+					   u16 local_adv, u16 remote_adv)
 {
 	struct lan743x_phy *phy = &adapter->phy;
 	u8 cap;
@@ -943,7 +942,6 @@
 
 	phy_print_status(phydev);
 	if (phydev->state == PHY_RUNNING) {
-		struct ethtool_link_ksettings ksettings;
 		int remote_advertisement = 0;
 		int local_advertisement = 0;
 
@@ -980,18 +978,14 @@
 		}
 		lan743x_csr_write(adapter, MAC_CR, data);
 
-		memset(&ksettings, 0, sizeof(ksettings));
-		phy_ethtool_get_link_ksettings(netdev, &ksettings);
 		local_advertisement =
 			linkmode_adv_to_mii_adv_t(phydev->advertising);
 		remote_advertisement =
 			linkmode_adv_to_mii_adv_t(phydev->lp_advertising);
 
-		lan743x_phy_update_flowcontrol(adapter,
-					       ksettings.base.duplex,
-					       local_advertisement,
+		lan743x_phy_update_flowcontrol(adapter, local_advertisement,
 					       remote_advertisement);
-		lan743x_ptp_update_latency(adapter, ksettings.base.speed);
+		lan743x_ptp_update_latency(adapter, phydev->speed);
 	}
 }
 
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index e6c18b5..409cde1 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1278,6 +1278,225 @@
 }
 EXPORT_SYMBOL(ocelot_fdb_dump);
 
+static void ocelot_populate_l2_ptp_trap_key(struct ocelot_vcap_filter *trap)
+{
+	trap->key_type = OCELOT_VCAP_KEY_ETYPE;
+	*(__be16 *)trap->key.etype.etype.value = htons(ETH_P_1588);
+	*(__be16 *)trap->key.etype.etype.mask = htons(0xffff);
+}
+
+static void
+ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
+{
+	trap->key_type = OCELOT_VCAP_KEY_IPV4;
+	trap->key.ipv4.dport.value = PTP_EV_PORT;
+	trap->key.ipv4.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
+{
+	trap->key_type = OCELOT_VCAP_KEY_IPV6;
+	trap->key.ipv6.dport.value = PTP_EV_PORT;
+	trap->key.ipv6.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
+{
+	trap->key_type = OCELOT_VCAP_KEY_IPV4;
+	trap->key.ipv4.dport.value = PTP_GEN_PORT;
+	trap->key.ipv4.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
+{
+	trap->key_type = OCELOT_VCAP_KEY_IPV6;
+	trap->key.ipv6.dport.value = PTP_GEN_PORT;
+	trap->key.ipv6.dport.mask = 0xffff;
+}
+
+static int ocelot_trap_add(struct ocelot *ocelot, int port,
+			   unsigned long cookie,
+			   void (*populate)(struct ocelot_vcap_filter *f))
+{
+	struct ocelot_vcap_block *block_vcap_is2;
+	struct ocelot_vcap_filter *trap;
+	bool new = false;
+	int err;
+
+	block_vcap_is2 = &ocelot->block[VCAP_IS2];
+
+	trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie,
+						   false);
+	if (!trap) {
+		trap = kzalloc(sizeof(*trap), GFP_KERNEL);
+		if (!trap)
+			return -ENOMEM;
+
+		populate(trap);
+		trap->prio = 1;
+		trap->id.cookie = cookie;
+		trap->id.tc_offload = false;
+		trap->block_id = VCAP_IS2;
+		trap->type = OCELOT_VCAP_FILTER_OFFLOAD;
+		trap->lookup = 0;
+		trap->action.cpu_copy_ena = true;
+		trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
+		trap->action.port_mask = 0;
+		new = true;
+	}
+
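+	/* a single trap entry is shared by all ports; extend its port mask */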
+	trap->ingress_port_mask |= BIT(port);
+
+	if (new)
+		err = ocelot_vcap_filter_add(ocelot, trap, NULL);
+	else
+		err = ocelot_vcap_filter_replace(ocelot, trap);
+	if (err) {
+		trap->ingress_port_mask &= ~BIT(port);
+		if (!trap->ingress_port_mask)
+			kfree(trap);
+		return err;
+	}
+
+	return 0;
+}
+
+static int ocelot_trap_del(struct ocelot *ocelot, int port,
+			   unsigned long cookie)
+{
+	struct ocelot_vcap_block *block_vcap_is2;
+	struct ocelot_vcap_filter *trap;
+
+	block_vcap_is2 = &ocelot->block[VCAP_IS2];
+
+	trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie,
+						   false);
+	if (!trap)
+		return 0;
+
+	trap->ingress_port_mask &= ~BIT(port);
+	if (!trap->ingress_port_mask)
+		return ocelot_vcap_filter_del(ocelot, trap);
+
+	return ocelot_vcap_filter_replace(ocelot, trap);
+}
+
+static int ocelot_l2_ptp_trap_add(struct ocelot *ocelot, int port)
+{
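+	/* trap cookies live above the physical port range so each trap
+	 * type gets a unique, stable ID
+	 */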
+	unsigned long l2_cookie = ocelot->num_phys_ports + 1;
+
+	return ocelot_trap_add(ocelot, port, l2_cookie,
+			       ocelot_populate_l2_ptp_trap_key);
+}
+
+static int ocelot_l2_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+	unsigned long l2_cookie = ocelot->num_phys_ports + 1;
+
+	return ocelot_trap_del(ocelot, port, l2_cookie);
+}
+
+static int ocelot_ipv4_ptp_trap_add(struct ocelot *ocelot, int port)
+{
+	unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2;
+	unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3;
+	int err;
+
+	err = ocelot_trap_add(ocelot, port, ipv4_ev_cookie,
+			      ocelot_populate_ipv4_ptp_event_trap_key);
+	if (err)
+		return err;
+
+	err = ocelot_trap_add(ocelot, port, ipv4_gen_cookie,
+			      ocelot_populate_ipv4_ptp_general_trap_key);
+	if (err)
+		ocelot_trap_del(ocelot, port, ipv4_ev_cookie);
+
+	return err;
+}
+
+static int ocelot_ipv4_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+	unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2;
+	unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3;
+	int err;
+
+	err = ocelot_trap_del(ocelot, port, ipv4_ev_cookie);
+	err |= ocelot_trap_del(ocelot, port, ipv4_gen_cookie);
+	return err;
+}
+
+static int ocelot_ipv6_ptp_trap_add(struct ocelot *ocelot, int port)
+{
+	unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4;
+	unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5;
+	int err;
+
+	err = ocelot_trap_add(ocelot, port, ipv6_ev_cookie,
+			      ocelot_populate_ipv6_ptp_event_trap_key);
+	if (err)
+		return err;
+
+	err = ocelot_trap_add(ocelot, port, ipv6_gen_cookie,
+			      ocelot_populate_ipv6_ptp_general_trap_key);
+	if (err)
+		ocelot_trap_del(ocelot, port, ipv6_ev_cookie);
+
+	return err;
+}
+
+static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+	unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4;
+	unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5;
+	int err;
+
+	err = ocelot_trap_del(ocelot, port, ipv6_ev_cookie);
+	err |= ocelot_trap_del(ocelot, port, ipv6_gen_cookie);
+	return err;
+}
+
+static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port,
+				  bool l2, bool l4)
+{
+	int err;
+
+	if (l2)
+		err = ocelot_l2_ptp_trap_add(ocelot, port);
+	else
+		err = ocelot_l2_ptp_trap_del(ocelot, port);
+	if (err)
+		return err;
+
+	if (l4) {
+		err = ocelot_ipv4_ptp_trap_add(ocelot, port);
+		if (err)
+			goto err_ipv4;
+
+		err = ocelot_ipv6_ptp_trap_add(ocelot, port);
+		if (err)
+			goto err_ipv6;
+	} else {
+		err = ocelot_ipv4_ptp_trap_del(ocelot, port);
+		err |= ocelot_ipv6_ptp_trap_del(ocelot, port);
+	}
+	if (err)
+		return err;
+
+	return 0;
+
+err_ipv6:
+	ocelot_ipv4_ptp_trap_del(ocelot, port);
+err_ipv4:
+	if (l2)
+		ocelot_l2_ptp_trap_del(ocelot, port);
+	return err;
+}
+
 int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr)
 {
 	return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
@@ -1288,7 +1507,9 @@
 int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
+	bool l2 = false, l4 = false;
 	struct hwtstamp_config cfg;
+	int err;
 
 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
 		return -EFAULT;
@@ -1320,28 +1541,42 @@
 	switch (cfg.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
 		break;
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_NTP_ALL:
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		l4 = true;
+		break;
 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		l2 = true;
+		break;
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		l2 = true;
+		l4 = true;
 		break;
 	default:
 		mutex_unlock(&ocelot->ptp_lock);
 		return -ERANGE;
 	}
 
+	err = ocelot_setup_ptp_traps(ocelot, port, l2, l4);
+	if (err) {
+		mutex_unlock(&ocelot->ptp_lock);
+		return err;
+	}
+
+	if (l2 && l4)
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+	else if (l2)
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+	else if (l4)
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+	else
+		cfg.rx_filter = HWTSTAMP_FILTER_NONE;
+
 	/* Commit back the result & save it */
 	memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
 	mutex_unlock(&ocelot->ptp_lock);
@@ -1444,7 +1677,10 @@
 				 SOF_TIMESTAMPING_RAW_HARDWARE;
 	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
 			 BIT(HWTSTAMP_TX_ONESTEP_SYNC);
-	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
 
 	return 0;
 }
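The ocelot trap helpers above share one VCAP filter per trap across all ports, keyed by fixed cookies placed just past the physical port range. A minimal sketch of the assumed cookie layout follows; the enum and helper are hypothetical illustrations, not part of the driver:

/* Hypothetical view of the cookie space: per-port tc filters are assumed
 * to use cookies below num_phys_ports, so the five shared PTP traps take
 * the next five IDs and can never collide with them.
 */
enum ocelot_ptp_trap_kind {
	OCELOT_TRAP_L2 = 1,		/* num_phys_ports + 1 */
	OCELOT_TRAP_IPV4_GEN = 2,	/* num_phys_ports + 2 */
	OCELOT_TRAP_IPV4_EV = 3,	/* num_phys_ports + 3 */
	OCELOT_TRAP_IPV6_GEN = 4,	/* num_phys_ports + 4 */
	OCELOT_TRAP_IPV6_EV = 5,	/* num_phys_ports + 5 */
};

static unsigned long ocelot_ptp_trap_cookie(struct ocelot *ocelot,
					    enum ocelot_ptp_trap_kind kind)
{
	return ocelot->num_phys_ports + kind;
}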
diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c
index 99d7376..337cd08 100644
--- a/drivers/net/ethernet/mscc/ocelot_vcap.c
+++ b/drivers/net/ethernet/mscc/ocelot_vcap.c
@@ -1217,6 +1217,22 @@
 }
 EXPORT_SYMBOL(ocelot_vcap_filter_del);
 
+int ocelot_vcap_filter_replace(struct ocelot *ocelot,
+			       struct ocelot_vcap_filter *filter)
+{
+	struct ocelot_vcap_block *block = &ocelot->block[filter->block_id];
+	int index;
+
+	index = ocelot_vcap_block_get_filter_index(block, filter);
+	if (index < 0)
+		return index;
+
+	vcap_entry_set(ocelot, index, filter);
+
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_vcap_filter_replace);
+
 int ocelot_vcap_filter_stats_update(struct ocelot *ocelot,
 				    struct ocelot_vcap_filter *filter)
 {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index df20373..0b1865e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -565,7 +565,6 @@
  * @exn_name:           Name for Exception interrupt
  * @shared_handler:     Handler for shared interrupts
  * @shared_name:        Name for shared interrupt
- * @me_freq_mhz:        ME clock_freq (MHz)
  * @reconfig_lock:	Protects @reconfig_posted, @reconfig_timer_active,
  *			@reconfig_sync_present and HW reconfiguration request
  *			regs/machinery from async requests (sync must take
@@ -650,8 +649,6 @@
 	irq_handler_t shared_handler;
 	char shared_name[IFNAMSIZ + 8];
 
-	u32 me_freq_mhz;
-
 	bool link_up;
 	spinlock_t link_status_lock;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 1de076f..cf78829 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1344,7 +1344,7 @@
 	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
 	 * count.
 	 */
-	factor = nn->me_freq_mhz / 16;
+	factor = nn->tlv_caps.me_freq_mhz / 16;
 
 	/* Each pair of (usecs, max_frames) fields specifies that interrupts
 	 * should be coalesced until
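As a worked example of the factor above, assuming a hypothetical 800 MHz ME clock reported via the TLV capabilities: one timestamp count spans 16 ME cycles, i.e. 20 ns, so factor = 800 / 16 = 50 counts per microsecond, and a 50 us coalescing target is programmed as 2500 timestamp counts.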
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index cfeb762..07a00dd9 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1209,7 +1209,7 @@
 
 	cell = nvmem_cell_get(dev, "address");
 	if (IS_ERR(cell))
-		return NULL;
+		return cell;
 
 	mac = nvmem_cell_read(cell, &cell_size);
 	nvmem_cell_put(cell);
@@ -1282,7 +1282,7 @@
 	ndev->max_mtu = NIXGE_JUMBO_MTU;
 
 	mac_addr = nixge_get_nvmem_address(&pdev->dev);
-	if (mac_addr && is_valid_ether_addr(mac_addr)) {
+	if (!IS_ERR(mac_addr) && is_valid_ether_addr(mac_addr)) {
 		eth_hw_addr_set(ndev, mac_addr);
 		kfree(mac_addr);
 	} else {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index a97f691..6958ade 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1045,7 +1045,7 @@
 		if (!parities)
 			continue;
 
-		for (j = 0, bit_idx = 0; bit_idx < 32; j++) {
+		for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) {
 			struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
 
 			if (qed_int_is_parity_flag(p_hwfn, p_bit) &&
@@ -1083,7 +1083,7 @@
 			 * to current group, making them responsible for the
 			 * previous assertion.
 			 */
-			for (j = 0, bit_idx = 0; bit_idx < 32; j++) {
+			for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) {
 				long unsigned int bitmask;
 				u8 bit, bit_len;
 
@@ -1382,7 +1382,7 @@
 	memset(sb_info->parity_mask, 0, sizeof(u32) * NUM_ATTN_REGS);
 	for (i = 0; i < NUM_ATTN_REGS; i++) {
 		/* j is array index, k is bit index */
-		for (j = 0, k = 0; k < 32; j++) {
+		for (j = 0, k = 0; k < 32 && j < 32; j++) {
 			struct aeu_invert_reg_bit *p_aeu;
 
 			p_aeu = &aeu_descs[i].bits[j];
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index bbe21db..86c44bc 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5217,8 +5217,8 @@
 
 static void rtl_init_mac_address(struct rtl8169_private *tp)
 {
+	u8 mac_addr[ETH_ALEN] __aligned(2) = {};
 	struct net_device *dev = tp->dev;
-	u8 mac_addr[ETH_ALEN];
 	int rc;
 
 	rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
@@ -5233,7 +5233,8 @@
 	if (is_valid_ether_addr(mac_addr))
 		goto done;
 
-	eth_hw_addr_random(dev);
+	eth_random_addr(mac_addr);
+	dev->addr_assign_type = NET_ADDR_RANDOM;
 	dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n");
 done:
 	eth_hw_addr_set(dev, mac_addr);
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index cc2d907..23a336c 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -392,7 +392,7 @@
 			/* get MAC address from EEPROM */
 			for (i = 0; i < 3; i++)
 			        addr[i] = read_eeprom(ioaddr, i + EEPROMMACAddr);
-			 eth_hw_addr_set(net_dev, (u8 *)addr);
+			eth_hw_addr_set(net_dev, (u8 *)addr);
 
 			rc = 1;
 			break;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 8520812..b7c2579 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -485,8 +485,28 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend,
-					       socfpga_dwmac_resume);
+static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	stmmac_bus_clks_config(priv, false);
+
+	return 0;
+}
+
+static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	return stmmac_bus_clks_config(priv, true);
+}
+
+static const struct dev_pm_ops socfpga_dwmac_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume)
+	SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL)
+};
 
 static const struct socfpga_dwmac_ops socfpga_gen5_ops = {
 	.set_phy_mode = socfpga_gen5_set_phy_mode,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 43eead7..5f12973 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -314,6 +314,7 @@
 int stmmac_xpcs_setup(struct mii_bus *mii);
 void stmmac_set_ethtool_ops(struct net_device *netdev);
 
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags);
 void stmmac_ptp_register(struct stmmac_priv *priv);
 void stmmac_ptp_unregister(struct stmmac_priv *priv);
 int stmmac_open(struct net_device *dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d3f350c..74819569 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -50,6 +50,13 @@
 #include "dwxgmac2.h"
 #include "hwif.h"
 
+/* As long as the interface is active, we keep the timestamping counter enabled
+ * with fine resolution and binary rollover. This avoids non-monotonic behavior
+ * (clock jumps) when changing timestamping settings at runtime.
+ */
+#define STMMAC_HWTS_ACTIVE	(PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \
+				 PTP_TCR_TSCTRLSSR)
+
 #define	STMMAC_ALIGN(x)		ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
 #define	TSO_MAX_BUFF_SIZE	(SZ_16K - 1)
 
@@ -511,6 +518,14 @@
 	return true;
 }
 
+static inline u32 stmmac_cdc_adjust(struct stmmac_priv *priv)
+{
+	/* Correct the clk domain crossing(CDC) error */
+	if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate)
+		return (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;
+	return 0;
+}
+
 /* stmmac_get_tx_hwtstamp - get HW TX timestamps
  * @priv: driver private structure
  * @p : descriptor pointer
@@ -524,7 +539,6 @@
 {
 	struct skb_shared_hwtstamps shhwtstamp;
 	bool found = false;
-	s64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_tx_en)
@@ -543,12 +557,7 @@
 	}
 
 	if (found) {
-		/* Correct the clk domain crossing(CDC) error */
-		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
-			adjust += -(2 * (NSEC_PER_SEC /
-					 priv->plat->clk_ptp_rate));
-			ns += adjust;
-		}
+		ns -= stmmac_cdc_adjust(priv);
 
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
@@ -573,7 +582,6 @@
 {
 	struct skb_shared_hwtstamps *shhwtstamp = NULL;
 	struct dma_desc *desc = p;
-	u64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_rx_en)
@@ -586,11 +594,7 @@
 	if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
 		stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
 
-		/* Correct the clk domain crossing(CDC) error */
-		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
-			adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
-			ns -= adjust;
-		}
+		ns -= stmmac_cdc_adjust(priv);
 
 		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
@@ -616,8 +620,6 @@
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct hwtstamp_config config;
-	struct timespec64 now;
-	u64 temp = 0;
 	u32 ptp_v2 = 0;
 	u32 tstamp_all = 0;
 	u32 ptp_over_ipv4_udp = 0;
@@ -626,11 +628,6 @@
 	u32 snap_type_sel = 0;
 	u32 ts_master_en = 0;
 	u32 ts_event_en = 0;
-	u32 sec_inc = 0;
-	u32 value = 0;
-	bool xmac;
-
-	xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
 
 	if (!(priv->dma_cap.time_stamp || priv->adv_ts)) {
 		netdev_alert(priv->dev, "No support for HW time stamping\n");
@@ -792,42 +789,17 @@
 	priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1);
 	priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
 
-	if (!priv->hwts_tx_en && !priv->hwts_rx_en)
-		stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
-	else {
-		value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
-			 tstamp_all | ptp_v2 | ptp_over_ethernet |
-			 ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
-			 ts_master_en | snap_type_sel);
-		stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
+	priv->systime_flags = STMMAC_HWTS_ACTIVE;
 
-		/* program Sub Second Increment reg */
-		stmmac_config_sub_second_increment(priv,
-				priv->ptpaddr, priv->plat->clk_ptp_rate,
-				xmac, &sec_inc);
-		temp = div_u64(1000000000ULL, sec_inc);
-
-		/* Store sub second increment and flags for later use */
-		priv->sub_second_inc = sec_inc;
-		priv->systime_flags = value;
-
-		/* calculate default added value:
-		 * formula is :
-		 * addend = (2^32)/freq_div_ratio;
-		 * where, freq_div_ratio = 1e9ns/sec_inc
-		 */
-		temp = (u64)(temp << 32);
-		priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
-		stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
-
-		/* initialize system time */
-		ktime_get_real_ts64(&now);
-
-		/* lower 32 bits of tv_sec are safe until y2106 */
-		stmmac_init_systime(priv, priv->ptpaddr,
-				(u32)now.tv_sec, now.tv_nsec);
+	if (priv->hwts_tx_en || priv->hwts_rx_en) {
+		priv->systime_flags |= tstamp_all | ptp_v2 |
+				       ptp_over_ethernet | ptp_over_ipv6_udp |
+				       ptp_over_ipv4_udp | ts_event_en |
+				       ts_master_en | snap_type_sel;
 	}
 
+	stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags);
+
 	memcpy(&priv->tstamp_config, &config, sizeof(config));
 
 	return copy_to_user(ifr->ifr_data, &config,
@@ -856,6 +828,66 @@
 }
 
 /**
+ * stmmac_init_tstamp_counter - init hardware timestamping counter
+ * @priv: driver private structure
+ * @systime_flags: timestamping flags
+ * Description:
+ * Initialize hardware counter for packet timestamping.
+ * This is valid as long as the interface is open and not suspended.
+ * Will be rerun after resuming from suspend, case in which the timestamping
+ * flags updated by stmmac_hwtstamp_set() also need to be restored.
+ */
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
+{
+	bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+	struct timespec64 now;
+	u32 sec_inc = 0;
+	u64 temp = 0;
+	int ret;
+
+	if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+		return -EOPNOTSUPP;
+
+	ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+	if (ret < 0) {
+		netdev_warn(priv->dev,
+			    "failed to enable PTP reference clock: %pe\n",
+			    ERR_PTR(ret));
+		return ret;
+	}
+
+	stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
+	priv->systime_flags = systime_flags;
+
+	/* program Sub Second Increment reg */
+	stmmac_config_sub_second_increment(priv, priv->ptpaddr,
+					   priv->plat->clk_ptp_rate,
+					   xmac, &sec_inc);
+	temp = div_u64(1000000000ULL, sec_inc);
+
+	/* Store sub second increment for later use */
+	priv->sub_second_inc = sec_inc;
+
+	/* calculate the default addend value:
+	 * formula is:
+	 * addend = (2^32)/freq_div_ratio;
+	 * where freq_div_ratio = 1e9ns/sec_inc
+	 */
+	temp = (u64)(temp << 32);
+	priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
+	stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
+
+	/* initialize system time */
+	ktime_get_real_ts64(&now);
+
+	/* lower 32 bits of tv_sec are safe until y2106 */
+	stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter);
+
+/**
  * stmmac_init_ptp - init PTP
  * @priv: driver private structure
  * Description: this is to verify if the HW supports the PTPv1 or PTPv2.
@@ -865,9 +897,11 @@
 static int stmmac_init_ptp(struct stmmac_priv *priv)
 {
 	bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+	int ret;
 
-	if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
-		return -EOPNOTSUPP;
+	ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
+	if (ret)
+		return ret;
 
 	priv->adv_ts = 0;
 	/* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */
@@ -3275,10 +3309,6 @@
 	stmmac_mmc_setup(priv);
 
 	if (init_ptp) {
-		ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
-		if (ret < 0)
-			netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
-
 		ret = stmmac_init_ptp(priv);
 		if (ret == -EOPNOTSUPP)
 			netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -3772,6 +3802,8 @@
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 chan;
 
+	netif_tx_disable(dev);
+
 	if (device_may_wakeup(priv->device))
 		phylink_speed_down(priv->phylink, false);
 	/* Stop and disconnect the PHY */
@@ -5164,12 +5196,13 @@
 		if (likely(!(status & rx_not_ls)) &&
 		    (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
 		     unlikely(status != llc_snap))) {
-			if (buf2_len)
+			if (buf2_len) {
 				buf2_len -= ETH_FCS_LEN;
-			else
+				len -= ETH_FCS_LEN;
+			} else if (buf1_len) {
 				buf1_len -= ETH_FCS_LEN;
-
-			len -= ETH_FCS_LEN;
+				len -= ETH_FCS_LEN;
+			}
 		}
 
 		if (!skb) {
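Two worked examples for the arithmetic above, assuming a hypothetical 250 MHz clk_ptp_rate. First, stmmac_cdc_adjust() returns (2 * 10^9) / 250000000 = 8 ns, i.e. receive and transmit timestamps are corrected by two PTP clock periods. Second, in stmmac_init_tstamp_counter(), if stmmac_config_sub_second_increment() were to pick sec_inc = 8 ns, the target counter frequency would be 10^9 / 8 = 125 MHz and default_addend = (125000000 << 32) / 250000000 = 2^31, so the fractional accumulator wraps (and the sub-second counter advances) on every second PTP clock cycle.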
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 232ac98..5d29f33 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -816,7 +816,7 @@
 		if (ret)
 			return ret;
 
-		clk_prepare_enable(priv->plat->clk_ptp_ref);
+		stmmac_init_tstamp_counter(priv, priv->systime_flags);
 	}
 
 	return 0;
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index bfdf89e..8a19a06 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -306,7 +306,6 @@
 {
 	/* Finish setting up the DEVICE info. */
 	dev->netdev_ops		= &sp_netdev_ops;
-	dev->needs_free_netdev	= true;
 	dev->mtu		= SIXP_MTU;
 	dev->hard_header_len	= AX25_MAX_HEADER_LEN;
 	dev->header_ops 	= &ax25_header_ops;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index e2b332b..7da2bb8 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -31,6 +31,8 @@
 
 #define AX_MTU		236
 
+/* Some architectures define END as an assembly function-end marker; undefine it */
+#undef	END
 /* SLIP/KISS protocol characters. */
 #define END             0300		/* indicates end of frame	*/
 #define ESC             0333		/* indicates byte stuffing	*/
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index cff5173..d57472e 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -661,22 +661,6 @@
 	wait_for_completion(&ipa->completion);
 }
 
-void ipa_cmd_pipeline_clear(struct ipa *ipa)
-{
-	u32 count = ipa_cmd_pipeline_clear_count();
-	struct gsi_trans *trans;
-
-	trans = ipa_cmd_trans_alloc(ipa, count);
-	if (trans) {
-		ipa_cmd_pipeline_clear_add(trans);
-		gsi_trans_commit_wait(trans);
-		ipa_cmd_pipeline_clear_wait(ipa);
-	} else {
-		dev_err(&ipa->pdev->dev,
-			"error allocating %u entry tag transaction\n", count);
-	}
-}
-
 static struct ipa_cmd_info *
 ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count)
 {
diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h
index 69cd085..05ed7e4 100644
--- a/drivers/net/ipa/ipa_cmd.h
+++ b/drivers/net/ipa/ipa_cmd.h
@@ -164,12 +164,6 @@
 void ipa_cmd_pipeline_clear_wait(struct ipa *ipa);
 
 /**
- * ipa_cmd_pipeline_clear() - Clear the hardware pipeline
- * @ipa:	- IPA pointer
- */
-void ipa_cmd_pipeline_clear(struct ipa *ipa);
-
-/**
  * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint
  * @ipa:	IPA pointer
  * @tre_count:	Number of elements in the transaction
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 5528d97..03a1709 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -853,6 +853,7 @@
 	u32 offset;
 	u32 val;
 
+	/* This should only be changed when HOL_BLOCK_EN is disabled */
 	offset = IPA_REG_ENDP_INIT_HOL_BLOCK_TIMER_N_OFFSET(endpoint_id);
 	val = hol_block_timer_val(ipa, microseconds);
 	iowrite32(val, ipa->reg_virt + offset);
@@ -868,6 +869,9 @@
 	val = enable ? HOL_BLOCK_EN_FMASK : 0;
 	offset = IPA_REG_ENDP_INIT_HOL_BLOCK_EN_N_OFFSET(endpoint_id);
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
+	/* When enabling, the register must be written twice for IPA v4.5+ */
+	if (enable && endpoint->ipa->version >= IPA_VERSION_4_5)
+		iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
 
 void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa)
@@ -880,6 +884,7 @@
 		if (endpoint->toward_ipa || endpoint->ee_id != GSI_EE_MODEM)
 			continue;
 
+		ipa_endpoint_init_hol_block_enable(endpoint, false);
 		ipa_endpoint_init_hol_block_timer(endpoint, 0);
 		ipa_endpoint_init_hol_block_enable(endpoint, true);
 	}
@@ -1631,8 +1636,6 @@
 	if (ipa->modem_netdev)
 		ipa_modem_suspend(ipa->modem_netdev);
 
-	ipa_cmd_pipeline_clear(ipa);
-
 	ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
 	ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
 }
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index cdfa98a..a448ec1 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -28,6 +28,7 @@
 #include "ipa_reg.h"
 #include "ipa_mem.h"
 #include "ipa_table.h"
+#include "ipa_smp2p.h"
 #include "ipa_modem.h"
 #include "ipa_uc.h"
 #include "ipa_interrupt.h"
@@ -801,6 +802,11 @@
 	struct device *dev = &pdev->dev;
 	int ret;
 
+	/* Prevent the modem from triggering a call to ipa_setup().  This
+	 * also ensures a modem-initiated setup that's underway completes.
+	 */
+	ipa_smp2p_irq_disable_setup(ipa);
+
 	ret = pm_runtime_get_sync(dev);
 	if (WARN_ON(ret < 0))
 		goto out_power_put;
diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
index ad116bc..d0ab4d7 100644
--- a/drivers/net/ipa/ipa_modem.c
+++ b/drivers/net/ipa/ipa_modem.c
@@ -339,9 +339,6 @@
 	if (state != IPA_MODEM_STATE_RUNNING)
 		return -EBUSY;
 
-	/* Prevent the modem from triggering a call to ipa_setup() */
-	ipa_smp2p_disable(ipa);
-
 	/* Clean up the netdev and endpoints if it was started */
 	if (netdev) {
 		struct ipa_priv *priv = netdev_priv(netdev);
@@ -369,6 +366,9 @@
 	struct device *dev = &ipa->pdev->dev;
 	int ret;
 
+	/* Prevent the modem from triggering a call to ipa_setup() */
+	ipa_smp2p_irq_disable_setup(ipa);
+
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
 		dev_err(dev, "error %d getting power to handle crash\n", ret);
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index e3da95d..06cec71 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -52,7 +52,7 @@
 				return false;
 	}
 
-	group_count = data->rsrc_group_src_count;
+	group_count = data->rsrc_group_dst_count;
 	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
 		return false;
 
diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
index df7639c..2112336 100644
--- a/drivers/net/ipa/ipa_smp2p.c
+++ b/drivers/net/ipa/ipa_smp2p.c
@@ -53,7 +53,7 @@
  * @setup_ready_irq:	IPA interrupt triggered by modem to signal GSI ready
  * @power_on:		Whether IPA power is on
  * @notified:		Whether modem has been notified of power state
- * @disabled:		Whether setup ready interrupt handling is disabled
+ * @setup_disabled:	Whether setup ready interrupt handler is disabled
  * @mutex:		Mutex protecting ready-interrupt/shutdown interlock
  * @panic_notifier:	Panic notifier structure
 */
@@ -67,7 +67,7 @@
 	u32 setup_ready_irq;
 	bool power_on;
 	bool notified;
-	bool disabled;
+	bool setup_disabled;
 	struct mutex mutex;
 	struct notifier_block panic_notifier;
 };
@@ -155,11 +155,9 @@
 	struct device *dev;
 	int ret;
 
-	mutex_lock(&smp2p->mutex);
-
-	if (smp2p->disabled)
-		goto out_mutex_unlock;
-	smp2p->disabled = true;		/* If any others arrive, ignore them */
+	/* Ignore any (spurious) interrupts received after the first */
+	if (smp2p->ipa->setup_complete)
+		return IRQ_HANDLED;
 
 	/* Power needs to be active for setup */
 	dev = &smp2p->ipa->pdev->dev;
@@ -176,8 +174,6 @@
 out_power_put:
 	pm_runtime_mark_last_busy(dev);
 	(void)pm_runtime_put_autosuspend(dev);
-out_mutex_unlock:
-	mutex_unlock(&smp2p->mutex);
 
 	return IRQ_HANDLED;
 }
@@ -313,7 +309,7 @@
 	kfree(smp2p);
 }
 
-void ipa_smp2p_disable(struct ipa *ipa)
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa)
 {
 	struct ipa_smp2p *smp2p = ipa->smp2p;
 
@@ -322,7 +318,10 @@
 
 	mutex_lock(&smp2p->mutex);
 
-	smp2p->disabled = true;
+	if (!smp2p->setup_disabled) {
+		disable_irq(smp2p->setup_ready_irq);
+		smp2p->setup_disabled = true;
+	}
 
 	mutex_unlock(&smp2p->mutex);
 }
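One property of the disable_irq() conversion above worth spelling out: unlike the old boolean guard, disable_irq() does not return until any instance of the handler already running on that IRQ has finished, which is what backs the comment in ipa_exit() that a modem-initiated setup already underway is allowed to complete before teardown proceeds.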
diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h
index 99a9567..59cee31 100644
--- a/drivers/net/ipa/ipa_smp2p.h
+++ b/drivers/net/ipa/ipa_smp2p.h
@@ -27,13 +27,12 @@
 void ipa_smp2p_exit(struct ipa *ipa);
 
 /**
- * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling
+ * ipa_smp2p_irq_disable_setup() - Disable the "setup ready" interrupt
  * @ipa:	IPA pointer
  *
- * Prevent handling of the "setup ready" interrupt from the modem.
- * This is used before initiating shutdown of the driver.
+ * Disable the "ipa-setup-ready" interrupt from the modem.
  */
-void ipa_smp2p_disable(struct ipa *ipa);
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa);
 
 /**
  * ipa_smp2p_notify_reset() - Reset modem notification state
diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c
index cad8205..966c3b4 100644
--- a/drivers/net/mdio/mdio-aspeed.c
+++ b/drivers/net/mdio/mdio-aspeed.c
@@ -61,6 +61,13 @@
 
 	iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL);
 
+	rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
+				!(ctrl & ASPEED_MDIO_CTRL_FIRE),
+				ASPEED_MDIO_INTERVAL_US,
+				ASPEED_MDIO_TIMEOUT_US);
+	if (rc < 0)
+		return rc;
+
 	rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data,
 				data & ASPEED_MDIO_DATA_IDLE,
 				ASPEED_MDIO_INTERVAL_US,
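A plausible reading of the extra poll above (the commit message is not included here): the controller deasserts ASPEED_MDIO_CTRL_FIRE once the MDIO cycle it started has completed, while ASPEED_MDIO_DATA_IDLE can still reflect the previous transaction; polling the control register first therefore guarantees that the subsequent data-register read belongs to the cycle just fired.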
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 3ad7397..5904546 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -710,6 +710,7 @@
 	struct phylink_link_state link_state;
 	struct net_device *ndev = pl->netdev;
 	bool mac_config = false;
+	bool retrigger = false;
 	bool cur_link_state;
 
 	mutex_lock(&pl->state_mutex);
@@ -723,6 +724,7 @@
 		link_state.link = false;
 	} else if (pl->mac_link_dropped) {
 		link_state.link = false;
+		retrigger = true;
 	} else {
 		switch (pl->cur_link_an_mode) {
 		case MLO_AN_PHY:
@@ -739,6 +741,19 @@
 		case MLO_AN_INBAND:
 			phylink_mac_pcs_get_state(pl, &link_state);
 
+			/* The PCS may have a latching link-fail indicator.
+			 * If the link was up, bring the link down and
+			 * re-trigger the resolve. Otherwise, re-read the
+			 * PCS state to get the current status of the link.
+			 */
+			if (!link_state.link) {
+				if (cur_link_state)
+					retrigger = true;
+				else
+					phylink_mac_pcs_get_state(pl,
+								  &link_state);
+			}
+
 			/* If we have a phy, the "up" state is the union of
 			 * both the PHY and the MAC
 			 */
@@ -747,6 +762,15 @@
 
 			/* Only update if the PHY link is up */
 			if (pl->phydev && pl->phy_state.link) {
+				/* If the interface has changed, force a
+				 * link down event if the link isn't already
+				 * down, and re-resolve.
+				 */
+				if (link_state.interface !=
+				    pl->phy_state.interface) {
+					retrigger = true;
+					link_state.link = false;
+				}
 				link_state.interface = pl->phy_state.interface;
 
 				/* If we have a PHY, we need to update with
@@ -789,7 +813,7 @@
 		else
 			phylink_link_up(pl, link_state);
 	}
-	if (!link_state.link && pl->mac_link_dropped) {
+	if (!link_state.link && retrigger) {
 		pl->mac_link_dropped = false;
 		queue_work(system_power_efficient_wq, &pl->resolve);
 	}
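For context on the latching behaviour handled above: this mirrors clause 22 PHYs, where the BMSR link-status bit latches low after a link drop and only a second read reports the current state. A single PCS read therefore cannot distinguish a momentary bounce from a link that is still down, so the resolver either re-reads the state (link was already down) or forces a down/up cycle and re-resolves (link was up).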
diff --git a/drivers/net/slip/slip.h b/drivers/net/slip/slip.h
index c420e59..3d7f88b 100644
--- a/drivers/net/slip/slip.h
+++ b/drivers/net/slip/slip.h
@@ -40,6 +40,8 @@
 					   insmod -oslip_maxdev=nnn	*/
 #define SL_MTU		296		/* 296; I am used to 600- FvK	*/
 
+/* Some architectures define END as an assembly function-end marker; undefine it */
+#undef	END
 /* SLIP protocol characters. */
 #define END             0300		/* indicates end of frame	*/
 #define ESC             0333		/* indicates byte stuffing	*/
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fecc9a1..1572878 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1010,6 +1010,7 @@
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	int txq = skb->queue_mapping;
+	struct netdev_queue *queue;
 	struct tun_file *tfile;
 	int len = skb->len;
 
@@ -1054,6 +1055,10 @@
 	if (ptr_ring_produce(&tfile->tx_ring, skb))
 		goto drop;
 
+	/* NETIF_F_LLTX requires us to update trans_start ourselves */
+	queue = netdev_get_tx_queue(dev, txq);
+	queue->trans_start = jiffies;
+
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
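Background for the trans_start update above: for NETIF_F_LLTX drivers the core transmit path does not touch the queue's trans_start, so dev_trans_start() would report a stale timestamp for tun devices. Consumers of that value, such as the transmit watchdog and bonding's ARP monitor, use it to judge recent transmit activity, hence the manual update in the xmit path.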
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 4a02f33..f9877a3 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -9603,12 +9603,9 @@
 		netdev->hw_features &= ~NETIF_F_RXCSUM;
 	}
 
-	if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) {
-		switch (le16_to_cpu(udev->descriptor.idProduct)) {
-		case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
-		case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
-			tp->lenovo_macpassthru = 1;
-		}
+	if (udev->parent &&
+			le16_to_cpu(udev->parent->descriptor.idVendor) == VENDOR_ID_LENOVO) {
+		tp->lenovo_macpassthru = 1;
 	}
 
 	if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial &&
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 20fe4cd..abe0149 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1050,6 +1050,14 @@
 	.ndo_set_features	= smsc95xx_set_features,
 };
 
+static void smsc95xx_handle_link_change(struct net_device *net)
+{
+	struct usbnet *dev = netdev_priv(net);
+
+	phy_print_status(net->phydev);
+	usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
+}
+
 static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct smsc95xx_priv *pdata;
@@ -1154,6 +1162,17 @@
 	dev->net->min_mtu = ETH_MIN_MTU;
 	dev->net->max_mtu = ETH_DATA_LEN;
 	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
+
+	ret = phy_connect_direct(dev->net, pdata->phydev,
+				 &smsc95xx_handle_link_change,
+				 PHY_INTERFACE_MODE_MII);
+	if (ret) {
+		netdev_err(dev->net, "can't attach PHY to %s\n", pdata->mdiobus->id);
+		goto unregister_mdio;
+	}
+
+	phy_attached_info(dev->net->phydev);
+
 	return 0;
 
 unregister_mdio:
@@ -1171,47 +1190,25 @@
 {
 	struct smsc95xx_priv *pdata = dev->driver_priv;
 
+	phy_disconnect(dev->net->phydev);
 	mdiobus_unregister(pdata->mdiobus);
 	mdiobus_free(pdata->mdiobus);
 	netif_dbg(dev, ifdown, dev->net, "free pdata\n");
 	kfree(pdata);
 }
 
-static void smsc95xx_handle_link_change(struct net_device *net)
-{
-	struct usbnet *dev = netdev_priv(net);
-
-	phy_print_status(net->phydev);
-	usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
-}
-
 static int smsc95xx_start_phy(struct usbnet *dev)
 {
-	struct smsc95xx_priv *pdata = dev->driver_priv;
-	struct net_device *net = dev->net;
-	int ret;
+	phy_start(dev->net->phydev);
 
-	ret = smsc95xx_reset(dev);
-	if (ret < 0)
-		return ret;
-
-	ret = phy_connect_direct(net, pdata->phydev,
-				 &smsc95xx_handle_link_change,
-				 PHY_INTERFACE_MODE_MII);
-	if (ret) {
-		netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id);
-		return ret;
-	}
-
-	phy_attached_info(net->phydev);
-	phy_start(net->phydev);
 	return 0;
 }
 
-static int smsc95xx_disconnect_phy(struct usbnet *dev)
+static int smsc95xx_stop(struct usbnet *dev)
 {
-	phy_stop(dev->net->phydev);
-	phy_disconnect(dev->net->phydev);
+	if (dev->net->phydev)
+		phy_stop(dev->net->phydev);
+
 	return 0;
 }
 
@@ -1966,7 +1963,7 @@
 	.unbind		= smsc95xx_unbind,
 	.link_reset	= smsc95xx_link_reset,
 	.reset		= smsc95xx_start_phy,
-	.stop		= smsc95xx_disconnect_phy,
+	.stop		= smsc95xx_stop,
 	.rx_fixup	= smsc95xx_rx_fixup,
 	.tx_fixup	= smsc95xx_tx_fixup,
 	.status		= smsc95xx_status,
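The smsc95xx reshuffle above changes the PHY's lifetime rather than its runtime behaviour: the PHY is now connected once in bind() and disconnected in unbind(), so the usbnet .reset/.stop callbacks merely phy_start()/phy_stop() it instead of reconnecting on every open. The NULL check in smsc95xx_stop() is defensive, for paths where the netdev is stopped without an attached PHY.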
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1771d6e..55db6a3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3423,7 +3423,6 @@
 	.feature_table_size = ARRAY_SIZE(features),
 	.feature_table_legacy = features_legacy,
 	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
-	.suppress_used_validation = true,
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c
index 221fa3b..f577449 100644
--- a/drivers/nfc/virtual_ncidev.c
+++ b/drivers/nfc/virtual_ncidev.c
@@ -202,7 +202,7 @@
 	miscdev.minor = MISC_DYNAMIC_MINOR;
 	miscdev.name = "virtual_nci";
 	miscdev.fops = &virtual_ncidev_fops;
-	miscdev.mode = S_IALLUGO;
+	miscdev.mode = 0600;
 
 	return misc_register(&miscdev);
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4b5de8f5..4c63564 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -895,10 +895,19 @@
 		cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
 	cmnd->write_zeroes.length =
 		cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-	if (nvme_ns_has_pi(ns))
+
+	if (nvme_ns_has_pi(ns)) {
 		cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
-	else
-		cmnd->write_zeroes.control = 0;
+
+		switch (ns->pi_type) {
+		case NVME_NS_DPS_PI_TYPE1:
+		case NVME_NS_DPS_PI_TYPE2:
+			cmnd->write_zeroes.reftag =
+				cpu_to_le32(t10_pi_ref_tag(req));
+			break;
+		}
+	}
+
 	return BLK_STS_OK;
 }
 
@@ -2469,6 +2478,20 @@
 		.vid = 0x14a4,
 		.fr = "22301111",
 		.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+	},
+	{
+		/*
+		 * This Kioxia CD6-V Series / HPE PE8030 device times out and
+		 * aborts I/O during any load, but more easily reproducible
+		 * with discards (fstrim).
+		 *
+		 * The device is left in a state where it is also not possible
+		 * to use "nvme set-feature" to disable APST, but booting with
+		 * nvme_core.default_ps_max_latency=0 works.
+		 */
+		.vid = 0x1e0f,
+		.mn = "KCD6XVUL6T40",
+		.quirks = NVME_QUIRK_NO_APST,
 	}
 };
 
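On why the switch above names only Type 1 and Type 2 (standard NVMe protection-information behaviour, not restated in the patch): for those types the controller verifies the logical block reference tag, so a Write Zeroes command with PRACT set must carry the expected initial reference tag; for Type 3 the reference tag is not checked by the controller and can be left at zero.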
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c5a2b71..282d541 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -698,6 +698,9 @@
 			if (token >= 0)
 				pr_warn("I/O fail on reconnect controller after %d sec\n",
 					token);
+			else
+				token = -1;
+
 			opts->fast_io_fail_tmo = token;
 			break;
 		case NVMF_OPT_HOSTNQN:
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 33bc83d..4ceb286 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -572,7 +572,7 @@
 	return ret;
 }
 
-static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
 		struct nvme_tcp_r2t_pdu *pdu)
 {
 	struct nvme_tcp_data_pdu *data = req->pdu;
@@ -581,32 +581,11 @@
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	u8 ddgst = nvme_tcp_ddgst_len(queue);
 
+	req->state = NVME_TCP_SEND_H2C_PDU;
+	req->offset = 0;
 	req->pdu_len = le32_to_cpu(pdu->r2t_length);
 	req->pdu_sent = 0;
 
-	if (unlikely(!req->pdu_len)) {
-		dev_err(queue->ctrl->ctrl.device,
-			"req %d r2t len is %u, probably a bug...\n",
-			rq->tag, req->pdu_len);
-		return -EPROTO;
-	}
-
-	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
-		dev_err(queue->ctrl->ctrl.device,
-			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
-			rq->tag, req->pdu_len, req->data_len,
-			req->data_sent);
-		return -EPROTO;
-	}
-
-	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
-		dev_err(queue->ctrl->ctrl.device,
-			"req %d unexpected r2t offset %u (expected %zu)\n",
-			rq->tag, le32_to_cpu(pdu->r2t_offset),
-			req->data_sent);
-		return -EPROTO;
-	}
-
 	memset(data, 0, sizeof(*data));
 	data->hdr.type = nvme_tcp_h2c_data;
 	data->hdr.flags = NVME_TCP_F_DATA_LAST;
@@ -622,7 +601,6 @@
 	data->command_id = nvme_cid(rq);
 	data->data_offset = pdu->r2t_offset;
 	data->data_length = cpu_to_le32(req->pdu_len);
-	return 0;
 }
 
 static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
@@ -630,7 +608,7 @@
 {
 	struct nvme_tcp_request *req;
 	struct request *rq;
-	int ret;
+	u32 r2t_length = le32_to_cpu(pdu->r2t_length);
 
 	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
@@ -641,13 +619,28 @@
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
-	if (unlikely(ret))
-		return ret;
+	if (unlikely(!r2t_length)) {
+		dev_err(queue->ctrl->ctrl.device,
+			"req %d r2t len is %u, probably a bug...\n",
+			rq->tag, r2t_length);
+		return -EPROTO;
+	}
 
-	req->state = NVME_TCP_SEND_H2C_PDU;
-	req->offset = 0;
+	if (unlikely(req->data_sent + r2t_length > req->data_len)) {
+		dev_err(queue->ctrl->ctrl.device,
+			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
+			rq->tag, r2t_length, req->data_len, req->data_sent);
+		return -EPROTO;
+	}
 
+	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+		dev_err(queue->ctrl->ctrl.device,
+			"req %d unexpected r2t offset %u (expected %zu)\n",
+			rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+		return -EPROTO;
+	}
+
+	nvme_tcp_setup_h2c_data_pdu(req, pdu);
 	nvme_tcp_queue_request(req, false, true);
 
 	return 0;
@@ -1232,6 +1225,7 @@
 
 static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
 {
+	struct page *page;
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
 
@@ -1241,6 +1235,11 @@
 	if (queue->hdr_digest || queue->data_digest)
 		nvme_tcp_free_crypto(queue);
 
+	if (queue->pf_cache.va) {
+		page = virt_to_head_page(queue->pf_cache.va);
+		__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
+		queue->pf_cache.va = NULL;
+	}
 	sock_release(queue->sock);
 	kfree(queue->pdu);
 	mutex_destroy(&queue->send_mutex);
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 6aa30f3..6be6e59 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -8,6 +8,7 @@
 #include <linux/uio.h>
 #include <linux/falloc.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include "nvmet.h"
 
 #define NVMET_MAX_MPOOL_BVEC		16
@@ -266,7 +267,8 @@
 
 	if (req->ns->buffered_io) {
 		if (likely(!req->f.mpool_alloc) &&
-				nvmet_file_execute_io(req, IOCB_NOWAIT))
+		    (req->ns->file->f_mode & FMODE_NOWAIT) &&
+		    nvmet_file_execute_io(req, IOCB_NOWAIT))
 			return;
 		nvmet_file_submit_buffered_io(req);
 	} else
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 84c387e..cb6a473 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -166,6 +166,8 @@
 static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
 
 static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
 		struct nvmet_tcp_cmd *cmd)
@@ -297,6 +299,16 @@
 	return 0;
 }
 
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
+{
+	WARN_ON(cmd->nr_mapped > 0);
+
+	kfree(cmd->iov);
+	sgl_free(cmd->req.sg);
+	cmd->iov = NULL;
+	cmd->req.sg = NULL;
+}
+
 static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 {
 	struct scatterlist *sg;
@@ -306,6 +318,8 @@
 
 	for (i = 0; i < cmd->nr_mapped; i++)
 		kunmap(sg_page(&sg[i]));
+
+	cmd->nr_mapped = 0;
 }
 
 static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
@@ -387,7 +401,7 @@
 
 	return 0;
 err:
-	sgl_free(cmd->req.sg);
+	nvmet_tcp_free_cmd_buffers(cmd);
 	return NVME_SC_INTERNAL;
 }
 
@@ -632,10 +646,8 @@
 		}
 	}
 
-	if (queue->nvme_sq.sqhd_disabled) {
-		kfree(cmd->iov);
-		sgl_free(cmd->req.sg);
-	}
+	if (queue->nvme_sq.sqhd_disabled)
+		nvmet_tcp_free_cmd_buffers(cmd);
 
 	return 1;
 
@@ -664,8 +676,7 @@
 	if (left)
 		return -EAGAIN;
 
-	kfree(cmd->iov);
-	sgl_free(cmd->req.sg);
+	nvmet_tcp_free_cmd_buffers(cmd);
 	cmd->queue->snd_cmd = NULL;
 	nvmet_tcp_put_cmd(cmd);
 	return 1;
@@ -700,10 +711,11 @@
 static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
 {
 	struct nvmet_tcp_queue *queue = cmd->queue;
+	int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
 		.iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
-		.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+		.iov_len = left
 	};
 	int ret;
 
@@ -717,6 +729,10 @@
 		return ret;
 
 	cmd->offset += ret;
+	left -= ret;
+
+	if (left)
+		return -EAGAIN;
 
 	if (queue->nvme_sq.sqhd_disabled) {
 		cmd->queue->snd_cmd = NULL;
@@ -1406,8 +1422,7 @@
 {
 	nvmet_req_uninit(&cmd->req);
 	nvmet_tcp_unmap_pdu_iovec(cmd);
-	kfree(cmd->iov);
-	sgl_free(cmd->req.sg);
+	nvmet_tcp_free_cmd_buffers(cmd);
 }
 
 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
@@ -1417,7 +1432,10 @@
 
 	for (i = 0; i < queue->nr_cmds; i++, cmd++) {
 		if (nvmet_tcp_need_data_in(cmd))
-			nvmet_tcp_finish_cmd(cmd);
+			nvmet_req_uninit(&cmd->req);
+
+		nvmet_tcp_unmap_pdu_iovec(cmd);
+		nvmet_tcp_free_cmd_buffers(cmd);
 	}
 
 	if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
@@ -1437,7 +1455,9 @@
 	mutex_unlock(&nvmet_tcp_queue_mutex);
 
 	nvmet_tcp_restore_socket_callbacks(queue);
-	flush_work(&queue->io_work);
+	cancel_work_sync(&queue->io_work);
+	/* stop accepting incoming data */
+	queue->rcv_state = NVMET_TCP_RECV_ERR;
 
 	nvmet_tcp_uninit_data_in_cmds(queue);
 	nvmet_sq_destroy(&queue->nvme_sq);
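The flush_work() to cancel_work_sync() switch above matters because io_work re-queues itself while there is pending socket activity: flush_work() only waits for the currently executing instance, so a self-requeued run could still execute after teardown had moved on, whereas cancel_work_sync() also cancels any pending requeue before returning. Setting rcv_state to NVMET_TCP_RECV_ERR afterwards keeps late socket callbacks from feeding in more data.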
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 32be5a0..b10f015b 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -161,9 +161,10 @@
 		 * if it is then we are done, unless there is an
 		 * interrupt-map which takes precedence.
 		 */
+		bool intc = of_property_read_bool(ipar, "interrupt-controller");
+
 		imap = of_get_property(ipar, "interrupt-map", &imaplen);
-		if (imap == NULL &&
-		    of_property_read_bool(ipar, "interrupt-controller")) {
+		if (imap == NULL && intc) {
 			pr_debug(" -> got it !\n");
 			return 0;
 		}
@@ -244,8 +245,20 @@
 
 			pr_debug(" -> imaplen=%d\n", imaplen);
 		}
-		if (!match)
+		if (!match) {
+			if (intc) {
+				/*
+				 * The PASEMI Nemo is a known offender, so
+				 * let's only warn for anyone else.
+				 */
+				WARN(!IS_ENABLED(CONFIG_PPC_PASEMI),
+				     "%pOF interrupt-map failed, using interrupt-controller\n",
+				     ipar);
+				return 0;
+			}
+
 			goto fail;
+		}
 
 		/*
 		 * Successfully parsed an interrupt-map translation; copy new
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 07813fb..b3faf89 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -76,6 +76,7 @@
 	struct device_node *node = dev->of_node;
 	const __be32 *reg;
 	u64 addr;
+	u32 mask;
 
 	/* Construct the name, using parent nodes if necessary to ensure uniqueness */
 	while (node->parent) {
@@ -85,8 +86,12 @@
 		 */
 		reg = of_get_property(node, "reg", NULL);
 		if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) {
-			dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
-				     addr, node, dev_name(dev));
+			if (!of_property_read_u32(node, "mask", &mask))
+				dev_set_name(dev, dev_name(dev) ? "%llx.%x.%pOFn:%s" : "%llx.%x.%pOFn",
+					     addr, ffs(mask) - 1, node, dev_name(dev));
+			else
+				dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
+					     addr, node, dev_name(dev));
 			return;
 		}
 
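A worked example of the new naming scheme (hypothetical node): a child whose reg translates to 0x1c2f0000 and which carries a mask property of 0x4 now enumerates as "1c2f0000.2.uart" (ffs(0x4) - 1 == 2, with %pOFn printing the node name), where previously two siblings sharing the same reg but different mask values would both have claimed "1c2f0000.uart", colliding on registration.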
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index e917bb3..93b1411 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -270,7 +270,8 @@
 
 config PCIE_BRCMSTB
 	tristate "Broadcom Brcmstb PCIe host controller"
-	depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCM4908 || COMPILE_TEST
+	depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCM4908 || \
+		   BMIPS_GENERIC || COMPILE_TEST
 	depends on OF
 	depends on PCI_MSI_IRQ_DOMAIN
 	default ARCH_BRCMSTB
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index dcefdb4..a89b7de 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -57,6 +57,29 @@
 	return zpci_deconfigure_device(zdev);
 }
 
+static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
+{
+	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+					     hotplug_slot);
+
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		return -EIO;
+	/*
+	 * We can't take the zdev->lock as reset_slot may be called during
+	 * probing and/or device removal, which already happen under the
+	 * zdev->lock. Instead the user should use the higher level
+	 * pci_reset_function() or pci_bus_reset() which hold the PCI device
+	 * lock, preventing concurrent removal. If not using these functions,
+	 * holding the PCI device lock is required.
+	 */
+
+	/* As long as the function is configured we can reset */
+	if (probe)
+		return 0;
+
+	return zpci_hot_reset_device(zdev);
+}
+
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
@@ -76,6 +99,7 @@
 static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
 	.enable_slot =		enable_slot,
 	.disable_slot =		disable_slot,
+	.reset_slot =		reset_slot,
 	.get_power_status =	get_power_status,
 	.get_adapter_status =	get_adapter_status,
 };
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 12e296d..48e3f4e 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -148,6 +148,9 @@
 	raw_spinlock_t *lock = &desc->dev->msi_lock;
 	unsigned long flags;
 
+	if (!desc->msi_attrib.can_mask)
+		return;
+
 	raw_spin_lock_irqsave(lock, flags);
 	desc->msi_mask &= ~clear;
 	desc->msi_mask |= set;
@@ -181,7 +184,8 @@
 {
 	void __iomem *desc_addr = pci_msix_desc_addr(desc);
 
-	writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+	if (desc->msi_attrib.can_mask)
+		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 }
 
 static inline void pci_msix_mask(struct msi_desc *desc)
@@ -200,23 +204,17 @@
 
 static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
 {
-	if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-		return;
-
 	if (desc->msi_attrib.is_msix)
 		pci_msix_mask(desc);
-	else if (desc->msi_attrib.maskbit)
+	else
 		pci_msi_mask(desc, mask);
 }
 
 static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
 {
-	if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-		return;
-
 	if (desc->msi_attrib.is_msix)
 		pci_msix_unmask(desc);
-	else if (desc->msi_attrib.maskbit)
+	else
 		pci_msi_unmask(desc, mask);
 }
 
@@ -370,6 +368,11 @@
 			for (i = 0; i < entry->nvec_used; i++)
 				BUG_ON(irq_has_action(entry->irq + i));
 
+	if (dev->msi_irq_groups) {
+		msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
+		dev->msi_irq_groups = NULL;
+	}
+
 	pci_msi_teardown_msi_irqs(dev);
 
 	list_for_each_entry_safe(entry, tmp, msi_list, list) {
@@ -381,11 +384,6 @@
 		list_del(&entry->list);
 		free_msi_entry(entry);
 	}
-
-	if (dev->msi_irq_groups) {
-		msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
-		dev->msi_irq_groups = NULL;
-	}
 }
 
 static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -479,12 +477,16 @@
 		goto out;
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	/* Lies, damned lies, and MSIs */
+	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
+		control |= PCI_MSI_FLAGS_MASKBIT;
 
 	entry->msi_attrib.is_msix	= 0;
 	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
 	entry->msi_attrib.is_virtual    = 0;
 	entry->msi_attrib.entry_nr	= 0;
-	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
+	entry->msi_attrib.can_mask	= !pci_msi_ignore_mask &&
+					  !!(control & PCI_MSI_FLAGS_MASKBIT);
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
@@ -495,7 +497,7 @@
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
 
 	/* Save the initial mask status */
-	if (entry->msi_attrib.maskbit)
+	if (entry->msi_attrib.can_mask)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);
 
 out:
@@ -639,10 +641,13 @@
 		entry->msi_attrib.is_virtual =
 			entry->msi_attrib.entry_nr >= vec_count;
 
+		entry->msi_attrib.can_mask	= !pci_msi_ignore_mask &&
+						  !entry->msi_attrib.is_virtual;
+
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
 
-		if (!entry->msi_attrib.is_virtual) {
+		if (entry->msi_attrib.can_mask) {
 			addr = pci_msix_desc_addr(entry);
 			entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 		}
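The thread through the MSI changes above: the per-access tests of maskbit, is_virtual and pci_msi_ignore_mask collapse into a single can_mask flag computed once at setup time, and the low-level mask writers now check it themselves. That is also what makes the "Lies, damned lies" quirk work: a device flagged with PCI_DEV_FLAGS_HAS_MSI_MASKING (the Nvidia ION AHCI fixup added in quirks.c below) gets PCI_MSI_FLAGS_MASKBIT forced on even though it does not advertise the capability.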
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1579a37..3d2fb39 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5106,12 +5106,13 @@
 	return pci_parent_bus_reset(dev, probe);
 }
 
-static void pci_dev_lock(struct pci_dev *dev)
+void pci_dev_lock(struct pci_dev *dev)
 {
 	pci_cfg_access_lock(dev);
 	/* block PM suspend, driver probe, etc. */
 	device_lock(&dev->dev);
 }
+EXPORT_SYMBOL_GPL(pci_dev_lock);
 
 /* Return 1 on successful lock, 0 on contention */
 int pci_dev_trylock(struct pci_dev *dev)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index aedb78c..003950c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5851,3 +5851,9 @@
 			 pci_fixup_pericom_acs_store_forward);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303,
 			 pci_fixup_pericom_acs_store_forward);
+
+static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
+{
+	pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index bae9d42..ecab9064 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -598,14 +598,14 @@
 
 #define PIN_IRQ_PENDING	(BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
 
-static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
+static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
 {
 	struct amd_gpio *gpio_dev = dev_id;
 	struct gpio_chip *gc = &gpio_dev->gc;
-	irqreturn_t ret = IRQ_NONE;
 	unsigned int i, irqnr;
 	unsigned long flags;
 	u32 __iomem *regs;
+	bool ret = false;
 	u32  regval;
 	u64 status, mask;
 
@@ -627,6 +627,14 @@
 		/* Each status bit covers four pins */
 		for (i = 0; i < 4; i++) {
 			regval = readl(regs + i);
+			/* caused wake on resume context for shared IRQ */
+			if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) {
+				dev_dbg(&gpio_dev->pdev->dev,
+					"Waking due to GPIO %d: 0x%x",
+					irqnr + i, regval);
+				return true;
+			}
+
 			if (!(regval & PIN_IRQ_PENDING) ||
 			    !(regval & BIT(INTERRUPT_MASK_OFF)))
 				continue;
@@ -650,9 +658,12 @@
 			}
 			writel(regval, regs + i);
 			raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
-			ret = IRQ_HANDLED;
+			ret = true;
 		}
 	}
+	/* no pin was the wake source in the resume check for the shared IRQ */
+	if (irq < 0)
+		return false;
 
 	/* Signal EOI to the GPIO unit */
 	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
@@ -664,6 +675,16 @@
 	return ret;
 }
 
+static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
+{
+	return IRQ_RETVAL(do_amd_gpio_irq_handler(irq, dev_id));
+}
+
+static bool __maybe_unused amd_gpio_check_wake(void *dev_id)
+{
+	return do_amd_gpio_irq_handler(-1, dev_id);
+}
+
 static int amd_get_groups_count(struct pinctrl_dev *pctldev)
 {
 	struct amd_gpio *gpio_dev = pinctrl_dev_get_drvdata(pctldev);
@@ -1033,6 +1054,7 @@
 		goto out2;
 
 	platform_set_drvdata(pdev, gpio_dev);
+	acpi_register_wakeup_handler(gpio_dev->irq, amd_gpio_check_wake, gpio_dev);
 
 	dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
 	return ret;
@@ -1050,6 +1072,7 @@
 	gpio_dev = platform_get_drvdata(pdev);
 
 	gpiochip_remove(&gpio_dev->gc);
+	acpi_unregister_wakeup_handler(amd_gpio_check_wake, gpio_dev);
 
 	return 0;
 }
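On the irq < 0 convention above: the ACPI wake-check path has no interrupt context of its own, so amd_gpio_check_wake() passes -1 as a sentinel. In that mode do_amd_gpio_irq_handler() only reports whether some pin has WAKE_STS set, returning before the status write-back and the EOI to the GPIO unit, both of which must happen only from the real (shared) interrupt handler.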
diff --git a/drivers/pinctrl/pinctrl-apple-gpio.c b/drivers/pinctrl/pinctrl-apple-gpio.c
index 0cc346b..a786107 100644
--- a/drivers/pinctrl/pinctrl-apple-gpio.c
+++ b/drivers/pinctrl/pinctrl-apple-gpio.c
@@ -258,7 +258,7 @@
 	       pctl->base + REG_IRQ(irqgrp, data->hwirq));
 }
 
-static int apple_gpio_irq_type(unsigned int type)
+static unsigned int apple_gpio_irq_type(unsigned int type)
 {
 	switch (type & IRQ_TYPE_SENSE_MASK) {
 	case IRQ_TYPE_EDGE_RISING:
@@ -272,7 +272,7 @@
 	case IRQ_TYPE_LEVEL_LOW:
 		return REG_GPIOx_IN_IRQ_LO;
 	default:
-		return -EINVAL;
+		return REG_GPIOx_IN_IRQ_OFF;
 	}
 }
 
@@ -288,7 +288,7 @@
 {
 	struct apple_gpio_pinctrl *pctl =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
-	int irqtype = apple_gpio_irq_type(irqd_get_trigger_type(data));
+	unsigned int irqtype = apple_gpio_irq_type(irqd_get_trigger_type(data));
 
 	apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
 			   FIELD_PREP(REG_GPIOx_MODE, irqtype));
@@ -313,10 +313,10 @@
 {
 	struct apple_gpio_pinctrl *pctl =
 		gpiochip_get_data(irq_data_get_irq_chip_data(data));
-	int irqtype = apple_gpio_irq_type(type);
+	unsigned int irqtype = apple_gpio_irq_type(type);
 
-	if (irqtype < 0)
-		return irqtype;
+	if (irqtype == REG_GPIOx_IN_IRQ_OFF)
+		return -EINVAL;
 
 	apple_gpio_set_reg(pctl, data->hwirq, REG_GPIOx_MODE,
 			   FIELD_PREP(REG_GPIOx_MODE, irqtype));
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index b9191f1..3e0c007 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -197,6 +197,7 @@
 	select PINMUX
 	select PINCONF
 	select GENERIC_PINCONF
+	select GPIOLIB
 	select GPIOLIB_IRQCHIP
 	select IRQ_DOMAIN_HIERARCHY
 	help
@@ -211,6 +212,7 @@
 	select PINMUX
 	select PINCONF
 	select GENERIC_PINCONF
+	select GPIOLIB
 	select GPIOLIB_IRQCHIP
 	select IRQ_DOMAIN_HIERARCHY
 	help
diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c
index c51793f..fdfd7b8 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdm845.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c
@@ -1310,6 +1310,7 @@
 	.ngpios = 151,
 	.wakeirq_map = sdm845_pdc_map,
 	.nwakeirq_map = ARRAY_SIZE(sdm845_pdc_map),
+	.wakeirq_dual_edge_errata = true,
 };
 
 static const struct msm_pinctrl_soc_data sdm845_acpi_pinctrl = {
diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c
index 4d8f863..1c042d3 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8350.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c
@@ -1597,10 +1597,10 @@
 	[200] = PINGROUP(200, qdss_gpio, _, _, _, _, _, _, _, _),
 	[201] = PINGROUP(201, _, _, _, _, _, _, _, _, _),
 	[202] = PINGROUP(202, _, _, _, _, _, _, _, _, _),
-	[203] = UFS_RESET(ufs_reset, 0x1d8000),
-	[204] = SDC_PINGROUP(sdc2_clk, 0x1cf000, 14, 6),
-	[205] = SDC_PINGROUP(sdc2_cmd, 0x1cf000, 11, 3),
-	[206] = SDC_PINGROUP(sdc2_data, 0x1cf000, 9, 0),
+	[203] = UFS_RESET(ufs_reset, 0xd8000),
+	[204] = SDC_PINGROUP(sdc2_clk, 0xcf000, 14, 6),
+	[205] = SDC_PINGROUP(sdc2_cmd, 0xcf000, 11, 3),
+	[206] = SDC_PINGROUP(sdc2_data, 0xcf000, 9, 0),
 };
 
 static const struct msm_gpio_wakeirq_map sm8350_pdc_map[] = {
diff --git a/drivers/pinctrl/ralink/pinctrl-mt7620.c b/drivers/pinctrl/ralink/pinctrl-mt7620.c
index 425d55a..6853b5b 100644
--- a/drivers/pinctrl/ralink/pinctrl-mt7620.c
+++ b/drivers/pinctrl/ralink/pinctrl-mt7620.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <asm/mach-ralink/ralink_regs.h>
 #include <asm/mach-ralink/mt7620.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra.c b/drivers/pinctrl/tegra/pinctrl-tegra.c
index 8d734bfc..50bd26a 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra.c
@@ -275,7 +275,7 @@
 	return 0;
 }
 
-static struct tegra_pingroup *tegra_pinctrl_get_group(struct pinctrl_dev *pctldev,
+static const struct tegra_pingroup *tegra_pinctrl_get_group(struct pinctrl_dev *pctldev,
 					unsigned int offset)
 {
 	struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);
@@ -289,7 +289,7 @@
 			continue;
 		for (j = 0; j < num_pins; j++) {
 			if (offset == pins[j])
-				return (struct tegra_pingroup *)&pmx->soc->groups[group];
+				return &pmx->soc->groups[group];
 		}
 	}
 
diff --git a/drivers/pinctrl/tegra/pinctrl-tegra194.c b/drivers/pinctrl/tegra/pinctrl-tegra194.c
index b4fef91..5c1dfcb 100644
--- a/drivers/pinctrl/tegra/pinctrl-tegra194.c
+++ b/drivers/pinctrl/tegra/pinctrl-tegra194.c
@@ -1387,7 +1387,6 @@
 		.schmitt_bit = schmitt_b,			\
 		.drvtype_bit = 13,				\
 		.lpdr_bit = e_lpdr,				\
-		.drv_reg = -1,					\
 
 #define drive_touch_clk_pcc4            DRV_PINGROUP_ENTRY_Y(0x2004,	12,	5,	20,	5,	-1,	-1,	-1,	-1,	1)
 #define drive_uart3_rx_pcc6             DRV_PINGROUP_ENTRY_Y(0x200c,	12,	5,	20,	5,	-1,	-1,	-1,	-1,	1)
diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c
index 9d1e7e03..4020b83 100644
--- a/drivers/platform/chrome/cros_ec_ishtp.c
+++ b/drivers/platform/chrome/cros_ec_ishtp.c
@@ -41,9 +41,12 @@
 #define ISHTP_SEND_TIMEOUT			(3 * HZ)
 
 /* ISH Transport CrOS EC ISH client unique GUID */
-static const guid_t cros_ish_guid =
-	GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
-		  0xb0, 0xd8, 0x9e, 0x7c, 0xda,	0xe0, 0xd6, 0xa0);
+static const struct ishtp_device_id cros_ec_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
+		  0xb0, 0xd8, 0x9e, 0x7c, 0xda,	0xe0, 0xd6, 0xa0), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table);
 
 struct header {
 	u8 channel;
@@ -389,7 +392,7 @@
 	ishtp_set_tx_ring_size(cros_ish_cl, CROS_ISH_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(cros_ish_cl, CROS_ISH_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &cros_ish_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &cros_ec_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ish client uuid not found\n");
@@ -765,7 +768,7 @@
 
 static struct ishtp_cl_driver	cros_ec_ishtp_driver = {
 	.name = "cros_ec_ishtp",
-	.guid = &cros_ish_guid,
+	.id = cros_ec_ishtp_id_table,
 	.probe = cros_ec_ishtp_probe,
 	.remove = cros_ec_ishtp_remove,
 	.reset = cros_ec_ishtp_reset,
@@ -791,4 +794,3 @@
 MODULE_AUTHOR("Rushikesh S Kadam <rushikesh.s.kadam@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c
index 0b7f58f..c897a2f 100644
--- a/drivers/platform/mellanox/mlxreg-lc.c
+++ b/drivers/platform/mellanox/mlxreg-lc.c
@@ -413,7 +413,7 @@
 				int size)
 {
 	struct mlxreg_hotplug_device *dev = devs;
-	int i;
+	int i, ret;
 
 	/* Create static I2C device feeding by auxiliary or main power. */
 	for (i = 0; i < size; i++, dev++) {
@@ -423,6 +423,7 @@
 				dev->brdinfo->type, dev->nr, dev->brdinfo->addr);
 
 			dev->adapter = NULL;
+			ret = PTR_ERR(dev->client);
 			goto fail_create_static_devices;
 		}
 	}
@@ -435,7 +436,7 @@
 		i2c_unregister_device(dev->client);
 		dev->client = NULL;
 	}
-	return IS_ERR(dev->client);
+	return ret;
 }
 
 static void
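
The mlxreg-lc change fixes a classic error-path bug: the old code
derived its return value from dev->client *after* the unwind loop had
already reset that pointer, so the function could report success on
failure. Capturing the errno at the point of failure avoids depending
on state that the cleanup code clobbers. A minimal sketch of the
pattern, with hypothetical helpers:

	static int create_all(struct i2c_client **clients, int n)
	{
		int i, ret;

		for (i = 0; i < n; i++) {
			clients[i] = hypothetical_create(i);
			if (IS_ERR(clients[i])) {
				ret = PTR_ERR(clients[i]);	/* capture now */
				goto unwind;
			}
		}
		return 0;

	unwind:
		while (--i >= 0) {
			hypothetical_destroy(clients[i]);
			clients[i] = NULL;	/* errno no longer derivable */
		}
		return ret;
	}
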
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index d4c079f..7400bc5 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -185,7 +185,7 @@
 
 config AMD_PMC
 	tristate "AMD SoC PMC driver"
-	depends on ACPI && PCI
+	depends on ACPI && PCI && RTC_CLASS
 	help
 	  The driver provides support for AMD Power Management Controller
 	  primarily responsible for S2Idle transactions that are driven from
diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig
index 2fffa57..fe224a5 100644
--- a/drivers/platform/x86/dell/Kconfig
+++ b/drivers/platform/x86/dell/Kconfig
@@ -187,7 +187,7 @@
 
 config DELL_WMI_DESCRIPTOR
 	tristate
-	default m
+	default n
 	depends on ACPI_WMI
 
 config DELL_WMI_LED
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index b183967..435a91f 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -331,9 +331,11 @@
 	INIT_WORK(&hpled_led.work, delayed_set_status_worker);
 	ret = led_classdev_register(NULL, &hpled_led.led_classdev);
 	if (ret) {
+		i8042_remove_filter(hp_accel_i8042_filter);
 		lis3lv02d_joystick_disable(&lis3_dev);
 		lis3lv02d_poweroff(&lis3_dev);
 		flush_work(&hpled_led.work);
+		lis3lv02d_remove_fs(&lis3_dev);
 		return ret;
 	}
 
diff --git a/drivers/platform/x86/intel/ishtp_eclite.c b/drivers/platform/x86/intel/ishtp_eclite.c
index 12fc98a..93ac8b2 100644
--- a/drivers/platform/x86/intel/ishtp_eclite.c
+++ b/drivers/platform/x86/intel/ishtp_eclite.c
@@ -93,9 +93,12 @@
 };
 
 /* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
-static const guid_t ecl_ishtp_guid =
-	GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
-		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
+static const struct ishtp_device_id ecl_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
+		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table);
 
 /* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */
 static const guid_t ecl_acpi_guid =
@@ -462,7 +465,7 @@
 	ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
 		return -ENOENT;
@@ -674,7 +677,7 @@
 
 static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
 	.name = "ishtp-eclite",
-	.guid = &ecl_ishtp_guid,
+	.id = ecl_ishtp_id_table,
 	.probe = ecl_ishtp_cl_probe,
 	.remove = ecl_ishtp_cl_remove,
 	.reset = ecl_ishtp_cl_reset,
@@ -698,4 +701,3 @@
 MODULE_AUTHOR("K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 7ee010a..c1d9ed9 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -152,7 +152,7 @@
 
 static const struct sabi_config sabi_configs[] = {
 	{
-		/* I don't know if it is really 2, but it it is
+		/* I don't know if it is really 2, but it is
 		 * less than 3 anyway */
 		.sabi_version = 2,
 
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 9472aae..c4d9c45 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -888,8 +888,10 @@
 			break;
 		if (!item)
 			break;
-		if (!*item)
+		if (!*item) {
+			kfree(item);
 			continue;
+		}
 
 		/* It is not allowed to have '/' for file name. Convert it into '\'. */
 		strreplace(item, '/', '\\');
@@ -902,6 +904,7 @@
 		setting = kzalloc(sizeof(*setting), GFP_KERNEL);
 		if (!setting) {
 			ret = -ENOMEM;
+			kfree(item);
 			goto fail_clear_attr;
 		}
 		setting->index = i;
@@ -916,7 +919,6 @@
 		}
 		kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
 		tlmi_priv.setting[i] = setting;
-		tlmi_priv.settings_count++;
 		kfree(item);
 	}
 
@@ -983,7 +985,12 @@
 
 static int tlmi_probe(struct wmi_device *wdev, const void *context)
 {
-	tlmi_analyze();
+	int ret;
+
+	ret = tlmi_analyze();
+	if (ret)
+		return ret;
+
 	return tlmi_sysfs_init();
 }
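
Both think-lmi hunks close leaks of the per-iteration "item" buffer:
every path that leaves the loop body early (the empty-string continue
and the -ENOMEM bail-out) must free the allocation it still owns. The
rule as a minimal sketch, with hypothetical helpers:

	for (i = 0; i < count; i++) {
		char *item = hypothetical_get(i);	/* kmalloc'd copy */

		if (!item)
			break;
		if (!*item) {
			kfree(item);	/* early continue: still owned here */
			continue;
		}
		if (hypothetical_process(item) < 0) {
			kfree(item);	/* early exit: still owned here */
			return -ENOMEM;
		}
		kfree(item);		/* normal path */
	}
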
 
diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h
index f8e2682..2ce5086 100644
--- a/drivers/platform/x86/think-lmi.h
+++ b/drivers/platform/x86/think-lmi.h
@@ -55,7 +55,6 @@
 struct think_lmi {
 	struct wmi_device *wmi_device;
 
-	int settings_count;
 	bool can_set_bios_settings;
 	bool can_get_bios_selections;
 	bool can_set_bios_password;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 9c632df..b3ac9c3 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1105,15 +1105,6 @@
 	return status;
 }
 
-/* Query FW and update rfkill sw state for all rfkill switches */
-static void tpacpi_rfk_update_swstate_all(void)
-{
-	unsigned int i;
-
-	for (i = 0; i < TPACPI_RFK_SW_MAX; i++)
-		tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]);
-}
-
 /*
  * Sync the HW-blocking state of all rfkill switches,
  * do notice it causes the rfkill core to schedule uevents
@@ -3074,9 +3065,6 @@
 	if (wlsw == TPACPI_RFK_RADIO_OFF)
 		tpacpi_rfk_update_hwblock_state(true);
 
-	/* Sync sw blocking state */
-	tpacpi_rfk_update_swstate_all();
-
 	/* Sync hw blocking state last if it is hw-unblocked */
 	if (wlsw == TPACPI_RFK_RADIO_ON)
 		tpacpi_rfk_update_hwblock_state(false);
@@ -8766,6 +8754,7 @@
 	TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (1st gen) */
 	TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (2nd gen) */
 	TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (3rd gen) */
+	TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (4th gen) */
 	TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL),	/* P15 (1st gen) / P15v (1st gen) */
 	TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL),	/* X1 Carbon (9th gen) */
 };
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 44faa3a..b740866 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -166,16 +166,13 @@
 
 static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 {
-	struct em_perf_domain *pd;
 	struct dtpm_cpu *dtpm_cpu;
 
-	pd = em_cpu_get(cpu);
-	if (!pd)
-		return -EINVAL;
-
 	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		dtpm_update_power(&dtpm_cpu->dtpm);
 
-	return dtpm_update_power(&dtpm_cpu->dtpm);
+	return 0;
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index 6bc5791..08e429a 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -1699,12 +1699,9 @@
 
 /* PTP Hardware Clock interface */
 
-/**
+/*
  * Maximum absolute value for write phase offset in picoseconds
  *
- * @channel:  channel
- * @delta_ns: delta in nanoseconds
- *
  * Destination signed register is 32-bit register in resolution of 50ps
  *
  * 0x7fffffff * 50 =  2147483647 * 50 = 107374182350
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 34f943c..0f1b5a7 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -1304,10 +1304,11 @@
 	if (!ext)
 		return -ENOMEM;
 
-	err = -EINVAL;
 	ext->mem = ptp_ocp_get_mem(bp, r);
-	if (!ext->mem)
+	if (IS_ERR(ext->mem)) {
+		err = PTR_ERR(ext->mem);
 		goto out;
+	}
 
 	ext->bp = bp;
 	ext->info = r->extra;
@@ -1371,8 +1372,8 @@
 	void __iomem *mem;
 
 	mem = ptp_ocp_get_mem(bp, r);
-	if (!mem)
-		return -EINVAL;
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
 
 	bp_assign_entry(bp, r, mem);
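
Both ptp_ocp hunks align the callers with ptp_ocp_get_mem()'s actual
error convention: the helper evidently returns an ERR_PTR() value on
failure, which a NULL test silently accepts as valid memory. The
general rule, as a short sketch:

	void __iomem *mem;

	mem = ptp_ocp_get_mem(bp, r);	/* ERR_PTR(-E...) on failure */
	if (IS_ERR(mem))
		return PTR_ERR(mem);	/* propagate the real errno,
					 * not a blanket -EINVAL */
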
 
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index aa29841..21e3b05 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -476,7 +476,9 @@
 	depends on PLAT_SAMSUNG || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
-	  Generic PWM framework driver for Samsung.
+	  Generic PWM framework driver for Samsung S3C24xx, S3C64xx, S5Pv210
+	  and Exynos SoCs.
+	  Choose Y here only if you are building for one of these Samsung SoCs.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-samsung.
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 4527f09..fb04a43 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -532,6 +532,15 @@
 	struct pwm_chip *chip;
 	int err;
 
+	/*
+	 * Some low-level drivers' implementations of .apply() make use of
+	 * mutexes, and some drivers only return once the new configuration
+	 * is active, so calling pwm_apply_state() from atomic context is a
+	 * bad idea. Make it explicit that this function might sleep.
+	 */
+	might_sleep();
+
 	if (!pwm || !state || !state->period ||
 	    state->duty_cycle > state->period)
 		return -EINVAL;
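
might_sleep() both documents the constraint and, with
CONFIG_DEBUG_ATOMIC_SLEEP enabled, warns immediately whenever the
function is entered from atomic context, even on calls where no
lowlevel driver would actually have slept. A minimal sketch of
annotating such an API, with a hypothetical widget:

	struct widget {
		struct mutex lock;
		int state;
	};

	int widget_configure(struct widget *w, int state)
	{
		might_sleep();		/* warn on atomic callers up front */

		mutex_lock(&w->lock);	/* may sleep */
		w->state = state;
		mutex_unlock(&w->lock);

		return 0;
	}
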
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index e748604..98b34ea 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -24,7 +24,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index dd94c431..0a4ff55 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -117,6 +117,20 @@
 	return (channel == 0) ? 0 : (channel + 1);
 }
 
+static void __pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
+				      struct pwm_device *pwm)
+{
+	unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm);
+	u32 tcon;
+
+	tcon = readl(chip->base + REG_TCON);
+	tcon |= TCON_MANUALUPDATE(tcon_chan);
+	writel(tcon, chip->base + REG_TCON);
+
+	tcon &= ~TCON_MANUALUPDATE(tcon_chan);
+	writel(tcon, chip->base + REG_TCON);
+}
+
 static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm,
 				    unsigned int channel, u8 divisor)
 {
@@ -276,6 +290,13 @@
 	tcon &= ~TCON_AUTORELOAD(tcon_chan);
 	writel(tcon, our_chip->base + REG_TCON);
 
+	/*
+	 * In case the PWM is at 100% duty cycle, force a manual
+	 * update to prevent the signal from staying high.
+	 */
+	if (readl(our_chip->base + REG_TCMPB(pwm->hwpwm)) == (u32)-1U)
+		__pwm_samsung_manual_update(our_chip, pwm);
+
 	our_chip->disabled_mask |= BIT(pwm->hwpwm);
 
 	spin_unlock_irqrestore(&samsung_pwm_lock, flags);
@@ -284,18 +305,11 @@
 static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
 				      struct pwm_device *pwm)
 {
-	unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm);
-	u32 tcon;
 	unsigned long flags;
 
 	spin_lock_irqsave(&samsung_pwm_lock, flags);
 
-	tcon = readl(chip->base + REG_TCON);
-	tcon |= TCON_MANUALUPDATE(tcon_chan);
-	writel(tcon, chip->base + REG_TCON);
-
-	tcon &= ~TCON_MANUALUPDATE(tcon_chan);
-	writel(tcon, chip->base + REG_TCON);
+	__pwm_samsung_manual_update(chip, pwm);
 
 	spin_unlock_irqrestore(&samsung_pwm_lock, flags);
 }
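
The refactor follows the usual double-underscore convention: the new
__pwm_samsung_manual_update() touches the registers and assumes the
caller already holds samsung_pwm_lock, so pwm_samsung_disable() can
call it inside its existing critical section, while the public
pwm_samsung_manual_update() remains the locking wrapper. The shape of
the pattern, with hypothetical names:

	static void __chip_frob(struct chip *c)
	{
		/* caller must hold c->lock */
		writel(readl(c->base) | FROB_BIT, c->base);
	}

	static void chip_frob(struct chip *c)
	{
		unsigned long flags;

		spin_lock_irqsave(&c->lock, flags);
		__chip_frob(c);
		spin_unlock_irqrestore(&c->lock, flags);
	}
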
diff --git a/drivers/pwm/pwm-visconti.c b/drivers/pwm/pwm-visconti.c
index af4e37d..927c4cb 100644
--- a/drivers/pwm/pwm-visconti.c
+++ b/drivers/pwm/pwm-visconti.c
@@ -144,28 +144,17 @@
 	if (IS_ERR(priv->base))
 		return PTR_ERR(priv->base);
 
-	platform_set_drvdata(pdev, priv);
-
 	priv->chip.dev = dev;
 	priv->chip.ops = &visconti_pwm_ops;
 	priv->chip.npwm = 4;
 
-	ret = pwmchip_add(&priv->chip);
+	ret = devm_pwmchip_add(&pdev->dev, &priv->chip);
 	if (ret < 0)
 		return dev_err_probe(&pdev->dev, ret, "Cannot register visconti PWM\n");
 
 	return 0;
 }
 
-static int visconti_pwm_remove(struct platform_device *pdev)
-{
-	struct visconti_pwm_chip *priv = platform_get_drvdata(pdev);
-
-	pwmchip_remove(&priv->chip);
-
-	return 0;
-}
-
 static const struct of_device_id visconti_pwm_of_match[] = {
 	{ .compatible = "toshiba,visconti-pwm", },
 	{ }
@@ -178,7 +167,6 @@
 		.of_match_table = visconti_pwm_of_match,
 	},
 	.probe = visconti_pwm_probe,
-	.remove = visconti_pwm_remove,
 };
 module_platform_driver(visconti_pwm_driver);
 
diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index ea2aa15..480bfc2 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -56,7 +56,7 @@
 #define to_vt8500_chip(chip)	container_of(chip, struct vt8500_chip, chip)
 
 #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
-static inline void pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 bitmask)
+static inline void vt8500_pwm_busy_wait(struct vt8500_chip *vt8500, int nr, u8 bitmask)
 {
 	int loops = msecs_to_loops(10);
 	u32 mask = bitmask << (nr << 8);
@@ -106,18 +106,18 @@
 	dc = c;
 
 	writel(prescale, vt8500->base + REG_SCALAR(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_SCALAR_UPDATE);
 
 	writel(pv, vt8500->base + REG_PERIOD(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_PERIOD_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_PERIOD_UPDATE);
 
 	writel(dc, vt8500->base + REG_DUTY(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_DUTY_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_DUTY_UPDATE);
 
 	val = readl(vt8500->base + REG_CTRL(pwm->hwpwm));
 	val |= CTRL_AUTOLOAD;
 	writel(val, vt8500->base + REG_CTRL(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
 
 	clk_disable(vt8500->clk);
 	return 0;
@@ -138,7 +138,7 @@
 	val = readl(vt8500->base + REG_CTRL(pwm->hwpwm));
 	val |= CTRL_ENABLE;
 	writel(val, vt8500->base + REG_CTRL(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
 
 	return 0;
 }
@@ -151,7 +151,7 @@
 	val = readl(vt8500->base + REG_CTRL(pwm->hwpwm));
 	val &= ~CTRL_ENABLE;
 	writel(val, vt8500->base + REG_CTRL(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
 
 	clk_disable(vt8500->clk);
 }
@@ -171,7 +171,7 @@
 		val &= ~CTRL_INVERT;
 
 	writel(val, vt8500->base + REG_CTRL(pwm->hwpwm));
-	pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
+	vt8500_pwm_busy_wait(vt8500, pwm->hwpwm, STATUS_CTRL_UPDATE);
 
 	return 0;
 }
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 7208eeb8..058e56a1 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -441,6 +441,7 @@
 
 config RTC_DRV_PCF8523
 	tristate "NXP PCF8523"
+	select REGMAP_I2C
 	help
 	  If you say yes here you get support for the NXP PCF8523 RTC
 	  chips.
@@ -582,14 +583,6 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-tps65910.
 
-config RTC_DRV_TPS80031
-	tristate "TI TPS80031/TPS80032 RTC driver"
-	depends on MFD_TPS80031
-	help
-	  TI Power Management IC TPS80031 supports RTC functionality
-	  along with alarm. This driver supports the RTC driver for
-	  the TPS80031 RTC module.
-
 config RTC_DRV_RC5T583
 	tristate "RICOH 5T583 RTC driver"
 	depends on MFD_RC5T583
@@ -1929,4 +1922,14 @@
 	  This can also be built as a module. If so, the module will
 	  be named "rtc_wilco_ec".
 
+config RTC_DRV_MSC313
+	tristate "MStar MSC313 RTC"
+        depends on ARCH_MSTARV7 || COMPILE_TEST
+	help
+	  If you say yes here you get support for the Mstar MSC313e On-Chip
+	  Real Time Clock.
+
+	  This driver can also be built as a module, if so, the module
+	  will be called "rtc-msc313".
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 5ceeafe..678a8ef 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -103,6 +103,7 @@
 obj-$(CONFIG_RTC_DRV_MESON)	+= rtc-meson.o
 obj-$(CONFIG_RTC_DRV_MOXART)	+= rtc-moxart.o
 obj-$(CONFIG_RTC_DRV_MPC5121)	+= rtc-mpc5121.o
+obj-$(CONFIG_RTC_DRV_MSC313)	+= rtc-msc313.o
 obj-$(CONFIG_RTC_DRV_MSM6242)	+= rtc-msm6242.o
 obj-$(CONFIG_RTC_DRV_MT2712)	+= rtc-mt2712.o
 obj-$(CONFIG_RTC_DRV_MT6397)	+= rtc-mt6397.o
@@ -169,7 +170,6 @@
 obj-$(CONFIG_RTC_DRV_TEST)	+= rtc-test.o
 obj-$(CONFIG_RTC_DRV_TPS6586X)	+= rtc-tps6586x.o
 obj-$(CONFIG_RTC_DRV_TPS65910)	+= rtc-tps65910.o
-obj-$(CONFIG_RTC_DRV_TPS80031)	+= rtc-tps80031.o
 obj-$(CONFIG_RTC_DRV_TWL4030)	+= rtc-twl.o
 obj-$(CONFIG_RTC_DRV_V3020)	+= rtc-v3020.o
 obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index f77bc08..4b460c6 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -232,6 +232,7 @@
 	rtc->pie_enabled = 0;
 
 	set_bit(RTC_FEATURE_ALARM, rtc->features);
+	set_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
 
 	return rtc;
 }
@@ -334,7 +335,8 @@
 	 * letting any rtc_class_open() users access it again
 	 */
 	rtc_proc_del_device(rtc);
-	cdev_device_del(&rtc->char_dev, &rtc->dev);
+	if (!test_bit(RTC_NO_CDEV, &rtc->flags))
+		cdev_device_del(&rtc->char_dev, &rtc->dev);
 	rtc->ops = NULL;
 	mutex_unlock(&rtc->ops_lock);
 }
@@ -363,7 +365,9 @@
 
 	rtc->id = id;
 	rtc->dev.parent = dev;
-	dev_set_name(&rtc->dev, "rtc%d", id);
+	err = dev_set_name(&rtc->dev, "rtc%d", id);
+	if (err)
+		return ERR_PTR(err);
 
 	err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc);
 	if (err)
@@ -386,6 +390,12 @@
 	if (!rtc->ops->set_alarm)
 		clear_bit(RTC_FEATURE_ALARM, rtc->features);
 
+	if (rtc->uie_unsupported)
+		clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
+
+	if (rtc->ops->set_offset)
+		set_bit(RTC_FEATURE_CORRECTION, rtc->features);
+
 	rtc->owner = owner;
 	rtc_device_get_offset(rtc);
 
@@ -397,12 +407,14 @@
 	rtc_dev_prepare(rtc);
 
 	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
-	if (err)
+	if (err) {
+		set_bit(RTC_NO_CDEV, &rtc->flags);
 		dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n",
 			 MAJOR(rtc->dev.devt), rtc->id);
-	else
+	} else {
 		dev_dbg(rtc->dev.parent, "char device (%d:%d)\n",
 			MAJOR(rtc->dev.devt), rtc->id);
+	}
 
 	rtc_proc_add_device(rtc);
 
diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index 5b8ebe8..e104972 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -208,6 +208,7 @@
 	const struct rtc_class_ops *ops = rtc->ops;
 	struct rtc_time tm;
 	struct rtc_wkalrm alarm;
+	struct rtc_param param;
 	void __user *uarg = (void __user *)arg;
 
 	err = mutex_lock_interruptible(&rtc->ops_lock);
@@ -221,6 +222,7 @@
 	switch (cmd) {
 	case RTC_EPOCH_SET:
 	case RTC_SET_TIME:
+	case RTC_PARAM_SET:
 		if (!capable(CAP_SYS_TIME))
 			err = -EACCES;
 		break;
@@ -382,6 +384,69 @@
 			err = -EFAULT;
 		return err;
 
+	case RTC_PARAM_GET:
+		if (copy_from_user(&param, uarg, sizeof(param))) {
+			mutex_unlock(&rtc->ops_lock);
+			return -EFAULT;
+		}
+
+		switch (param.param) {
+			long offset;
+		case RTC_PARAM_FEATURES:
+			if (param.index != 0)
+				err = -EINVAL;
+			param.uvalue = rtc->features[0];
+			break;
+
+		case RTC_PARAM_CORRECTION:
+			mutex_unlock(&rtc->ops_lock);
+			if (param.index != 0)
+				return -EINVAL;
+			err = rtc_read_offset(rtc, &offset);
+			mutex_lock(&rtc->ops_lock);
+			if (err == 0)
+				param.svalue = offset;
+			break;
+
+		default:
+			if (rtc->ops->param_get)
+				err = rtc->ops->param_get(rtc->dev.parent, &param);
+			else
+				err = -EINVAL;
+		}
+
+		if (!err)
+			if (copy_to_user(uarg, &param, sizeof(param)))
+				err = -EFAULT;
+
+		break;
+
+	case RTC_PARAM_SET:
+		if (copy_from_user(&param, uarg, sizeof(param))) {
+			mutex_unlock(&rtc->ops_lock);
+			return -EFAULT;
+		}
+
+		switch (param.param) {
+		case RTC_PARAM_FEATURES:
+			err = -EINVAL;
+			break;
+
+		case RTC_PARAM_CORRECTION:
+			mutex_unlock(&rtc->ops_lock);
+			if (param.index != 0)
+				return -EINVAL;
+			return rtc_set_offset(rtc, param.svalue);
+
+		default:
+			if (rtc->ops->param_set)
+				err = rtc->ops->param_set(rtc->dev.parent, &param);
+			else
+				err = -EINVAL;
+		}
+
+		break;
+
 	default:
 		/* Finally try the driver's ioctl interface */
 		if (ops->ioctl) {
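
From userspace the new commands take a struct rtc_param carrying the
parameter id plus a value union. A minimal sketch of reading the
feature bitmap, assuming the RTC_PARAM_* uapi definitions added
alongside this series are present in <linux/rtc.h>:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/rtc.h>

	int main(void)
	{
		struct rtc_param p;
		int fd;

		fd = open("/dev/rtc0", O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		memset(&p, 0, sizeof(p));
		p.param = RTC_PARAM_FEATURES;	/* index must be 0 */

		if (ioctl(fd, RTC_PARAM_GET, &p) < 0) {
			perror("RTC_PARAM_GET");
			close(fd);
			return 1;
		}

		printf("rtc features: %#llx\n", (unsigned long long)p.uvalue);
		close(fd);
		return 0;
	}
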
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 9a2bd49..d8e8357 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -423,6 +423,7 @@
 	if (err)
 		return err;
 	now = rtc_tm_to_time64(&tm);
+
 	if (scheduled <= now)
 		return -ETIME;
 	/*
@@ -447,6 +448,7 @@
 
 int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
+	ktime_t alarm_time;
 	int err;
 
 	if (!rtc->ops)
@@ -468,7 +470,15 @@
 	if (rtc->aie_timer.enabled)
 		rtc_timer_remove(rtc, &rtc->aie_timer);
 
-	rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time);
+	alarm_time = rtc_tm_to_ktime(alarm->time);
+	/*
+	 * Round down so we never miss a deadline, checking for past deadline is
+	 * done in __rtc_set_alarm
+	 */
+	if (test_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features))
+		alarm_time = ktime_sub_ns(alarm_time, (u64)alarm->time.tm_sec * NSEC_PER_SEC);
+
+	rtc->aie_timer.node.expires = alarm_time;
 	rtc->aie_timer.period = 0;
 	if (alarm->enabled)
 		err = rtc_timer_enqueue(rtc, &rtc->aie_timer);
@@ -561,7 +571,8 @@
 	if (rtc->uie_rtctimer.enabled == enabled)
 		goto out;
 
-	if (rtc->uie_unsupported || !test_bit(RTC_FEATURE_ALARM, rtc->features)) {
+	if (!test_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features) ||
+	    !test_bit(RTC_FEATURE_ALARM, rtc->features)) {
 		mutex_unlock(&rtc->ops_lock);
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 		return rtc_dev_update_irq_enable_emul(rtc, enabled);
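
Worked example of the new rounding: with RTC_FEATURE_ALARM_RES_MINUTE
set, an alarm requested for 10:42:37 has its 37 seconds subtracted and
is enqueued for 10:42:00, so minute-granular hardware can never fire
*after* the requested instant; requests already in the past are still
rejected by the existing "scheduled <= now" check in __rtc_set_alarm().
This is also why the rv3032 hunk further down can drop that driver's
old round-up-to-the-next-minute logic in favour of the core behaviour.
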
diff --git a/drivers/rtc/rtc-ab-eoz9.c b/drivers/rtc/rtc-ab-eoz9.c
index a9b3555..e188ab5 100644
--- a/drivers/rtc/rtc-ab-eoz9.c
+++ b/drivers/rtc/rtc-ab-eoz9.c
@@ -534,7 +534,6 @@
 	data->rtc->ops = &rtc_ops;
 	data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	data->rtc->range_max = RTC_TIMESTAMP_END_2099;
-	data->rtc->uie_unsupported = 1;
 	clear_bit(RTC_FEATURE_ALARM, data->rtc->features);
 
 	if (client->irq > 0) {
@@ -546,6 +545,8 @@
 			dev_err(dev, "failed to request alarm irq\n");
 			return ret;
 		}
+	} else {
+		clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, data->rtc->features);
 	}
 
 	if (client->irq > 0 || device_property_read_bool(dev, "wakeup-source")) {
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index b400488..ea33e14 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c
@@ -184,25 +184,9 @@
 {
 	int retval, i;
 	unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)];
-	unsigned long mins, secs = 0, cursec = 0;
-	struct rtc_time curtm;
+	unsigned long mins;
 
-	/* Get the number of seconds since 1970 */
-	secs = rtc_tm_to_time64(&alarm->time);
-
-	/*
-	 * Check whether alarm is set less than 1min.
-	 * Since our RTC doesn't support alarm resolution less than 1min,
-	 * return -EINVAL, so UIE EMUL can take it up, incase of UIE_ON
-	 */
-	ab8500_rtc_read_time(dev, &curtm); /* Read current time */
-	cursec = rtc_tm_to_time64(&curtm);
-	if ((secs - cursec) < 59) {
-		dev_dbg(dev, "Alarm less than 1 minute not supported\r\n");
-		return -EINVAL;
-	}
-
-	mins = secs / 60;
+	mins = (unsigned long)rtc_tm_to_time64(&alarm->time) / 60;
 
 	buf[2] = mins & 0xFF;
 	buf[1] = (mins >> 8) & 0xFF;
@@ -394,7 +378,8 @@
 	dev_pm_set_wake_irq(&pdev->dev, irq);
 	platform_set_drvdata(pdev, rtc);
 
-	rtc->uie_unsupported = 1;
+	set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features);
+	clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
 
 	rtc->range_max = (1ULL << 24) * 60 - 1; // 24-bit minutes + 59 secs
 	rtc->start_secs = RTC_TIMESTAMP_BEGIN_2000;
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index b3de6d2..2f83ade 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -199,11 +199,18 @@
 MODULE_DEVICE_TABLE(of, ds1302_dt_ids);
 #endif
 
+static const struct spi_device_id ds1302_spi_ids[] = {
+	{ .name = "ds1302", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(spi, ds1302_spi_ids);
+
 static struct spi_driver ds1302_driver = {
 	.driver.name	= "rtc-ds1302",
 	.driver.of_match_table = of_match_ptr(ds1302_dt_ids),
 	.probe		= ds1302_probe,
 	.remove		= ds1302_remove,
+	.id_table	= ds1302_spi_ids,
 };
 
 module_spi_driver(ds1302_driver);
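
These new spi_device_id tables (here and in the ds1390, mcp795 and
pcf2123 hunks below) matter for module autoloading: the SPI core
reports MODALIAS=spi:<name> to udev even for devices instantiated from
the device tree, so without an id_table plus MODULE_DEVICE_TABLE(spi,
...) the module is never loaded automatically. The minimal shape, with
a hypothetical "foo" device:

	static const struct spi_device_id foo_spi_ids[] = {
		{ .name = "foo" },	/* matches MODALIAS=spi:foo */
		{ /* sentinel */ }
	};
	MODULE_DEVICE_TABLE(spi, foo_spi_ids);

	static struct spi_driver foo_driver = {
		.driver = {
			.name = "foo",
		},
		.probe = foo_probe,
		.id_table = foo_spi_ids,
	};
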
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index 66fc861..93ce72b 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -219,12 +219,19 @@
 };
 MODULE_DEVICE_TABLE(of, ds1390_of_match);
 
+static const struct spi_device_id ds1390_spi_ids[] = {
+	{ .name = "ds1390" },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ds1390_spi_ids);
+
 static struct spi_driver ds1390_driver = {
 	.driver = {
 		.name	= "rtc-ds1390",
 		.of_match_table = of_match_ptr(ds1390_of_match),
 	},
 	.probe	= ds1390_probe,
+	.id_table = ds1390_spi_ids,
 };
 
 module_spi_driver(ds1390_driver);
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index f736f8c..6d383b6 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -557,7 +557,7 @@
 		 * registered automatically when being referenced.
 		 */
 		of_node_put(fixed_clock);
-		return 0;
+		return NULL;
 	}
 
 	/* First disable the clock */
diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
index bad7792..0d515b3 100644
--- a/drivers/rtc/rtc-mcp795.c
+++ b/drivers/rtc/rtc-mcp795.c
@@ -430,12 +430,19 @@
 MODULE_DEVICE_TABLE(of, mcp795_of_match);
 #endif
 
+static const struct spi_device_id mcp795_spi_ids[] = {
+	{ .name = "mcp795" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, mcp795_spi_ids);
+
 static struct spi_driver mcp795_driver = {
 		.driver = {
 				.name = "rtc-mcp795",
 				.of_match_table = of_match_ptr(mcp795_of_match),
 		},
 		.probe = mcp795_probe,
+		.id_table = mcp795_spi_ids,
 };
 
 module_spi_driver(mcp795_driver);
diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c
new file mode 100644
index 0000000..f3fde01
--- /dev/null
+++ b/drivers/rtc/rtc-msc313.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Real-time clock driver for MStar/SigmaStar ARMv7 SoCs.
+ * Based on the "Real Time Clock driver for msb252x" contained in
+ * various MStar kernels.
+ *
+ * (C) 2019 Daniel Palmer
+ * (C) 2021 Romain Perier
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+/* Registers */
+#define REG_RTC_CTRL		0x00
+#define REG_RTC_FREQ_CW_L	0x04
+#define REG_RTC_FREQ_CW_H	0x08
+#define REG_RTC_LOAD_VAL_L	0x0C
+#define REG_RTC_LOAD_VAL_H	0x10
+#define REG_RTC_MATCH_VAL_L	0x14
+#define REG_RTC_MATCH_VAL_H	0x18
+#define REG_RTC_STATUS_INT	0x1C
+#define REG_RTC_CNT_VAL_L	0x20
+#define REG_RTC_CNT_VAL_H	0x24
+
+/* Control bits for REG_RTC_CTRL */
+#define SOFT_RSTZ_BIT		BIT(0)
+#define CNT_EN_BIT		BIT(1)
+#define WRAP_EN_BIT		BIT(2)
+#define LOAD_EN_BIT		BIT(3)
+#define READ_EN_BIT		BIT(4)
+#define INT_MASK_BIT		BIT(5)
+#define INT_FORCE_BIT		BIT(6)
+#define INT_CLEAR_BIT		BIT(7)
+
+/* Control bits for REG_RTC_STATUS_INT */
+#define RAW_INT_BIT		BIT(0)
+#define ALM_INT_BIT		BIT(1)
+
+struct msc313_rtc {
+	struct rtc_device *rtc_dev;
+	void __iomem *rtc_base;
+};
+
+static int msc313_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev);
+	unsigned long seconds;
+
+	seconds = readw(priv->rtc_base + REG_RTC_MATCH_VAL_L)
+			| ((unsigned long)readw(priv->rtc_base + REG_RTC_MATCH_VAL_H) << 16);
+
+	rtc_time64_to_tm(seconds, &alarm->time);
+
+	if (!(readw(priv->rtc_base + REG_RTC_CTRL) & INT_MASK_BIT))
+		alarm->enabled = 1;
+
+	return 0;
+}
+
+static int msc313_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev);
+	u16 reg;
+
+	reg = readw(priv->rtc_base + REG_RTC_CTRL);
+	if (enabled)
+		reg &= ~INT_MASK_BIT;
+	else
+		reg |= INT_MASK_BIT;
+	writew(reg, priv->rtc_base + REG_RTC_CTRL);
+	return 0;
+}
+
+static int msc313_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev);
+	unsigned long seconds;
+
+	seconds = rtc_tm_to_time64(&alarm->time);
+	writew((seconds & 0xFFFF), priv->rtc_base + REG_RTC_MATCH_VAL_L);
+	writew((seconds >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_MATCH_VAL_H);
+
+	msc313_rtc_alarm_irq_enable(dev, alarm->enabled);
+
+	return 0;
+}
+
+static bool msc313_rtc_get_enabled(struct msc313_rtc *priv)
+{
+	return readw(priv->rtc_base + REG_RTC_CTRL) & CNT_EN_BIT;
+}
+
+static void msc313_rtc_set_enabled(struct msc313_rtc *priv)
+{
+	u16 reg;
+
+	reg = readw(priv->rtc_base + REG_RTC_CTRL);
+	reg |= CNT_EN_BIT;
+	writew(reg, priv->rtc_base + REG_RTC_CTRL);
+}
+
+static int msc313_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev);
+	u32 seconds;
+	u16 reg;
+
+	if (!msc313_rtc_get_enabled(priv))
+		return -EINVAL;
+
+	reg = readw(priv->rtc_base + REG_RTC_CTRL);
+	writew(reg | READ_EN_BIT, priv->rtc_base + REG_RTC_CTRL);
+
+	/* Wait for HW latch done */
+	while (readw(priv->rtc_base + REG_RTC_CTRL) & READ_EN_BIT)
+		udelay(1);
+
+	seconds = readw(priv->rtc_base + REG_RTC_CNT_VAL_L)
+			| ((unsigned long)readw(priv->rtc_base + REG_RTC_CNT_VAL_H) << 16);
+
+	rtc_time64_to_tm(seconds, tm);
+
+	return 0;
+}
+
+static int msc313_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev);
+	unsigned long seconds;
+	u16 reg;
+
+	seconds = rtc_tm_to_time64(tm);
+	writew(seconds & 0xFFFF, priv->rtc_base + REG_RTC_LOAD_VAL_L);
+	writew((seconds >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_LOAD_VAL_H);
+
+	/* Enable load for loading value into internal RTC counter */
+	reg = readw(priv->rtc_base + REG_RTC_CTRL);
+	writew(reg | LOAD_EN_BIT, priv->rtc_base + REG_RTC_CTRL);
+
+	/* Wait for HW latch done */
+	while (readw(priv->rtc_base + REG_RTC_CTRL) & LOAD_EN_BIT)
+		udelay(1);
+	msc313_rtc_set_enabled(priv);
+	return 0;
+}
+
+static const struct rtc_class_ops msc313_rtc_ops = {
+	.read_time = msc313_rtc_read_time,
+	.set_time = msc313_rtc_set_time,
+	.read_alarm = msc313_rtc_read_alarm,
+	.set_alarm = msc313_rtc_set_alarm,
+	.alarm_irq_enable = msc313_rtc_alarm_irq_enable,
+};
+
+static irqreturn_t msc313_rtc_interrupt(s32 irq, void *dev_id)
+{
+	struct msc313_rtc *priv = dev_get_drvdata(dev_id);
+	u16 reg;
+
+	reg = readw(priv->rtc_base + REG_RTC_STATUS_INT);
+	if (!(reg & ALM_INT_BIT))
+		return IRQ_NONE;
+
+	reg = readw(priv->rtc_base + REG_RTC_CTRL);
+	reg |= INT_CLEAR_BIT;
+	reg &= ~INT_FORCE_BIT;
+	writew(reg, priv->rtc_base + REG_RTC_CTRL);
+
+	rtc_update_irq(priv->rtc_dev, 1, RTC_IRQF | RTC_AF);
+
+	return IRQ_HANDLED;
+}
+
+static int msc313_rtc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct msc313_rtc *priv;
+	unsigned long rate;
+	struct clk *clk;
+	int ret;
+	int irq;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct msc313_rtc), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->rtc_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->rtc_base))
+		return PTR_ERR(priv->rtc_base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -EINVAL;
+
+	priv->rtc_dev = devm_rtc_allocate_device(dev);
+	if (IS_ERR(priv->rtc_dev))
+		return PTR_ERR(priv->rtc_dev);
+
+	priv->rtc_dev->ops = &msc313_rtc_ops;
+	priv->rtc_dev->range_max = U32_MAX;
+
+	ret = devm_request_irq(dev, irq, msc313_rtc_interrupt, IRQF_SHARED,
+			       dev_name(&pdev->dev), &pdev->dev);
+	if (ret) {
+		dev_err(dev, "Could not request IRQ\n");
+		return ret;
+	}
+
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "No input reference clock\n");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(dev, "Failed to enable the reference clock, %d\n", ret);
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare, clk);
+	if (ret)
+		return ret;
+
+	rate = clk_get_rate(clk);
+	writew(rate & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_L);
+	writew((rate >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_H);
+
+	platform_set_drvdata(pdev, priv);
+
+	return devm_rtc_register_device(priv->rtc_dev);
+}
+
+static const struct of_device_id msc313_rtc_of_match_table[] = {
+	{ .compatible = "mstar,msc313-rtc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, msc313_rtc_of_match_table);
+
+static struct platform_driver msc313_rtc_driver = {
+	.probe = msc313_rtc_probe,
+	.driver = {
+		.name = "msc313-rtc",
+		.of_match_table = msc313_rtc_of_match_table,
+	},
+};
+
+module_platform_driver(msc313_rtc_driver);
+
+MODULE_AUTHOR("Daniel Palmer <daniel@thingy.jp>");
+MODULE_AUTHOR("Romain Perier <romain.perier@gmail.com>");
+MODULE_DESCRIPTION("MStar RTC Driver");
+MODULE_LICENSE("GPL v2");
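
One nit in the new driver: the probe routine registers
clk_disable_unprepare() as a devm action through a function-pointer
cast to void (*)(void *). Calling a function through a mismatched
pointer type is undefined behaviour in C and trips Control Flow
Integrity checking; the conventional alternative is a one-line wrapper
(name hypothetical):

	static void msc313_clk_disable(void *data)
	{
		clk_disable_unprepare(data);
	}

	/* in probe: */
	ret = devm_add_action_or_reset(dev, msc313_clk_disable, clk);
	if (ret)
		return ret;
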
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index d46e0f0..4d4f3b1 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -1029,6 +1029,5 @@
 
 module_platform_driver(omap_rtc_driver);
 
-MODULE_ALIAS("platform:omap_rtc");
 MODULE_AUTHOR("George G. Davis (and others)");
 MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index 0f58cac..7473e6c 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -451,12 +451,21 @@
 MODULE_DEVICE_TABLE(of, pcf2123_dt_ids);
 #endif
 
+static const struct spi_device_id pcf2123_spi_ids[] = {
+	{ .name = "pcf2123", },
+	{ .name = "rv2123", },
+	{ .name = "rtc-pcf2123", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(spi, pcf2123_spi_ids);
+
 static struct spi_driver pcf2123_driver = {
 	.driver	= {
 			.name	= "rtc-pcf2123",
 			.of_match_table = of_match_ptr(pcf2123_dt_ids),
 	},
 	.probe	= pcf2123_probe,
+	.id_table = pcf2123_spi_ids,
 };
 
 module_spi_driver(pcf2123_driver);
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 14da4ab..15e50bb 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -34,6 +34,7 @@
 #define PCF85063_REG_CTRL1		0x00 /* status */
 #define PCF85063_REG_CTRL1_CAP_SEL	BIT(0)
 #define PCF85063_REG_CTRL1_STOP		BIT(5)
+#define PCF85063_REG_CTRL1_EXT_TEST	BIT(7)
 
 #define PCF85063_REG_CTRL2		0x01
 #define PCF85063_CTRL2_AF		BIT(6)
@@ -117,6 +118,7 @@
 	 * reset state until all time/date registers are written
 	 */
 	rc = regmap_update_bits(pcf85063->regmap, PCF85063_REG_CTRL1,
+				PCF85063_REG_CTRL1_EXT_TEST |
 				PCF85063_REG_CTRL1_STOP,
 				PCF85063_REG_CTRL1_STOP);
 	if (rc)
@@ -297,7 +299,7 @@
 		if (ret < 0)
 			return ret;
 
-		status = status & PCF85063_REG_SC_OS ? RTC_VL_DATA_INVALID : 0;
+		status = (status & PCF85063_REG_SC_OS) ? RTC_VL_DATA_INVALID : 0;
 
 		return put_user(status, (unsigned int __user *)arg);
 
@@ -479,6 +481,18 @@
 	struct clk *clk;
 	struct clk_init_data init;
 	struct device_node *node = pcf85063->rtc->dev.parent->of_node;
+	struct device_node *fixed_clock;
+
+	fixed_clock = of_get_child_by_name(node, "clock");
+	if (fixed_clock) {
+		/*
+		 * skip registering square wave clock when a fixed
+		 * clock has been registered. The fixed clock is
+		 * registered automatically when being referenced.
+		 */
+		of_node_put(fixed_clock);
+		return NULL;
+	}
 
 	init.name = "pcf85063-clkout";
 	init.ops = &pcf85063_clkout_ops;
diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
index 8b6fb20..c93acad 100644
--- a/drivers/rtc/rtc-pcf8523.c
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -4,8 +4,10 @@
  */
 
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
+#include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
 #include <linux/pm_wakeirq.h>
@@ -19,11 +21,10 @@
 #define PCF8523_CONTROL2_AF BIT(3)
 
 #define PCF8523_REG_CONTROL3 0x02
-#define PCF8523_CONTROL3_PM_BLD BIT(7) /* battery low detection disabled */
-#define PCF8523_CONTROL3_PM_VDD BIT(6) /* switch-over disabled */
-#define PCF8523_CONTROL3_PM_DSM BIT(5) /* direct switching mode */
-#define PCF8523_CONTROL3_PM_MASK 0xe0
+#define PCF8523_CONTROL3_PM  GENMASK(7, 5)
+#define PCF8523_PM_STANDBY   0x7
 #define PCF8523_CONTROL3_BLF BIT(2) /* battery low bit, read-only */
+#define PCF8523_CONTROL3_BSF BIT(3)
 
 #define PCF8523_REG_SECONDS  0x03
 #define PCF8523_SECONDS_OS BIT(7)
@@ -48,127 +49,45 @@
 
 struct pcf8523 {
 	struct rtc_device *rtc;
-	struct i2c_client *client;
+	struct regmap *regmap;
 };
 
-static int pcf8523_read(struct i2c_client *client, u8 reg, u8 *valuep)
+static int pcf8523_load_capacitance(struct pcf8523 *pcf8523, struct device_node *node)
 {
-	struct i2c_msg msgs[2];
-	u8 value = 0;
-	int err;
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = 0;
-	msgs[0].len = sizeof(reg);
-	msgs[0].buf = &reg;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = I2C_M_RD;
-	msgs[1].len = sizeof(value);
-	msgs[1].buf = &value;
-
-	err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (err < 0)
-		return err;
-
-	*valuep = value;
-
-	return 0;
-}
-
-static int pcf8523_write(struct i2c_client *client, u8 reg, u8 value)
-{
-	u8 buffer[2] = { reg, value };
-	struct i2c_msg msg;
-	int err;
-
-	msg.addr = client->addr;
-	msg.flags = 0;
-	msg.len = sizeof(buffer);
-	msg.buf = buffer;
-
-	err = i2c_transfer(client->adapter, &msg, 1);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-static int pcf8523_voltage_low(struct i2c_client *client)
-{
-	u8 value;
-	int err;
-
-	err = pcf8523_read(client, PCF8523_REG_CONTROL3, &value);
-	if (err < 0)
-		return err;
-
-	return !!(value & PCF8523_CONTROL3_BLF);
-}
-
-static int pcf8523_load_capacitance(struct i2c_client *client)
-{
-	u32 load;
-	u8 value;
-	int err;
-
-	err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value);
-	if (err < 0)
-		return err;
+	u32 load, value = 0;
 
 	load = 12500;
-	of_property_read_u32(client->dev.of_node, "quartz-load-femtofarads",
-			     &load);
+	of_property_read_u32(node, "quartz-load-femtofarads", &load);
 
 	switch (load) {
 	default:
-		dev_warn(&client->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500",
+		dev_warn(&pcf8523->rtc->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500",
 			 load);
 		fallthrough;
 	case 12500:
 		value |= PCF8523_CONTROL1_CAP_SEL;
 		break;
 	case 7000:
-		value &= ~PCF8523_CONTROL1_CAP_SEL;
 		break;
 	}
 
-	err = pcf8523_write(client, PCF8523_REG_CONTROL1, value);
-
-	return err;
-}
-
-static int pcf8523_set_pm(struct i2c_client *client, u8 pm)
-{
-	u8 value;
-	int err;
-
-	err = pcf8523_read(client, PCF8523_REG_CONTROL3, &value);
-	if (err < 0)
-		return err;
-
-	value = (value & ~PCF8523_CONTROL3_PM_MASK) | pm;
-
-	err = pcf8523_write(client, PCF8523_REG_CONTROL3, value);
-	if (err < 0)
-		return err;
-
-	return 0;
+	return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1,
+				  PCF8523_CONTROL1_CAP_SEL, value);
 }
 
 static irqreturn_t pcf8523_irq(int irq, void *dev_id)
 {
-	struct pcf8523 *pcf8523 = i2c_get_clientdata(dev_id);
-	u8 value;
+	struct pcf8523 *pcf8523 = dev_id;
+	u32 value;
 	int err;
 
-	err = pcf8523_read(pcf8523->client, PCF8523_REG_CONTROL2, &value);
+	err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL2, &value);
 	if (err < 0)
 		return IRQ_HANDLED;
 
 	if (value & PCF8523_CONTROL2_AF) {
 		value &= ~PCF8523_CONTROL2_AF;
-		pcf8523_write(pcf8523->client, PCF8523_REG_CONTROL2, value);
+		regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL2, value);
 		rtc_update_irq(pcf8523->rtc, 1, RTC_IRQF | RTC_AF);
 
 		return IRQ_HANDLED;
@@ -177,68 +96,14 @@
 	return IRQ_NONE;
 }
 
-static int pcf8523_stop_rtc(struct i2c_client *client)
-{
-	u8 value;
-	int err;
-
-	err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value);
-	if (err < 0)
-		return err;
-
-	value |= PCF8523_CONTROL1_STOP;
-
-	err = pcf8523_write(client, PCF8523_REG_CONTROL1, value);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-static int pcf8523_start_rtc(struct i2c_client *client)
-{
-	u8 value;
-	int err;
-
-	err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value);
-	if (err < 0)
-		return err;
-
-	value &= ~PCF8523_CONTROL1_STOP;
-
-	err = pcf8523_write(client, PCF8523_REG_CONTROL1, value);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
 static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	u8 start = PCF8523_REG_SECONDS, regs[7];
-	struct i2c_msg msgs[2];
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
+	u8 regs[7];
 	int err;
 
-	err = pcf8523_voltage_low(client);
-	if (err < 0) {
-		return err;
-	} else if (err > 0) {
-		dev_err(dev, "low voltage detected, time is unreliable\n");
-		return -EINVAL;
-	}
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = 0;
-	msgs[0].len = 1;
-	msgs[0].buf = &start;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = I2C_M_RD;
-	msgs[1].len = sizeof(regs);
-	msgs[1].buf = regs;
-
-	err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_SECONDS, regs,
+			       sizeof(regs));
 	if (err < 0)
 		return err;
 
@@ -258,63 +123,50 @@
 
 static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct i2c_msg msg;
-	u8 regs[8];
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
+	u8 regs[7];
 	int err;
 
-	err = pcf8523_stop_rtc(client);
+	err = regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1,
+				 PCF8523_CONTROL1_STOP, PCF8523_CONTROL1_STOP);
 	if (err < 0)
 		return err;
 
-	regs[0] = PCF8523_REG_SECONDS;
 	/* This will purposely overwrite PCF8523_SECONDS_OS */
-	regs[1] = bin2bcd(tm->tm_sec);
-	regs[2] = bin2bcd(tm->tm_min);
-	regs[3] = bin2bcd(tm->tm_hour);
-	regs[4] = bin2bcd(tm->tm_mday);
-	regs[5] = tm->tm_wday;
-	regs[6] = bin2bcd(tm->tm_mon + 1);
-	regs[7] = bin2bcd(tm->tm_year - 100);
+	regs[0] = bin2bcd(tm->tm_sec);
+	regs[1] = bin2bcd(tm->tm_min);
+	regs[2] = bin2bcd(tm->tm_hour);
+	regs[3] = bin2bcd(tm->tm_mday);
+	regs[4] = tm->tm_wday;
+	regs[5] = bin2bcd(tm->tm_mon + 1);
+	regs[6] = bin2bcd(tm->tm_year - 100);
 
-	msg.addr = client->addr;
-	msg.flags = 0;
-	msg.len = sizeof(regs);
-	msg.buf = regs;
-
-	err = i2c_transfer(client->adapter, &msg, 1);
+	err = regmap_bulk_write(pcf8523->regmap, PCF8523_REG_SECONDS, regs,
+				sizeof(regs));
 	if (err < 0) {
 		/*
 		 * If the time cannot be set, restart the RTC anyway. Note
 		 * that errors are ignored if the RTC cannot be started so
 		 * that we have a chance to propagate the original error.
 		 */
-		pcf8523_start_rtc(client);
+		regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1,
+				   PCF8523_CONTROL1_STOP, 0);
 		return err;
 	}
 
-	return pcf8523_start_rtc(client);
+	return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1,
+				 PCF8523_CONTROL1_STOP, 0);
 }
 
 static int pcf8523_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	u8 start = PCF8523_REG_MINUTE_ALARM, regs[4];
-	struct i2c_msg msgs[2];
-	u8 value;
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
+	u8 regs[4];
+	u32 value;
 	int err;
 
-	msgs[0].addr = client->addr;
-	msgs[0].flags = 0;
-	msgs[0].len = 1;
-	msgs[0].buf = &start;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = I2C_M_RD;
-	msgs[1].len = sizeof(regs);
-	msgs[1].buf = regs;
-
-	err = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_MINUTE_ALARM, regs,
+			       sizeof(regs));
 	if (err < 0)
 		return err;
 
@@ -324,12 +176,12 @@
 	tm->time.tm_mday = bcd2bin(regs[2] & 0x3F);
 	tm->time.tm_wday = bcd2bin(regs[3] & 0x7);
 
-	err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value);
+	err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL1, &value);
 	if (err < 0)
 		return err;
 	tm->enabled = !!(value & PCF8523_CONTROL1_AIE);
 
-	err = pcf8523_read(client, PCF8523_REG_CONTROL2, &value);
+	err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL2, &value);
 	if (err < 0)
 		return err;
 	tm->pending = !!(value & PCF8523_CONTROL2_AF);
@@ -339,30 +191,16 @@
 
 static int pcf8523_irq_enable(struct device *dev, unsigned int enabled)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	u8 value;
-	int err;
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
 
-	err = pcf8523_read(client, PCF8523_REG_CONTROL1, &value);
-	if (err < 0)
-		return err;
-
-	value &= PCF8523_CONTROL1_AIE;
-
-	if (enabled)
-		value |= PCF8523_CONTROL1_AIE;
-
-	err = pcf8523_write(client, PCF8523_REG_CONTROL1, value);
-	if (err < 0)
-		return err;
-
-	return 0;
+	return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL1,
+				  PCF8523_CONTROL1_AIE, enabled ?
+				  PCF8523_CONTROL1_AIE : 0);
 }
 
 static int pcf8523_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct i2c_msg msg;
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
 	u8 regs[5];
 	int err;
 
@@ -370,7 +208,7 @@
 	if (err)
 		return err;
 
-	err = pcf8523_write(client, PCF8523_REG_CONTROL2, 0);
+	err = regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL2, 0);
 	if (err < 0)
 		return err;
 
@@ -382,16 +220,13 @@
 		rtc_time64_to_tm(alarm_time, &tm->time);
 	}
 
-	regs[0] = PCF8523_REG_MINUTE_ALARM;
-	regs[1] = bin2bcd(tm->time.tm_min);
-	regs[2] = bin2bcd(tm->time.tm_hour);
-	regs[3] = bin2bcd(tm->time.tm_mday);
-	regs[4] = ALARM_DIS;
-	msg.addr = client->addr;
-	msg.flags = 0;
-	msg.len = sizeof(regs);
-	msg.buf = regs;
-	err = i2c_transfer(client->adapter, &msg, 1);
+	regs[0] = bin2bcd(tm->time.tm_min);
+	regs[1] = bin2bcd(tm->time.tm_hour);
+	regs[2] = bin2bcd(tm->time.tm_mday);
+	regs[3] = ALARM_DIS;
+
+	err = regmap_bulk_write(pcf8523->regmap, PCF8523_REG_MINUTE_ALARM, regs,
+				sizeof(regs));
 	if (err < 0)
 		return err;
 
@@ -401,24 +236,101 @@
 	return 0;
 }
 
-#ifdef CONFIG_RTC_INTF_DEV
+static int pcf8523_param_get(struct device *dev, struct rtc_param *param)
+{
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
+	int ret;
+
+	switch (param->param) {
+		u32 value;
+
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		ret = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value);
+		if (ret < 0)
+			return ret;
+
+		value = FIELD_GET(PCF8523_CONTROL3_PM, value);
+
+		switch (value) {
+		case 0x0:
+		case 0x4:
+			param->uvalue = RTC_BSM_LEVEL;
+			break;
+		case 0x1:
+		case 0x5:
+			param->uvalue = RTC_BSM_DIRECT;
+			break;
+		case PCF8523_PM_STANDBY:
+			param->uvalue = RTC_BSM_STANDBY;
+			break;
+		default:
+			param->uvalue = RTC_BSM_DISABLED;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int pcf8523_param_set(struct device *dev, struct rtc_param *param)
+{
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
+
+	switch (param->param) {
+		u8 mode;
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		switch (param->uvalue) {
+		case RTC_BSM_DISABLED:
+			mode = 0x2;
+			break;
+		case RTC_BSM_DIRECT:
+			mode = 0x1;
+			break;
+		case RTC_BSM_LEVEL:
+			mode = 0x0;
+			break;
+		case RTC_BSM_STANDBY:
+			mode = PCF8523_PM_STANDBY;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		return regmap_update_bits(pcf8523->regmap, PCF8523_REG_CONTROL3,
+					  PCF8523_CONTROL3_PM,
+					  FIELD_PREP(PCF8523_CONTROL3_PM, mode));
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int pcf8523_rtc_ioctl(struct device *dev, unsigned int cmd,
 			     unsigned long arg)
 {
-	struct i2c_client *client = to_i2c_client(dev);
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
 	unsigned int flags = 0;
-	u8 value;
+	u32 value;
 	int ret;
 
 	switch (cmd) {
 	case RTC_VL_READ:
-		ret = pcf8523_voltage_low(client);
+		ret = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value);
 		if (ret < 0)
 			return ret;
-		if (ret)
+
+		if (value & PCF8523_CONTROL3_BLF)
 			flags |= RTC_VL_BACKUP_LOW;
 
-		ret = pcf8523_read(client, PCF8523_REG_SECONDS, &value);
+		ret = regmap_read(pcf8523->regmap, PCF8523_REG_SECONDS, &value);
 		if (ret < 0)
 			return ret;
 
@@ -431,18 +343,15 @@
 		return -ENOIOCTLCMD;
 	}
 }
-#else
-#define pcf8523_rtc_ioctl NULL
-#endif
 
 static int pcf8523_rtc_read_offset(struct device *dev, long *offset)
 {
-	struct i2c_client *client = to_i2c_client(dev);
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
 	int err;
-	u8 value;
+	u32 value;
 	s8 val;
 
-	err = pcf8523_read(client, PCF8523_REG_OFFSET, &value);
+	err = regmap_read(pcf8523->regmap, PCF8523_REG_OFFSET, &value);
 	if (err < 0)
 		return err;
 
@@ -455,9 +364,9 @@
 
 static int pcf8523_rtc_set_offset(struct device *dev, long offset)
 {
-	struct i2c_client *client = to_i2c_client(dev);
+	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
 	long reg_m0, reg_m1;
-	u8 value;
+	u32 value;
 
 	reg_m0 = clamp(DIV_ROUND_CLOSEST(offset, 4340), -64L, 63L);
 	reg_m1 = clamp(DIV_ROUND_CLOSEST(offset, 4069), -64L, 63L);
@@ -467,7 +376,7 @@
 	else
 		value = (reg_m1 & 0x7f) | PCF8523_OFFSET_MODE;
 
-	return pcf8523_write(client, PCF8523_REG_OFFSET, value);
+	return regmap_write(pcf8523->regmap, PCF8523_REG_OFFSET, value);
 }
 
 static const struct rtc_class_ops pcf8523_rtc_ops = {
@@ -479,6 +388,14 @@
 	.ioctl = pcf8523_rtc_ioctl,
 	.read_offset = pcf8523_rtc_read_offset,
 	.set_offset = pcf8523_rtc_set_offset,
+	.param_get = pcf8523_param_get,
+	.param_set = pcf8523_param_set,
+};
+
+static const struct regmap_config regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x13,
 };
 
 static int pcf8523_probe(struct i2c_client *client,
@@ -487,6 +404,7 @@
 	struct pcf8523 *pcf8523;
 	struct rtc_device *rtc;
 	bool wakeup_source = false;
+	u32 value;
 	int err;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
@@ -496,46 +414,60 @@
 	if (!pcf8523)
 		return -ENOMEM;
 
+	pcf8523->regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(pcf8523->regmap))
+		return PTR_ERR(pcf8523->regmap);
+
 	i2c_set_clientdata(client, pcf8523);
-	pcf8523->client = client;
-
-	err = pcf8523_load_capacitance(client);
-	if (err < 0)
-		dev_warn(&client->dev, "failed to set xtal load capacitance: %d",
-			 err);
-
-	err = pcf8523_set_pm(client, 0);
-	if (err < 0)
-		return err;
 
 	rtc = devm_rtc_allocate_device(&client->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
-
 	pcf8523->rtc = rtc;
+
+	err = pcf8523_load_capacitance(pcf8523, client->dev.of_node);
+	if (err < 0)
+		dev_warn(&client->dev, "failed to set xtal load capacitance: %d",
+			 err);
+
+	err = regmap_read(pcf8523->regmap, PCF8523_REG_SECONDS, &value);
+	if (err < 0)
+		return err;
+
+	if (value & PCF8523_SECONDS_OS) {
+		err = regmap_read(pcf8523->regmap, PCF8523_REG_CONTROL3, &value);
+		if (err < 0)
+			return err;
+
+		if (FIELD_GET(PCF8523_CONTROL3_PM, value) == PCF8523_PM_STANDBY) {
+			err = regmap_write(pcf8523->regmap, PCF8523_REG_CONTROL3,
+					   value & ~PCF8523_CONTROL3_PM);
+			if (err < 0)
+				return err;
+		}
+	}
+
 	rtc->ops = &pcf8523_rtc_ops;
 	rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	rtc->range_max = RTC_TIMESTAMP_END_2099;
 	rtc->uie_unsupported = 1;
 
 	if (client->irq > 0) {
-		err = pcf8523_write(client, PCF8523_TMR_CLKOUT_CTRL, 0x38);
+		err = regmap_write(pcf8523->regmap, PCF8523_TMR_CLKOUT_CTRL, 0x38);
 		if (err < 0)
 			return err;
 
 		err = devm_request_threaded_irq(&client->dev, client->irq,
 						NULL, pcf8523_irq,
 						IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW,
-						dev_name(&rtc->dev), client);
+						dev_name(&rtc->dev), pcf8523);
 		if (err)
 			return err;
 
 		dev_pm_set_wake_irq(&client->dev, client->irq);
 	}
 
-#ifdef CONFIG_OF
 	wakeup_source = of_property_read_bool(client->dev.of_node, "wakeup-source");
-#endif
 	if (client->irq > 0 || wakeup_source)
 		device_init_wakeup(&client->dev, true);
 
@@ -548,19 +480,17 @@
 };
 MODULE_DEVICE_TABLE(i2c, pcf8523_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id pcf8523_of_match[] = {
 	{ .compatible = "nxp,pcf8523" },
 	{ .compatible = "microcrystal,rv8523" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pcf8523_of_match);
-#endif
 
 static struct i2c_driver pcf8523_driver = {
 	.driver = {
 		.name = "rtc-pcf8523",
-		.of_match_table = of_match_ptr(pcf8523_of_match),
+		.of_match_table = pcf8523_of_match,
 	},
 	.probe = pcf8523_probe,
 	.id_table = pcf8523_id,
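
The regmap conversion also switches the power-management field to
<linux/bitfield.h> accessors, which keep the shift implicit in the
mask. A short worked example with the new PCF8523_CONTROL3_PM mask:

	#define PCF8523_CONTROL3_PM	GENMASK(7, 5)

	u32 reg = 0xe4;					/* bits 7..5 hold 0b111 */
	u32 pm  = FIELD_GET(PCF8523_CONTROL3_PM, reg);	/* == 0x7, standby */

	reg &= ~PCF8523_CONTROL3_PM;
	reg |= FIELD_PREP(PCF8523_CONTROL3_PM, 0x2);	/* writes 0x40 in place */
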
diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c
index 12c8073..cdc623b 100644
--- a/drivers/rtc/rtc-rv3028.c
+++ b/drivers/rtc/rtc-rv3028.c
@@ -10,6 +10,7 @@
 
 #include <linux/clk-provider.h>
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
@@ -80,6 +81,10 @@
 
 #define RV3028_BACKUP_TCE		BIT(5)
 #define RV3028_BACKUP_TCR_MASK		GENMASK(1,0)
+#define RV3028_BACKUP_BSM		GENMASK(3, 2)
+
+#define RV3028_BACKUP_BSM_DSM		0x1
+#define RV3028_BACKUP_BSM_LSM		0x3
 
 #define OFFSET_STEP_PPT			953674
 
@@ -512,6 +517,71 @@
 
 }
 
+static int rv3028_param_get(struct device *dev, struct rtc_param *param)
+{
+	struct rv3028_data *rv3028 = dev_get_drvdata(dev);
+	int ret;
+
+	switch (param->param) {
+		u32 value;
+
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		ret = regmap_read(rv3028->regmap, RV3028_BACKUP, &value);
+		if (ret < 0)
+			return ret;
+
+		value = FIELD_GET(RV3028_BACKUP_BSM, value);
+
+		switch(value) {
+		case RV3028_BACKUP_BSM_DSM:
+			param->uvalue = RTC_BSM_DIRECT;
+			break;
+		case RV3028_BACKUP_BSM_LSM:
+			param->uvalue = RTC_BSM_LEVEL;
+			break;
+		default:
+			param->uvalue = RTC_BSM_DISABLED;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rv3028_param_set(struct device *dev, struct rtc_param *param)
+{
+	struct rv3028_data *rv3028 = dev_get_drvdata(dev);
+
+	switch (param->param) {
+		u8 mode;
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		switch (param->uvalue) {
+		case RTC_BSM_DISABLED:
+			mode = 0;
+			break;
+		case RTC_BSM_DIRECT:
+			mode = RV3028_BACKUP_BSM_DSM;
+			break;
+		case RTC_BSM_LEVEL:
+			mode = RV3028_BACKUP_BSM_LSM;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		return rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_BSM,
+					 FIELD_PREP(RV3028_BACKUP_BSM, mode));
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int rv3028_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 {
 	struct rv3028_data *rv3028 = dev_get_drvdata(dev);
@@ -776,6 +846,8 @@
 	.read_offset = rv3028_read_offset,
 	.set_offset = rv3028_set_offset,
 	.ioctl = rv3028_ioctl,
+	.param_get = rv3028_param_get,
+	.param_set = rv3028_param_set,
 };
 
 static const struct regmap_config regmap_config = {
@@ -878,6 +950,8 @@
 	if (ret)
 		return ret;
 
+	set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3028->rtc->features);
+
 	rv3028->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	rv3028->rtc->range_max = RTC_TIMESTAMP_END_2099;
 	rv3028->rtc->ops = &rv3028_rtc_ops;
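
The new param_get/param_set handlers back the generic RTC parameter ioctls introduced this
cycle, and the feature bit set in probe advertises backup-switch-mode support to the core. A
minimal userspace sketch, assuming the 5.16 <linux/rtc.h> UAPI (error handling trimmed):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/rtc.h>

	int main(void)
	{
		struct rtc_param p = { .param = RTC_PARAM_BACKUP_SWITCH_MODE };
		int fd = open("/dev/rtc0", O_RDWR);

		if (fd < 0 || ioctl(fd, RTC_PARAM_GET, &p))
			return 1;
		printf("backup switch mode: %llu\n", (unsigned long long)p.uvalue);

		p.uvalue = RTC_BSM_LEVEL;	/* request level switching mode */
		return ioctl(fd, RTC_PARAM_SET, &p) ? 1 : 0;
	}
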
diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c
index d63102d..c3bee30 100644
--- a/drivers/rtc/rtc-rv3032.c
+++ b/drivers/rtc/rtc-rv3032.c
@@ -106,6 +106,7 @@
 struct rv3032_data {
 	struct regmap *regmap;
 	struct rtc_device *rtc;
+	bool trickle_charger_set;
 #ifdef CONFIG_COMMON_CLK
 	struct clk_hw clkout_hw;
 #endif
@@ -310,14 +311,6 @@
 	u8 ctrl = 0;
 	int ret;
 
-	/* The alarm has no seconds, round up to nearest minute */
-	if (alrm->time.tm_sec) {
-		time64_t alarm_time = rtc_tm_to_time64(&alrm->time);
-
-		alarm_time += 60 - alrm->time.tm_sec;
-		rtc_time64_to_tm(alarm_time, &alrm->time);
-	}
-
 	ret = regmap_update_bits(rv3032->regmap, RV3032_CTRL2,
 				 RV3032_CTRL2_AIE | RV3032_CTRL2_UIE, 0);
 	if (ret)
@@ -402,6 +395,75 @@
 				 FIELD_PREP(RV3032_OFFSET_MSK, offset));
 }
 
+static int rv3032_param_get(struct device *dev, struct rtc_param *param)
+{
+	struct rv3032_data *rv3032 = dev_get_drvdata(dev);
+	int ret;
+
+	switch (param->param) {
+		u32 value;
+
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		ret = regmap_read(rv3032->regmap, RV3032_PMU, &value);
+		if (ret < 0)
+			return ret;
+
+		value = FIELD_GET(RV3032_PMU_BSM, value);
+
+		switch (value) {
+		case RV3032_PMU_BSM_DSM:
+			param->uvalue = RTC_BSM_DIRECT;
+			break;
+		case RV3032_PMU_BSM_LSM:
+			param->uvalue = RTC_BSM_LEVEL;
+			break;
+		default:
+			param->uvalue = RTC_BSM_DISABLED;
+		}
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rv3032_param_set(struct device *dev, struct rtc_param *param)
+{
+	struct rv3032_data *rv3032 = dev_get_drvdata(dev);
+
+	switch (param->param) {
+		u8 mode;
+	case RTC_PARAM_BACKUP_SWITCH_MODE:
+		if (rv3032->trickle_charger_set)
+			return -EINVAL;
+
+		switch (param->uvalue) {
+		case RTC_BSM_DISABLED:
+			mode = 0;
+			break;
+		case RTC_BSM_DIRECT:
+			mode = RV3032_PMU_BSM_DSM;
+			break;
+		case RTC_BSM_LEVEL:
+			mode = RV3032_PMU_BSM_LSM;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		return rv3032_update_cfg(rv3032, RV3032_PMU, RV3032_PMU_BSM,
+					 FIELD_PREP(RV3032_PMU_BSM, mode));
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int rv3032_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 {
 	struct rv3032_data *rv3032 = dev_get_drvdata(dev);
@@ -541,6 +603,8 @@
 		return 0;
 	}
 
+	rv3032->trickle_charger_set = true;
+
 	return rv3032_update_cfg(rv3032, RV3032_PMU,
 				 RV3032_PMU_TCR | RV3032_PMU_TCM | RV3032_PMU_BSM,
 				 val | FIELD_PREP(RV3032_PMU_TCR, i));
@@ -617,11 +681,11 @@
 
 	ret = rv3032_enter_eerd(rv3032, &eerd);
 	if (ret)
-		goto exit_eerd;
+		return ret;
 
 	ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff);
 	if (ret)
-		return ret;
+		goto exit_eerd;
 
 	ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS |
 			    FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8));
@@ -813,6 +877,8 @@
 	.read_alarm = rv3032_get_alarm,
 	.set_alarm = rv3032_set_alarm,
 	.alarm_irq_enable = rv3032_alarm_irq_enable,
+	.param_get = rv3032_param_get,
+	.param_set = rv3032_param_set,
 };
 
 static const struct regmap_config regmap_config = {
@@ -883,6 +949,9 @@
 
 	rv3032_trickle_charger_setup(&client->dev, rv3032);
 
+	set_bit(RTC_FEATURE_BACKUP_SWITCH_MODE, rv3032->rtc->features);
+	set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rv3032->rtc->features);
+
 	rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099;
 	rv3032->rtc->ops = &rv3032_rtc_ops;
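
Note the seconds-rounding deleted from rv3032_set_alarm(): with RTC_FEATURE_ALARM_RES_MINUTE
advertised in probe, handling the 1-minute alarm resolution becomes the RTC core's job rather
than each driver's. A sketch of the kind of adjustment this implies on the core side (not the
exact core code):

	/* Core-side sketch: clamp the alarm to the RTC's 1-minute resolution. */
	if (test_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features) &&
	    alarm->time.tm_sec) {
		time64_t t = rtc_tm_to_time64(&alarm->time) - alarm->time.tm_sec;

		rtc_time64_to_tm(t, &alarm->time);	/* whole-minute alarm */
	}
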
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 72adef5..0d5ed38 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -340,8 +340,8 @@
 		}
 	}
 
-	ctrl[1] &= ~RV8803_FLAG_AF;
-	err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[1]);
+	ctrl[0] &= ~RV8803_FLAG_AF;
+	err = rv8803_write_reg(rv8803->client, RV8803_FLAG, ctrl[0]);
 	mutex_unlock(&rv8803->flags_lock);
 	if (err)
 		return err;
diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c
index f4d4250..758fd6e 100644
--- a/drivers/rtc/rtc-rx6110.c
+++ b/drivers/rtc/rtc-rx6110.c
@@ -422,7 +422,7 @@
 static int rx6110_i2c_probe(struct i2c_client *client,
 			    const struct i2c_device_id *id)
 {
-	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct i2c_adapter *adapter = client->adapter;
 	struct rx6110_data *rx6110;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index d38aaf0..5bfdd34 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -248,9 +248,6 @@
 	u8 date[7];
 	int ret;
 
-	if ((dt->tm_year < 100) || (dt->tm_year > 199))
-		return -EINVAL;
-
 	/*
 	 * Here the read-only bits are written as "0".  I'm not sure if that
 	 * is sound.
@@ -318,9 +315,6 @@
 	u8 ald[2];
 	int ctrl2, err;
 
-	if (client->irq <= 0)
-		return -EINVAL;
-
 	err = rx8025_read_regs(client, RX8025_REG_ALDMIN, 2, ald);
 	if (err)
 		return err;
@@ -355,20 +349,6 @@
 	u8 ald[2];
 	int err;
 
-	if (client->irq <= 0)
-		return -EINVAL;
-
-	/*
-	 * Hardware alarm precision is 1 minute!
-	 * round up to nearest minute
-	 */
-	if (t->time.tm_sec) {
-		time64_t alarm_time = rtc_tm_to_time64(&t->time);
-
-		alarm_time += 60 - t->time.tm_sec;
-		rtc_time64_to_tm(alarm_time, &t->time);
-	}
-
 	ald[0] = bin2bcd(t->time.tm_min);
 	if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224)
 		ald[1] = bin2bcd(t->time.tm_hour);
@@ -423,17 +403,7 @@
 	return 0;
 }
 
-static const struct rtc_class_ops rx8025_rtc_ops = {
-	.read_time = rx8025_get_time,
-	.set_time = rx8025_set_time,
-	.read_alarm = rx8025_read_alarm,
-	.set_alarm = rx8025_set_alarm,
-	.alarm_irq_enable = rx8025_alarm_irq_enable,
-};
-
 /*
- * Clock precision adjustment support
- *
  * According to the RX8025 SA/NB application manual the frequency and
  * temperature characteristics can be approximated using the following
  * equation:
@@ -444,11 +414,8 @@
  *   a : Coefficient = (-35 +-5) * 10**-9
  *   ut: Ultimate temperature in degree = +25 +-5 degree
  *   t : Any temperature in degree
- *
- * Note that the clock adjustment in ppb must be entered (which is
- * the negative value of the deviation).
  */
-static int rx8025_get_clock_adjust(struct device *dev, int *adj)
+static int rx8025_read_offset(struct device *dev, long *offset)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	int digoff;
@@ -457,63 +424,75 @@
 	if (digoff < 0)
 		return digoff;
 
-	*adj = digoff >= 64 ? digoff - 128 : digoff;
-	if (*adj > 0)
-		(*adj)--;
-	*adj *= -RX8025_ADJ_RESOLUTION;
+	*offset = digoff >= 64 ? digoff - 128 : digoff;
+	if (*offset > 0)
+		(*offset)--;
+	*offset *= RX8025_ADJ_RESOLUTION;
 
 	return 0;
 }
 
-static int rx8025_set_clock_adjust(struct device *dev, int adj)
+static int rx8025_set_offset(struct device *dev, long offset)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	u8 digoff;
 	int err;
 
-	adj /= -RX8025_ADJ_RESOLUTION;
-	if (adj > RX8025_ADJ_DATA_MAX)
-		adj = RX8025_ADJ_DATA_MAX;
-	else if (adj < RX8025_ADJ_DATA_MIN)
-		adj = RX8025_ADJ_DATA_MIN;
-	else if (adj > 0)
-		adj++;
-	else if (adj < 0)
-		adj += 128;
-	digoff = adj;
+	offset /= RX8025_ADJ_RESOLUTION;
+	if (offset > RX8025_ADJ_DATA_MAX)
+		offset = RX8025_ADJ_DATA_MAX;
+	else if (offset < RX8025_ADJ_DATA_MIN)
+		offset = RX8025_ADJ_DATA_MIN;
+	else if (offset > 0)
+		offset++;
+	else if (offset < 0)
+		offset += 128;
+	digoff = offset;
 
 	err = rx8025_write_reg(client, RX8025_REG_DIGOFF, digoff);
 	if (err)
 		return err;
 
-	dev_dbg(dev, "%s: write 0x%02x\n", __func__, digoff);
-
 	return 0;
 }
 
+static const struct rtc_class_ops rx8025_rtc_ops = {
+	.read_time = rx8025_get_time,
+	.set_time = rx8025_set_time,
+	.read_alarm = rx8025_read_alarm,
+	.set_alarm = rx8025_set_alarm,
+	.alarm_irq_enable = rx8025_alarm_irq_enable,
+	.read_offset = rx8025_read_offset,
+	.set_offset = rx8025_set_offset,
+};
+
 static ssize_t rx8025_sysfs_show_clock_adjust(struct device *dev,
 					      struct device_attribute *attr,
 					      char *buf)
 {
-	int err, adj;
+	long adj;
+	int err;
 
-	err = rx8025_get_clock_adjust(dev, &adj);
+	dev_warn_once(dev, "clock_adjust_ppb is deprecated, use offset\n");
+	err = rx8025_read_offset(dev, &adj);
 	if (err)
 		return err;
 
-	return sprintf(buf, "%d\n", adj);
+	return sprintf(buf, "%ld\n", -adj);
 }
 
 static ssize_t rx8025_sysfs_store_clock_adjust(struct device *dev,
 					       struct device_attribute *attr,
 					       const char *buf, size_t count)
 {
-	int adj, err;
+	long adj;
+	int err;
 
-	if (sscanf(buf, "%i", &adj) != 1)
+	dev_warn_once(dev, "clock_adjust_ppb is deprecated, use offset\n");
+	if (kstrtol(buf, 10, &adj) != 0)
 		return -EINVAL;
 
-	err = rx8025_set_clock_adjust(dev, adj);
+	err = rx8025_set_offset(dev, -adj);
 
 	return err ? err : count;
 }
@@ -522,15 +501,14 @@
 		   rx8025_sysfs_show_clock_adjust,
 		   rx8025_sysfs_store_clock_adjust);
 
-static int rx8025_sysfs_register(struct device *dev)
-{
-	return device_create_file(dev, &dev_attr_clock_adjust_ppb);
-}
+static struct attribute *rx8025_attrs[] = {
+	&dev_attr_clock_adjust_ppb.attr,
+	NULL
+};
 
-static void rx8025_sysfs_unregister(struct device *dev)
-{
-	device_remove_file(dev, &dev_attr_clock_adjust_ppb);
-}
+static const struct attribute_group rx8025_attr_group = {
+	.attrs	= rx8025_attrs,
+};
 
 static int rx8025_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
@@ -559,12 +537,13 @@
 	if (err)
 		return err;
 
-	rx8025->rtc = devm_rtc_device_register(&client->dev, client->name,
-					  &rx8025_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rx8025->rtc)) {
-		dev_err(&client->dev, "unable to register the class device\n");
+	rx8025->rtc = devm_rtc_allocate_device(&client->dev);
+	if (IS_ERR(rx8025->rtc))
 		return PTR_ERR(rx8025->rtc);
-	}
+
+	rx8025->rtc->ops = &rx8025_rtc_ops;
+	rx8025->rtc->range_min = RTC_TIMESTAMP_BEGIN_1900;
+	rx8025->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
 	if (client->irq > 0) {
 		dev_info(&client->dev, "IRQ %d supplied\n", client->irq);
@@ -572,25 +551,20 @@
 						rx8025_handle_irq,
 						IRQF_ONESHOT,
 						"rx8025", client);
-		if (err) {
-			dev_err(&client->dev, "unable to request IRQ, alarms disabled\n");
-			client->irq = 0;
-		}
+		if (err)
+			clear_bit(RTC_FEATURE_ALARM, rx8025->rtc->features);
 	}
 
 	rx8025->rtc->max_user_freq = 1;
 
-	/* the rx8025 alarm only supports a minute accuracy */
-	rx8025->rtc->uie_unsupported = 1;
+	set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rx8025->rtc->features);
+	clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rx8025->rtc->features);
 
-	err = rx8025_sysfs_register(&client->dev);
-	return err;
-}
+	err = rtc_add_group(rx8025->rtc, &rx8025_attr_group);
+	if (err)
+		return err;
 
-static int rx8025_remove(struct i2c_client *client)
-{
-	rx8025_sysfs_unregister(&client->dev);
-	return 0;
+	return devm_rtc_register_device(rx8025->rtc);
 }
 
 static struct i2c_driver rx8025_driver = {
@@ -598,7 +572,6 @@
 		.name = "rtc-rx8025",
 	},
 	.probe		= rx8025_probe,
-	.remove		= rx8025_remove,
 	.id_table	= rx8025_id,
 };
 
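Exposing read_offset/set_offset gives rx8025 the standard per-device offset attribute,
expressed in parts per billion; the legacy clock_adjust_ppb file used the opposite sign, hence
the negations kept in the deprecated show/store paths above. Reading the new attribute from
userspace (a sketch; the device path is assumed):

	#include <stdio.h>

	int main(void)
	{
		long ppb;
		FILE *f = fopen("/sys/class/rtc/rtc0/offset", "r");

		if (!f || fscanf(f, "%ld", &ppb) != 1)
			return 1;
		printf("current RTC offset: %ld ppb\n", ppb);
		fclose(f);
		return 0;
	}
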
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index b5bdeda..26278c7 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -285,9 +285,6 @@
 		alm->time.tm_min, alm->time.tm_hour, alm->time.tm_mday,
 		alm->time.tm_mon, alm->time.tm_year, alm->time.tm_wday);
 
-	if (alm->time.tm_sec != 0)
-		dev_warn(&client->dev, "Alarms are only supported on a per minute basis!\n");
-
 	/* disable interrupt (which deasserts the irq line) */
 	err = s35390a_set_reg(s35390a, S35390A_CMD_STATUS2, &sts, sizeof(sts));
 	if (err < 0)
@@ -491,8 +488,8 @@
 	s35390a->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	s35390a->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-	/* supports per-minute alarms only, therefore set uie_unsupported */
-	s35390a->rtc->uie_unsupported = 1;
+	set_bit(RTC_FEATURE_ALARM_RES_MINUTE, s35390a->rtc->features);
+	clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, s35390a->rtc->features);
 
 	if (status1 & S35390A_FLAG_INT2)
 		rtc_update_irq(s35390a->rtc, 1, RTC_AF);
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index e57d3ca..db529733 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -127,10 +127,9 @@
 	return ret;
 }
 
-/* Time read/write */
-static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
+/* Read time from RTC and convert it from BCD */
+static int s3c_rtc_read_time(struct s3c_rtc *info, struct rtc_time *tm)
 {
-	struct s3c_rtc *info = dev_get_drvdata(dev);
 	unsigned int have_retried = 0;
 	int ret;
 
@@ -139,54 +138,40 @@
 		return ret;
 
 retry_get_time:
-	rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
-	rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR);
-	rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE);
-	rtc_tm->tm_mon  = readb(info->base + S3C2410_RTCMON);
-	rtc_tm->tm_year = readb(info->base + S3C2410_RTCYEAR);
-	rtc_tm->tm_sec  = readb(info->base + S3C2410_RTCSEC);
+	tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
+	tm->tm_hour = readb(info->base + S3C2410_RTCHOUR);
+	tm->tm_mday = readb(info->base + S3C2410_RTCDATE);
+	tm->tm_mon  = readb(info->base + S3C2410_RTCMON);
+	tm->tm_year = readb(info->base + S3C2410_RTCYEAR);
+	tm->tm_sec  = readb(info->base + S3C2410_RTCSEC);
 
-	/* the only way to work out whether the system was mid-update
+	/*
+	 * The only way to work out whether the system was mid-update
 	 * when we read it is to check the second counter, and if it
 	 * is zero, then we re-try the entire read
 	 */
-
-	if (rtc_tm->tm_sec == 0 && !have_retried) {
+	if (tm->tm_sec == 0 && !have_retried) {
 		have_retried = 1;
 		goto retry_get_time;
 	}
 
-	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
-	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
-	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
-	rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday);
-	rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon);
-	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
-
 	s3c_rtc_disable_clk(info);
 
-	rtc_tm->tm_year += 100;
-	rtc_tm->tm_mon -= 1;
+	tm->tm_sec  = bcd2bin(tm->tm_sec);
+	tm->tm_min  = bcd2bin(tm->tm_min);
+	tm->tm_hour = bcd2bin(tm->tm_hour);
+	tm->tm_mday = bcd2bin(tm->tm_mday);
+	tm->tm_mon  = bcd2bin(tm->tm_mon);
+	tm->tm_year = bcd2bin(tm->tm_year);
 
-	dev_dbg(dev, "read time %ptR\n", rtc_tm);
 	return 0;
 }
 
-static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
+/* Convert time to BCD and write it to RTC */
+static int s3c_rtc_write_time(struct s3c_rtc *info, const struct rtc_time *tm)
 {
-	struct s3c_rtc *info = dev_get_drvdata(dev);
-	int year = tm->tm_year - 100;
 	int ret;
 
-	dev_dbg(dev, "set time %ptR\n", tm);
-
-	/* we get around y2k by simply not supporting it */
-
-	if (year < 0 || year >= 100) {
-		dev_err(dev, "rtc only supports 100 years\n");
-		return -EINVAL;
-	}
-
 	ret = s3c_rtc_enable_clk(info);
 	if (ret)
 		return ret;
@@ -195,14 +180,48 @@
 	writeb(bin2bcd(tm->tm_min),  info->base + S3C2410_RTCMIN);
 	writeb(bin2bcd(tm->tm_hour), info->base + S3C2410_RTCHOUR);
 	writeb(bin2bcd(tm->tm_mday), info->base + S3C2410_RTCDATE);
-	writeb(bin2bcd(tm->tm_mon + 1), info->base + S3C2410_RTCMON);
-	writeb(bin2bcd(year), info->base + S3C2410_RTCYEAR);
+	writeb(bin2bcd(tm->tm_mon),  info->base + S3C2410_RTCMON);
+	writeb(bin2bcd(tm->tm_year), info->base + S3C2410_RTCYEAR);
 
 	s3c_rtc_disable_clk(info);
 
 	return 0;
 }
 
+static int s3c_rtc_gettime(struct device *dev, struct rtc_time *tm)
+{
+	struct s3c_rtc *info = dev_get_drvdata(dev);
+	int ret;
+
+	ret = s3c_rtc_read_time(info, tm);
+	if (ret)
+		return ret;
+
+	/* Convert internal representation to actual date/time */
+	tm->tm_year += 100;
+	tm->tm_mon -= 1;
+
+	dev_dbg(dev, "read time %ptR\n", tm);
+	return 0;
+}
+
+static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
+{
+	struct s3c_rtc *info = dev_get_drvdata(dev);
+	struct rtc_time rtc_tm = *tm;
+
+	dev_dbg(dev, "set time %ptR\n", tm);
+
+	/*
+	 * Convert actual date/time to internal representation.
+	 * We get around Y2K by simply not supporting it.
+	 */
+	rtc_tm.tm_year -= 100;
+	rtc_tm.tm_mon += 1;
+
+	return s3c_rtc_write_time(info, &rtc_tm);
+}
+
 static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
@@ -447,15 +466,20 @@
 
 	device_init_wakeup(&pdev->dev, 1);
 
-	/* register RTC and exit */
-	info->rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops,
-					     THIS_MODULE);
+	info->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(info->rtc)) {
-		dev_err(&pdev->dev, "cannot attach rtc\n");
 		ret = PTR_ERR(info->rtc);
 		goto err_nortc;
 	}
 
+	info->rtc->ops = &s3c_rtcops;
+	info->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+	info->rtc->range_max = RTC_TIMESTAMP_END_2099;
+
+	ret = devm_rtc_register_device(info->rtc);
+	if (ret)
+		goto err_nortc;
+
 	ret = devm_request_irq(&pdev->dev, info->irq_alarm, s3c_rtc_alarmirq,
 			       0, "s3c2410-rtc alarm", info);
 	if (ret) {
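
The s3c conversion follows the same shape as the rx8025 change earlier: allocate the device,
fill in the ops and the supported time range, then register, so the core rejects out-of-range
times and the hand-rolled "rtc only supports 100 years" check can go. The skeleton, for
reference (a sketch; foo_rtc_ops is a hypothetical driver ops table):

	#include <linux/platform_device.h>
	#include <linux/rtc.h>

	static const struct rtc_class_ops foo_rtc_ops = {
		/* .read_time, .set_time, ... */
	};

	static int foo_rtc_probe(struct platform_device *pdev)
	{
		struct rtc_device *rtc;

		rtc = devm_rtc_allocate_device(&pdev->dev);
		if (IS_ERR(rtc))
			return PTR_ERR(rtc);

		rtc->ops = &foo_rtc_ops;
		rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
		rtc->range_max = RTC_TIMESTAMP_END_2099;

		/* The core now validates times against [range_min, range_max]. */
		return devm_rtc_register_device(rtc);
	}
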
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index fb9c6b7..4243fe6 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -861,4 +861,3 @@
 MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
 MODULE_DESCRIPTION("Samsung S5M/S2MPS14 RTC driver");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:s5m-rtc");
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index adec1b1..711832c 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -673,8 +673,17 @@
 	struct sun6i_rtc_dev *chip = sun6i_rtc;
 	int ret;
 
-	if (!chip)
-		return -ENODEV;
+	if (!chip) {
+		chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+
+		spin_lock_init(&chip->lock);
+
+		chip->base = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(chip->base))
+			return PTR_ERR(chip->base);
+	}
 
 	platform_set_drvdata(pdev, chip);
 
diff --git a/drivers/rtc/rtc-tps80031.c b/drivers/rtc/rtc-tps80031.c
deleted file mode 100644
index c77b8ea..0000000
--- a/drivers/rtc/rtc-tps80031.c
+++ /dev/null
@@ -1,324 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * rtc-tps80031.c -- TI TPS80031/TPS80032 RTC driver
- *
- * RTC driver for TI TPS80031/TPS80032 Fully Integrated
- * Power Management with Power Path and Battery Charger
- *
- * Copyright (c) 2012, NVIDIA Corporation.
- *
- * Author: Laxman Dewangan <ldewangan@nvidia.com>
- */
-
-#include <linux/bcd.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mfd/tps80031.h>
-#include <linux/platform_device.h>
-#include <linux/pm.h>
-#include <linux/rtc.h>
-#include <linux/slab.h>
-
-#define ENABLE_ALARM_INT			0x08
-#define ALARM_INT_STATUS			0x40
-
-/**
- * Setting bit to 1 in STOP_RTC will run the RTC and
- * setting this bit to 0 will freeze RTC.
- */
-#define STOP_RTC				0x1
-
-/* Power on reset Values of RTC registers */
-#define TPS80031_RTC_POR_YEAR			0
-#define TPS80031_RTC_POR_MONTH			1
-#define TPS80031_RTC_POR_DAY			1
-
-/* Numbers of registers for time and alarms */
-#define TPS80031_RTC_TIME_NUM_REGS		7
-#define TPS80031_RTC_ALARM_NUM_REGS		6
-
-/**
- * PMU RTC have only 2 nibbles to store year information, so using an
- * offset of 100 to set the base year as 2000 for our driver.
- */
-#define RTC_YEAR_OFFSET 100
-
-struct tps80031_rtc {
-	struct rtc_device	*rtc;
-	int			irq;
-};
-
-static int tps80031_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	u8 buff[TPS80031_RTC_TIME_NUM_REGS];
-	int ret;
-
-	ret = tps80031_reads(dev->parent, TPS80031_SLAVE_ID1,
-			TPS80031_SECONDS_REG, TPS80031_RTC_TIME_NUM_REGS, buff);
-	if (ret < 0) {
-		dev_err(dev, "reading RTC_SECONDS_REG failed, err = %d\n", ret);
-		return ret;
-	}
-
-	tm->tm_sec = bcd2bin(buff[0]);
-	tm->tm_min = bcd2bin(buff[1]);
-	tm->tm_hour = bcd2bin(buff[2]);
-	tm->tm_mday = bcd2bin(buff[3]);
-	tm->tm_mon = bcd2bin(buff[4]) - 1;
-	tm->tm_year = bcd2bin(buff[5]) + RTC_YEAR_OFFSET;
-	tm->tm_wday = bcd2bin(buff[6]);
-	return 0;
-}
-
-static int tps80031_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	u8 buff[7];
-	int ret;
-
-	buff[0] = bin2bcd(tm->tm_sec);
-	buff[1] = bin2bcd(tm->tm_min);
-	buff[2] = bin2bcd(tm->tm_hour);
-	buff[3] = bin2bcd(tm->tm_mday);
-	buff[4] = bin2bcd(tm->tm_mon + 1);
-	buff[5] = bin2bcd(tm->tm_year % RTC_YEAR_OFFSET);
-	buff[6] = bin2bcd(tm->tm_wday);
-
-	/* Stop RTC while updating the RTC time registers */
-	ret = tps80031_clr_bits(dev->parent, TPS80031_SLAVE_ID1,
-				TPS80031_RTC_CTRL_REG, STOP_RTC);
-	if (ret < 0) {
-		dev_err(dev->parent, "Stop RTC failed, err = %d\n", ret);
-		return ret;
-	}
-
-	ret = tps80031_writes(dev->parent, TPS80031_SLAVE_ID1,
-			TPS80031_SECONDS_REG,
-			TPS80031_RTC_TIME_NUM_REGS, buff);
-	if (ret < 0) {
-		dev_err(dev, "writing RTC_SECONDS_REG failed, err %d\n", ret);
-		return ret;
-	}
-
-	ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1,
-				TPS80031_RTC_CTRL_REG, STOP_RTC);
-	if (ret < 0)
-		dev_err(dev->parent, "Start RTC failed, err = %d\n", ret);
-	return ret;
-}
-
-static int tps80031_rtc_alarm_irq_enable(struct device *dev,
-					 unsigned int enable)
-{
-	int ret;
-
-	if (enable)
-		ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1,
-				TPS80031_RTC_INTERRUPTS_REG, ENABLE_ALARM_INT);
-	else
-		ret = tps80031_clr_bits(dev->parent, TPS80031_SLAVE_ID1,
-				TPS80031_RTC_INTERRUPTS_REG, ENABLE_ALARM_INT);
-	if (ret < 0) {
-		dev_err(dev, "Update on RTC_INT failed, err = %d\n", ret);
-		return ret;
-	}
-	return 0;
-}
-
-static int tps80031_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-	u8 buff[TPS80031_RTC_ALARM_NUM_REGS];
-	int ret;
-
-	buff[0] = bin2bcd(alrm->time.tm_sec);
-	buff[1] = bin2bcd(alrm->time.tm_min);
-	buff[2] = bin2bcd(alrm->time.tm_hour);
-	buff[3] = bin2bcd(alrm->time.tm_mday);
-	buff[4] = bin2bcd(alrm->time.tm_mon + 1);
-	buff[5] = bin2bcd(alrm->time.tm_year % RTC_YEAR_OFFSET);
-	ret = tps80031_writes(dev->parent, TPS80031_SLAVE_ID1,
-			TPS80031_ALARM_SECONDS_REG,
-			TPS80031_RTC_ALARM_NUM_REGS, buff);
-	if (ret < 0) {
-		dev_err(dev, "Writing RTC_ALARM failed, err %d\n", ret);
-		return ret;
-	}
-	return tps80031_rtc_alarm_irq_enable(dev, alrm->enabled);
-}
-
-static int tps80031_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-	u8 buff[6];
-	int ret;
-
-	ret = tps80031_reads(dev->parent, TPS80031_SLAVE_ID1,
-			TPS80031_ALARM_SECONDS_REG,
-			TPS80031_RTC_ALARM_NUM_REGS, buff);
-	if (ret < 0) {
-		dev_err(dev->parent,
-			"reading RTC_ALARM failed, err = %d\n", ret);
-		return ret;
-	}
-
-	alrm->time.tm_sec = bcd2bin(buff[0]);
-	alrm->time.tm_min = bcd2bin(buff[1]);
-	alrm->time.tm_hour = bcd2bin(buff[2]);
-	alrm->time.tm_mday = bcd2bin(buff[3]);
-	alrm->time.tm_mon = bcd2bin(buff[4]) - 1;
-	alrm->time.tm_year = bcd2bin(buff[5]) + RTC_YEAR_OFFSET;
-	return 0;
-}
-
-static int clear_alarm_int_status(struct device *dev, struct tps80031_rtc *rtc)
-{
-	int ret;
-	u8 buf;
-
-	/**
-	 * As per datasheet, A dummy read of this  RTC_STATUS_REG register
-	 * is necessary before each I2C read in order to update the status
-	 * register value.
-	 */
-	ret = tps80031_read(dev->parent, TPS80031_SLAVE_ID1,
-				TPS80031_RTC_STATUS_REG, &buf);
-	if (ret < 0) {
-		dev_err(dev, "reading RTC_STATUS failed. err = %d\n", ret);
-		return ret;
-	}
-
-	/* clear Alarm status bits.*/
-	ret = tps80031_set_bits(dev->parent, TPS80031_SLAVE_ID1,
-			TPS80031_RTC_STATUS_REG, ALARM_INT_STATUS);
-	if (ret < 0) {
-		dev_err(dev, "clear Alarm INT failed, err = %d\n", ret);
-		return ret;
-	}
-	return 0;
-}
-
-static irqreturn_t tps80031_rtc_irq(int irq, void *data)
-{
-	struct device *dev = data;
-	struct tps80031_rtc *rtc = dev_get_drvdata(dev);
-	int ret;
-
-	ret = clear_alarm_int_status(dev, rtc);
-	if (ret < 0)
-		return ret;
-
-	rtc_update_irq(rtc->rtc, 1, RTC_IRQF | RTC_AF);
-	return IRQ_HANDLED;
-}
-
-static const struct rtc_class_ops tps80031_rtc_ops = {
-	.read_time = tps80031_rtc_read_time,
-	.set_time = tps80031_rtc_set_time,
-	.set_alarm = tps80031_rtc_set_alarm,
-	.read_alarm = tps80031_rtc_read_alarm,
-	.alarm_irq_enable = tps80031_rtc_alarm_irq_enable,
-};
-
-static int tps80031_rtc_probe(struct platform_device *pdev)
-{
-	struct tps80031_rtc *rtc;
-	struct rtc_time tm;
-	int ret;
-
-	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
-	if (!rtc)
-		return -ENOMEM;
-
-	rtc->irq = platform_get_irq(pdev, 0);
-	platform_set_drvdata(pdev, rtc);
-
-	/* Start RTC */
-	ret = tps80031_set_bits(pdev->dev.parent, TPS80031_SLAVE_ID1,
-			TPS80031_RTC_CTRL_REG, STOP_RTC);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to start RTC. err = %d\n", ret);
-		return ret;
-	}
-
-	/* If RTC have POR values, set time 01:01:2000 */
-	tps80031_rtc_read_time(&pdev->dev, &tm);
-	if ((tm.tm_year == RTC_YEAR_OFFSET + TPS80031_RTC_POR_YEAR) &&
-		(tm.tm_mon == (TPS80031_RTC_POR_MONTH - 1)) &&
-		(tm.tm_mday == TPS80031_RTC_POR_DAY)) {
-		tm.tm_year = 2000;
-		tm.tm_mday = 1;
-		tm.tm_mon = 1;
-		ret = tps80031_rtc_set_time(&pdev->dev, &tm);
-		if (ret < 0) {
-			dev_err(&pdev->dev,
-				"RTC set time failed, err = %d\n", ret);
-			return ret;
-		}
-	}
-
-	/* Clear alarm intretupt status if it is there */
-	ret = clear_alarm_int_status(&pdev->dev, rtc);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Clear alarm int failed, err = %d\n", ret);
-		return ret;
-	}
-
-	rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-			       &tps80031_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc->rtc)) {
-		ret = PTR_ERR(rtc->rtc);
-		dev_err(&pdev->dev, "RTC registration failed, err %d\n", ret);
-		return ret;
-	}
-
-	ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL,
-			tps80031_rtc_irq,
-			IRQF_ONESHOT,
-			dev_name(&pdev->dev), rtc);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "request IRQ:%d failed, err = %d\n",
-			 rtc->irq, ret);
-		return ret;
-	}
-	device_set_wakeup_capable(&pdev->dev, 1);
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int tps80031_rtc_suspend(struct device *dev)
-{
-	struct tps80031_rtc *rtc = dev_get_drvdata(dev);
-
-	if (device_may_wakeup(dev))
-		enable_irq_wake(rtc->irq);
-	return 0;
-}
-
-static int tps80031_rtc_resume(struct device *dev)
-{
-	struct tps80031_rtc *rtc = dev_get_drvdata(dev);
-
-	if (device_may_wakeup(dev))
-		disable_irq_wake(rtc->irq);
-	return 0;
-};
-#endif
-
-static SIMPLE_DEV_PM_OPS(tps80031_pm_ops, tps80031_rtc_suspend,
-			tps80031_rtc_resume);
-
-static struct platform_driver tps80031_rtc_driver = {
-	.driver	= {
-		.name	= "tps80031-rtc",
-		.pm	= &tps80031_pm_ops,
-	},
-	.probe	= tps80031_rtc_probe,
-};
-
-module_platform_driver(tps80031_rtc_driver);
-
-MODULE_ALIAS("platform:tps80031-rtc");
-MODULE_DESCRIPTION("TI TPS80031/TPS80032 RTC driver");
-MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 2c40fe1..6043c83 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -731,7 +731,7 @@
 		ff_flag = (devmap->features & DASD_FEATURE_FAILFAST) != 0;
 	else
 		ff_flag = (DASD_FEATURE_DEFAULT & DASD_FEATURE_FAILFAST) != 0;
-	return snprintf(buf, PAGE_SIZE, ff_flag ? "1\n" : "0\n");
+	return sysfs_emit(buf, ff_flag ? "1\n" : "0\n");
 }
 
 static ssize_t dasd_ff_store(struct device *dev, struct device_attribute *attr,
@@ -773,7 +773,7 @@
 	spin_unlock(&dasd_devmap_lock);
 
 out:
-	return snprintf(buf, PAGE_SIZE, ro_flag ? "1\n" : "0\n");
+	return sysfs_emit(buf, ro_flag ? "1\n" : "0\n");
 }
 
 static ssize_t
@@ -834,7 +834,7 @@
 		erplog = (devmap->features & DASD_FEATURE_ERPLOG) != 0;
 	else
 		erplog = (DASD_FEATURE_DEFAULT & DASD_FEATURE_ERPLOG) != 0;
-	return snprintf(buf, PAGE_SIZE, erplog ? "1\n" : "0\n");
+	return sysfs_emit(buf, erplog ? "1\n" : "0\n");
 }
 
 static ssize_t
@@ -1033,13 +1033,13 @@
 		dasd_put_device(device);
 		goto out;
 	} else {
-		len = snprintf(buf, PAGE_SIZE, "%s\n",
-			       device->discipline->name);
+		len = sysfs_emit(buf, "%s\n",
+				 device->discipline->name);
 		dasd_put_device(device);
 		return len;
 	}
 out:
-	len = snprintf(buf, PAGE_SIZE, "none\n");
+	len = sysfs_emit(buf, "none\n");
 	return len;
 }
 
@@ -1056,30 +1056,30 @@
 	if (!IS_ERR(device)) {
 		switch (device->state) {
 		case DASD_STATE_NEW:
-			len = snprintf(buf, PAGE_SIZE, "new\n");
+			len = sysfs_emit(buf, "new\n");
 			break;
 		case DASD_STATE_KNOWN:
-			len = snprintf(buf, PAGE_SIZE, "detected\n");
+			len = sysfs_emit(buf, "detected\n");
 			break;
 		case DASD_STATE_BASIC:
-			len = snprintf(buf, PAGE_SIZE, "basic\n");
+			len = sysfs_emit(buf, "basic\n");
 			break;
 		case DASD_STATE_UNFMT:
-			len = snprintf(buf, PAGE_SIZE, "unformatted\n");
+			len = sysfs_emit(buf, "unformatted\n");
 			break;
 		case DASD_STATE_READY:
-			len = snprintf(buf, PAGE_SIZE, "ready\n");
+			len = sysfs_emit(buf, "ready\n");
 			break;
 		case DASD_STATE_ONLINE:
-			len = snprintf(buf, PAGE_SIZE, "online\n");
+			len = sysfs_emit(buf, "online\n");
 			break;
 		default:
-			len = snprintf(buf, PAGE_SIZE, "no stat\n");
+			len = sysfs_emit(buf, "no stat\n");
 			break;
 		}
 		dasd_put_device(device);
 	} else
-		len = snprintf(buf, PAGE_SIZE, "unknown\n");
+		len = sysfs_emit(buf, "unknown\n");
 	return len;
 }
 
@@ -1120,7 +1120,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	vendor = "";
 	if (IS_ERR(device))
-		return snprintf(buf, PAGE_SIZE, "%s\n", vendor);
+		return sysfs_emit(buf, "%s\n", vendor);
 
 	if (device->discipline && device->discipline->get_uid &&
 	    !device->discipline->get_uid(device, &uid))
@@ -1128,7 +1128,7 @@
 
 	dasd_put_device(device);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", vendor);
+	return sysfs_emit(buf, "%s\n", vendor);
 }
 
 static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
@@ -1148,7 +1148,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	uid_string[0] = 0;
 	if (IS_ERR(device))
-		return snprintf(buf, PAGE_SIZE, "%s\n", uid_string);
+		return sysfs_emit(buf, "%s\n", uid_string);
 
 	if (device->discipline && device->discipline->get_uid &&
 	    !device->discipline->get_uid(device, &uid)) {
@@ -1183,7 +1183,7 @@
 	}
 	dasd_put_device(device);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", uid_string);
+	return sysfs_emit(buf, "%s\n", uid_string);
 }
 static DEVICE_ATTR(uid, 0444, dasd_uid_show, NULL);
 
@@ -1201,7 +1201,7 @@
 		eer_flag = dasd_eer_enabled(devmap->device);
 	else
 		eer_flag = 0;
-	return snprintf(buf, PAGE_SIZE, eer_flag ? "1\n" : "0\n");
+	return sysfs_emit(buf, eer_flag ? "1\n" : "0\n");
 }
 
 static ssize_t
@@ -1243,7 +1243,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
 		return -ENODEV;
-	len = snprintf(buf, PAGE_SIZE, "%lu\n", device->default_expires);
+	len = sysfs_emit(buf, "%lu\n", device->default_expires);
 	dasd_put_device(device);
 	return len;
 }
@@ -1283,7 +1283,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
 		return -ENODEV;
-	len = snprintf(buf, PAGE_SIZE, "%lu\n", device->default_retries);
+	len = sysfs_emit(buf, "%lu\n", device->default_retries);
 	dasd_put_device(device);
 	return len;
 }
@@ -1324,7 +1324,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
 		return -ENODEV;
-	len = snprintf(buf, PAGE_SIZE, "%lu\n", device->blk_timeout);
+	len = sysfs_emit(buf, "%lu\n", device->blk_timeout);
 	dasd_put_device(device);
 	return len;
 }
@@ -1398,11 +1398,11 @@
 		return -ENODEV;
 	if (!device->discipline || !device->discipline->hpf_enabled) {
 		dasd_put_device(device);
-		return snprintf(buf, PAGE_SIZE, "%d\n", dasd_nofcx);
+		return sysfs_emit(buf, "%d\n", dasd_nofcx);
 	}
 	hpf = device->discipline->hpf_enabled(device);
 	dasd_put_device(device);
-	return snprintf(buf, PAGE_SIZE, "%d\n", hpf);
+	return sysfs_emit(buf, "%d\n", hpf);
 }
 
 static DEVICE_ATTR(hpf, 0444, dasd_hpf_show, NULL);
@@ -1416,13 +1416,13 @@
 
 	devmap = dasd_find_busid(dev_name(dev));
 	if (IS_ERR(devmap)) {
-		rc = snprintf(buf, PAGE_SIZE, "ignore\n");
+		rc = sysfs_emit(buf, "ignore\n");
 	} else {
 		spin_lock(&dasd_devmap_lock);
 		if (devmap->features & DASD_FEATURE_FAILONSLCK)
-			rc = snprintf(buf, PAGE_SIZE, "fail\n");
+			rc = sysfs_emit(buf, "fail\n");
 		else
-			rc = snprintf(buf, PAGE_SIZE, "ignore\n");
+			rc = sysfs_emit(buf, "ignore\n");
 		spin_unlock(&dasd_devmap_lock);
 	}
 	return rc;
@@ -1457,14 +1457,14 @@
 
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
-		return snprintf(buf, PAGE_SIZE, "none\n");
+		return sysfs_emit(buf, "none\n");
 
 	if (test_bit(DASD_FLAG_IS_RESERVED, &device->flags))
-		rc = snprintf(buf, PAGE_SIZE, "reserved\n");
+		rc = sysfs_emit(buf, "reserved\n");
 	else if (test_bit(DASD_FLAG_LOCK_STOLEN, &device->flags))
-		rc = snprintf(buf, PAGE_SIZE, "lost\n");
+		rc = sysfs_emit(buf, "lost\n");
 	else
-		rc = snprintf(buf, PAGE_SIZE, "none\n");
+		rc = sysfs_emit(buf, "none\n");
 	dasd_put_device(device);
 	return rc;
 }
@@ -1531,7 +1531,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
 		return -ENODEV;
-	len = snprintf(buf, PAGE_SIZE, "%lu\n", device->path_thrhld);
+	len = sysfs_emit(buf, "%lu\n", device->path_thrhld);
 	dasd_put_device(device);
 	return len;
 }
@@ -1578,7 +1578,7 @@
 	else
 		flag = (DASD_FEATURE_DEFAULT &
 			DASD_FEATURE_PATH_AUTODISABLE) != 0;
-	return snprintf(buf, PAGE_SIZE, flag ? "1\n" : "0\n");
+	return sysfs_emit(buf, flag ? "1\n" : "0\n");
 }
 
 static ssize_t
@@ -1616,7 +1616,7 @@
 	device = dasd_device_from_cdev(to_ccwdev(dev));
 	if (IS_ERR(device))
 		return -ENODEV;
-	len = snprintf(buf, PAGE_SIZE, "%lu\n", device->path_interval);
+	len = sysfs_emit(buf, "%lu\n", device->path_interval);
 	dasd_put_device(device);
 	return len;
 }
@@ -1662,9 +1662,9 @@
 		return -ENODEV;
 	fc_sec = dasd_path_get_fcs_device(device);
 	if (fc_sec == -EINVAL)
-		rc = snprintf(buf, PAGE_SIZE, "Inconsistent\n");
+		rc = sysfs_emit(buf, "Inconsistent\n");
 	else
-		rc = snprintf(buf, PAGE_SIZE, "%s\n", dasd_path_get_fcs_str(fc_sec));
+		rc = sysfs_emit(buf, "%s\n", dasd_path_get_fcs_str(fc_sec));
 	dasd_put_device(device);
 
 	return rc;
@@ -1677,7 +1677,7 @@
 	struct dasd_path *path = to_dasd_path(kobj);
 	unsigned int fc_sec = path->fc_security;
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", dasd_path_get_fcs_str(fc_sec));
+	return sysfs_emit(buf, "%s\n", dasd_path_get_fcs_str(fc_sec));
 }
 
 static struct kobj_attribute path_fcs_attribute =
@@ -1698,7 +1698,7 @@
 		val = _func(device);					\
 	dasd_put_device(device);					\
 									\
-	return snprintf(buf, PAGE_SIZE, "%d\n", val);			\
+	return sysfs_emit(buf, "%d\n", val);			\
 }									\
 static DEVICE_ATTR(_name, 0444, dasd_##_name##_show, NULL);		\
 
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 646ec796..dfde0d9 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -1047,24 +1047,24 @@
 static ssize_t
 raw3270_model_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%i\n",
-			((struct raw3270 *) dev_get_drvdata(dev))->model);
+	return sysfs_emit(buf, "%i\n",
+			  ((struct raw3270 *)dev_get_drvdata(dev))->model);
 }
 static DEVICE_ATTR(model, 0444, raw3270_model_show, NULL);
 
 static ssize_t
 raw3270_rows_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%i\n",
-			((struct raw3270 *) dev_get_drvdata(dev))->rows);
+	return sysfs_emit(buf, "%i\n",
+			  ((struct raw3270 *)dev_get_drvdata(dev))->rows);
 }
 static DEVICE_ATTR(rows, 0444, raw3270_rows_show, NULL);
 
 static ssize_t
 raw3270_columns_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%i\n",
-			((struct raw3270 *) dev_get_drvdata(dev))->cols);
+	return sysfs_emit(buf, "%i\n",
+			  ((struct raw3270 *)dev_get_drvdata(dev))->cols);
 }
 static DEVICE_ATTR(columns, 0444, raw3270_columns_show, NULL);
 
diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c
index 1f5fab6..f7e75d9 100644
--- a/drivers/s390/char/tape_std.c
+++ b/drivers/s390/char/tape_std.c
@@ -53,7 +53,6 @@
 tape_std_assign(struct tape_device *device)
 {
 	int                  rc;
-	struct timer_list    timeout;
 	struct tape_request *request;
 
 	request = tape_alloc_request(2, 11);
@@ -70,7 +69,7 @@
 	 * So we set up a timeout for this call.
 	 */
 	timer_setup(&request->timer, tape_std_assign_timeout, 0);
-	mod_timer(&timeout, jiffies + 2 * HZ);
+	mod_timer(&request->timer, jiffies + msecs_to_jiffies(2000));
 
 	rc = tape_do_io_interruptible(device, request);
 
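The tape_std_assign() fix arms the timer that was actually initialized: the old code ran
timer_setup() on request->timer but then called mod_timer() on a separate on-stack timer_list
that was never set up. The correct pairing looks like this (a sketch with invented names):

	#include <linux/jiffies.h>
	#include <linux/printk.h>
	#include <linux/timer.h>

	struct my_request {
		struct timer_list timer;
	};

	static void my_timeout(struct timer_list *t)
	{
		struct my_request *req = from_timer(req, t, timer);

		pr_warn("request %p timed out\n", req);
	}

	static void my_start(struct my_request *req)
	{
		timer_setup(&req->timer, my_timeout, 0);
		/* Arm the same timer that was just set up, two seconds out. */
		mod_timer(&req->timer, jiffies + msecs_to_jiffies(2000));
	}
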
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 1097e76..5440f28 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -285,7 +285,7 @@
 	if (status < 0)
 		return status;
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", status);
+	return sysfs_emit(buf, "%d\n", status);
 }
 
 static int cfg_wait_idle(void);
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 2bc55cc..ce9e751 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -437,8 +437,8 @@
 	struct subchannel *sch = to_subchannel(dev);
 	struct pmcw *pmcw = &sch->schib.pmcw;
 
-	if ((pmcw->st == SUBCHANNEL_TYPE_IO ||
-	     pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv)
+	if ((pmcw->st == SUBCHANNEL_TYPE_IO && pmcw->dnv) ||
+	    (pmcw->st == SUBCHANNEL_TYPE_MSG && pmcw->w))
 		return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid,
 				  pmcw->dev);
 	else
diff --git a/drivers/scsi/elx/libefc_sli/sli4.c b/drivers/scsi/elx/libefc_sli/sli4.c
index 6c6c04e..907d67a 100644
--- a/drivers/scsi/elx/libefc_sli/sli4.c
+++ b/drivers/scsi/elx/libefc_sli/sli4.c
@@ -4145,7 +4145,7 @@
 sli_get_read_config(struct sli4 *sli4)
 {
 	struct sli4_rsp_read_config *conf = sli4->bmbx.virt;
-	u32 i, total, total_size;
+	u32 i, total;
 	u32 *base;
 
 	if (sli_cmd_read_config(sli4, sli4->bmbx.virt)) {
@@ -4203,8 +4203,7 @@
 
 	for (i = 0; i < SLI4_RSRC_MAX; i++) {
 		total = sli4->ext[i].number * sli4->ext[i].size;
-		total_size = BITS_TO_LONGS(total) * sizeof(long);
-		sli4->ext[i].use_map = kzalloc(total_size, GFP_KERNEL);
+		sli4->ext[i].use_map = bitmap_zalloc(total, GFP_KERNEL);
 		if (!sli4->ext[i].use_map) {
 			efc_log_err(sli4, "bitmap memory allocation failed %d\n",
 				    i);
@@ -4743,7 +4742,7 @@
 	sli4->ext[0].base = NULL;
 
 	for (i = 0; i < SLI4_RSRC_MAX; i++) {
-		kfree(sli4->ext[i].use_map);
+		bitmap_free(sli4->ext[i].use_map);
 		sli4->ext[i].use_map = NULL;
 		sli4->ext[i].base = NULL;
 	}
@@ -4784,7 +4783,7 @@
 	for (i = 0; i < SLI4_RSRC_MAX; i++) {
 		sli4->ext[i].base = NULL;
 
-		kfree(sli4->ext[i].use_map);
+		bitmap_free(sli4->ext[i].use_map);
 		sli4->ext[i].use_map = NULL;
 	}
 
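The open-coded kzalloc(BITS_TO_LONGS(total) * sizeof(long)) / kfree() pair becomes
bitmap_zalloc()/bitmap_free(), which centralize the size arithmetic and document intent. The
idiom (a sketch):

	#include <linux/bitmap.h>
	#include <linux/gfp.h>

	static unsigned long *make_map(unsigned int nbits)
	{
		/* Zeroed allocation of BITS_TO_LONGS(nbits) longs. */
		return bitmap_zalloc(nbits, GFP_KERNEL);
	}

	static void drop_map(unsigned long *map)
	{
		bitmap_free(map);	/* NULL-safe, like kfree() */
	}
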
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index cda0135..8049b00 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -388,6 +388,7 @@
 	shost->shost_state = SHOST_CREATED;
 	INIT_LIST_HEAD(&shost->__devices);
 	INIT_LIST_HEAD(&shost->__targets);
+	INIT_LIST_HEAD(&shost->eh_abort_list);
 	INIT_LIST_HEAD(&shost->eh_cmd_q);
 	INIT_LIST_HEAD(&shost->starved_list);
 	init_waitqueue_head(&shost->host_wait);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 27eb652..81dab9b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -639,8 +639,8 @@
 	mpi_request->IOCParameter = MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP;
 	current_time = ktime_get_real();
 	TimeStamp = ktime_to_ms(current_time);
-	mpi_request->Reserved7 = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
-	mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp >> 32);
+	mpi_request->Reserved7 = cpu_to_le32(TimeStamp >> 32);
+	mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
 	init_completion(&ioc->scsih_cmds.done);
 	ioc->put_smid_default(ioc, smid);
 	dinitprintk(ioc, ioc_info(ioc,
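
The swapped assignments above put the upper 32 bits of the millisecond timestamp in Reserved7
and the lower 32 bits in IOCParameterValue. The kernel's half-word helpers express the same
split more explicitly; an equivalent sketch:

	u64 ms = ktime_to_ms(ktime_get_real());

	mpi_request->Reserved7         = cpu_to_le32(upper_32_bits(ms));
	mpi_request->IOCParameterValue = cpu_to_le32(lower_32_bits(ms));
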
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index db6a759..a0af986 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -142,6 +142,8 @@
 
 #define MPT_MAX_CALLBACKS		32
 
+#define MPT_MAX_HBA_NUM_PHYS		32
+
 #define INTERNAL_CMDS_COUNT		10	/* reserved cmds */
 /* reserved for issuing internally framed scsi io cmds */
 #define INTERNAL_SCSIIO_CMDS_COUNT	3
@@ -798,6 +800,7 @@
  * @enclosure_handle: handle for this a member of an enclosure
  * @device_info: bitwise defining capabilities of this sas_host/expander
  * @responding: used in _scsih_expander_device_mark_responding
+ * @nr_phys_allocated: number of phys for which memory has been allocated
  * @phy: a list of phys that make up this sas_host/expander
  * @sas_port_list: list of ports attached to this sas_host/expander
  * @port: hba port entry containing node's port number info
@@ -813,6 +816,7 @@
 	u16	enclosure_handle;
 	u64	enclosure_logical_id;
 	u8	responding;
+	u8	nr_phys_allocated;
 	struct hba_port *port;
 	struct	_sas_phy *phy;
 	struct list_head sas_port_list;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index cee7170b..00792767 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3869,7 +3869,7 @@
 
 	shost_for_each_device(sdev, ioc->shost) {
 		sas_device_priv_data = sdev->hostdata;
-		if (!sas_device_priv_data)
+		if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
 			continue;
 		if (sas_device_priv_data->sas_target->sas_address
 		    != sas_address)
@@ -6406,11 +6406,26 @@
 	int i, j, count = 0, lcount = 0;
 	int ret;
 	u64 sas_addr;
+	u8 num_phys;
 
 	drsprintk(ioc, ioc_info(ioc,
 	    "updating ports for sas_host(0x%016llx)\n",
 	    (unsigned long long)ioc->sas_hba.sas_address));
 
+	mpt3sas_config_get_number_hba_phys(ioc, &num_phys);
+	if (!num_phys) {
+		ioc_err(ioc, "failure at %s:%d/%s()!\n",
+		    __FILE__, __LINE__, __func__);
+		return;
+	}
+
+	if (num_phys > ioc->sas_hba.nr_phys_allocated) {
+		ioc_err(ioc, "failure at %s:%d/%s()!\n",
+		   __FILE__, __LINE__, __func__);
+		return;
+	}
+	ioc->sas_hba.num_phys = num_phys;
+
 	port_table = kcalloc(ioc->sas_hba.num_phys,
 	    sizeof(struct hba_port), GFP_KERNEL);
 	if (!port_table)
@@ -6611,6 +6626,30 @@
 			ioc->sas_hba.phy[i].hba_vphy = 1;
 		}
 
+		/*
+		 * Add new HBA phys to the SCSI transport layer (STL) if they
+		 * were added as part of an HBA firmware upgrade/downgrade
+		 * operation.
+		 */
+		if (!ioc->sas_hba.phy[i].phy) {
+			if ((mpt3sas_config_get_phy_pg0(ioc, &mpi_reply,
+							&phy_pg0, i))) {
+				ioc_err(ioc, "failure at %s:%d/%s()!\n",
+					__FILE__, __LINE__, __func__);
+				continue;
+			}
+			ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+				MPI2_IOCSTATUS_MASK;
+			if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
+				ioc_err(ioc, "failure at %s:%d/%s()!\n",
+					__FILE__, __LINE__, __func__);
+				continue;
+			}
+			ioc->sas_hba.phy[i].phy_id = i;
+			mpt3sas_transport_add_host_phy(ioc,
+				&ioc->sas_hba.phy[i], phy_pg0,
+				ioc->sas_hba.parent_dev);
+			continue;
+		}
 		ioc->sas_hba.phy[i].handle = ioc->sas_hba.handle;
 		attached_handle = le16_to_cpu(sas_iounit_pg0->PhyData[i].
 		    AttachedDevHandle);
@@ -6622,6 +6661,19 @@
 		    attached_handle, i, link_rate,
 		    ioc->sas_hba.phy[i].port);
 	}
+	/*
+	 * Clear the phy details if this phy was disabled as part of an
+	 * HBA firmware upgrade/downgrade operation.
+	 */
+	for (i = ioc->sas_hba.num_phys;
+	     i < ioc->sas_hba.nr_phys_allocated; i++) {
+		if (ioc->sas_hba.phy[i].phy &&
+		    ioc->sas_hba.phy[i].phy->negotiated_linkrate >=
+		    SAS_LINK_RATE_1_5_GBPS)
+			mpt3sas_transport_update_links(ioc,
+				ioc->sas_hba.sas_address, 0, i,
+				MPI2_SAS_NEG_LINK_RATE_PHY_DISABLED, NULL);
+	}
  out:
 	kfree(sas_iounit_pg0);
 }
@@ -6654,7 +6706,10 @@
 			__FILE__, __LINE__, __func__);
 		return;
 	}
-	ioc->sas_hba.phy = kcalloc(num_phys,
+
+	ioc->sas_hba.nr_phys_allocated = max_t(u8,
+	    MPT_MAX_HBA_NUM_PHYS, num_phys);
+	ioc->sas_hba.phy = kcalloc(ioc->sas_hba.nr_phys_allocated,
 	    sizeof(struct _sas_phy), GFP_KERNEL);
 	if (!ioc->sas_hba.phy) {
 		ioc_err(ioc, "failure at %s:%d/%s()!\n",
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 30f9545..032efb2 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2762,7 +2762,12 @@
 			if (fcport->loop_id != FC_NO_LOOP_ID)
 				fcport->logout_on_delete = 1;
 
-			qlt_schedule_sess_for_deletion(fcport);
+			if (!EDIF_NEGOTIATION_PENDING(fcport)) {
+				ql_dbg(ql_dbg_disc, fcport->vha, 0x911e,
+				       "%s %d schedule session deletion\n", __func__,
+				       __LINE__);
+				qlt_schedule_sess_for_deletion(fcport);
+			}
 		} else {
 			qla2x00_port_logout(fcport->vha, fcport);
 		}
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 8924eeb9..9ebf4a2 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -639,9 +639,9 @@
 	u8 els_opcode;
 	u8 vp_idx;
 	__le16 nport_handle;
-	u16 control_flags;
+	u16 control_flags, ox_id;
 	__le32 rx_xchg_address;
-	port_id_t did;
+	port_id_t did, sid;
 	u32 tx_len, tx_byte_count, rx_len, rx_byte_count;
 	dma_addr_t tx_addr, rx_addr;
 
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index ad746c6..53d2b85 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -218,7 +218,7 @@
 		    "%s edif not enabled\n", __func__);
 		goto done;
 	}
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		ql_dbg(ql_dbg_edif, vha, 0x09102,
 		    "%s doorbell not enabled\n", __func__);
 		goto done;
@@ -290,63 +290,6 @@
 	return false;
 }
 
-static void qla_edif_reset_auth_wait(struct fc_port *fcport, int state,
-		int waitonly)
-{
-	int cnt, max_cnt = 200;
-	bool traced = false;
-
-	fcport->keep_nport_handle = 1;
-
-	if (!waitonly) {
-		qla2x00_set_fcport_disc_state(fcport, state);
-		qlt_schedule_sess_for_deletion(fcport);
-	} else {
-		qla2x00_set_fcport_disc_state(fcport, state);
-	}
-
-	ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
-		"%s: waiting for session, max_cnt=%u\n",
-		__func__, max_cnt);
-
-	cnt = 0;
-
-	if (waitonly) {
-		/* Marker wait min 10 msecs. */
-		msleep(50);
-		cnt += 50;
-	}
-	while (1) {
-		if (!traced) {
-			ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
-			    "%s: session sleep.\n",
-			    __func__);
-			traced = true;
-		}
-		msleep(20);
-		cnt++;
-		if (waitonly && (fcport->disc_state == state ||
-			fcport->disc_state == DSC_LOGIN_COMPLETE))
-			break;
-		if (fcport->disc_state == DSC_LOGIN_AUTH_PEND)
-			break;
-		if (cnt > max_cnt)
-			break;
-	}
-
-	if (!waitonly) {
-		ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
-		    "%s: waited for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
-		    __func__, fcport->port_name, fcport->loop_id,
-		    fcport->d_id.b24, fcport, fcport->disc_state, cnt);
-	} else {
-		ql_dbg(ql_dbg_edif, fcport->vha, 0xf086,
-		    "%s: waited ONLY for session - %8phC, loopid=%x portid=%06x fcport=%p state=%u, cnt=%u\n",
-		    __func__, fcport->port_name, fcport->loop_id,
-		    fcport->d_id.b24, fcport, fcport->disc_state, cnt);
-	}
-}
-
 static void
 qla_edif_free_sa_ctl(fc_port_t *fcport, struct edif_sa_ctl *sa_ctl,
 	int index)
@@ -529,7 +472,8 @@
 	struct app_start_reply	appreply;
 	struct fc_port  *fcport, *tf;
 
-	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app start\n", __func__);
+	ql_log(ql_log_info, vha, 0x1313,
+	       "EDIF application registration with driver, FC device connections will be re-established.\n");
 
 	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
 	    bsg_job->request_payload.sg_cnt, &appstart,
@@ -538,9 +482,9 @@
 	ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app_vid=%x app_start_flags %x\n",
 	     __func__, appstart.app_info.app_vid, appstart.app_start_flags);
 
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		/* mark doorbell as active since an app is now present */
-		vha->e_dbell.db_flags = EDB_ACTIVE;
+		vha->e_dbell.db_flags |= EDB_ACTIVE;
 	} else {
 		ql_dbg(ql_dbg_edif, vha, 0x911e, "%s doorbell already active\n",
 		     __func__);
@@ -554,37 +498,36 @@
 		qla2xxx_wake_dpc(vha);
 	} else {
 		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
+			ql_dbg(ql_dbg_edif, vha, 0x2058,
+			       "FCSP - nn %8phN pn %8phN portid=%06x.\n",
+			       fcport->node_name, fcport->port_name,
+			       fcport->d_id.b24);
 			ql_dbg(ql_dbg_edif, vha, 0xf084,
-			       "%s: sess %p %8phC lid %#04x s_id %06x logout %d\n",
-			       __func__, fcport, fcport->port_name,
-			       fcport->loop_id, fcport->d_id.b24,
-			       fcport->logout_on_delete);
-
-			ql_dbg(ql_dbg_edif, vha, 0xf084,
-			       "keep %d els_logo %d disc state %d auth state %d stop state %d\n",
-			       fcport->keep_nport_handle,
-			       fcport->send_els_logo, fcport->disc_state,
-			       fcport->edif.auth_state, fcport->edif.app_stop);
+			       "%s: se_sess %p / sess %p from port %8phC "
+			       "loop_id %#04x s_id %06x logout %d "
+			       "keep %d els_logo %d disc state %d auth state %d"
+			       "stop state %d\n",
+			       __func__, fcport->se_sess, fcport,
+			       fcport->port_name, fcport->loop_id,
+			       fcport->d_id.b24, fcport->logout_on_delete,
+			       fcport->keep_nport_handle, fcport->send_els_logo,
+			       fcport->disc_state, fcport->edif.auth_state,
+			       fcport->edif.app_stop);
 
 			if (atomic_read(&vha->loop_state) == LOOP_DOWN)
 				break;
-			if (!(fcport->flags & FCF_FCSP_DEVICE))
-				continue;
 
 			fcport->edif.app_started = 1;
-			if (fcport->edif.app_stop ||
-			    (fcport->disc_state != DSC_LOGIN_COMPLETE &&
-			     fcport->disc_state != DSC_LOGIN_PEND &&
-			     fcport->disc_state != DSC_DELETED)) {
-				/* no activity */
-				fcport->edif.app_stop = 0;
+			fcport->login_retry = vha->hw->login_retry_count;
 
-				ql_dbg(ql_dbg_edif, vha, 0x911e,
-				       "%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
-				       __func__, fcport->port_name);
-				fcport->edif.app_sess_online = 1;
-				qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
-			}
+			/* no activity */
+			fcport->edif.app_stop = 0;
+
+			ql_dbg(ql_dbg_edif, vha, 0x911e,
+			       "%s wwpn %8phC calling qla_edif_reset_auth_wait\n",
+			       __func__, fcport->port_name);
+			fcport->edif.app_sess_online = 0;
+			qlt_schedule_sess_for_deletion(fcport);
 			qla_edif_sa_ctl_init(vha, fcport);
 		}
 	}
@@ -601,14 +544,14 @@
 	appreply.edif_enode_active = vha->pur_cinfo.enode_flags;
 	appreply.edif_edb_active = vha->e_dbell.db_flags;
 
-	bsg_job->reply_len = sizeof(struct fc_bsg_reply) +
-	    sizeof(struct app_start_reply);
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
 
 	SET_DID_STATUS(bsg_reply->result, DID_OK);
 
-	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
-	    bsg_job->reply_payload.sg_cnt, &appreply,
-	    sizeof(struct app_start_reply));
+	bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							       bsg_job->reply_payload.sg_cnt,
+							       &appreply,
+							       sizeof(struct app_start_reply));
 
 	ql_dbg(ql_dbg_edif, vha, 0x911d,
 	    "%s app start completed with 0x%x\n",
@@ -800,15 +743,15 @@
 		ql_dbg(ql_dbg_edif, vha, 0x911e,
 		    "%s AUTH complete - RESUME with prli for wwpn %8phC\n",
 		    __func__, fcport->port_name);
-		qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 1);
 		qla24xx_post_prli_work(vha, fcport);
 	}
 
 errstate_exit:
 	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
-	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
-	    bsg_job->reply_payload.sg_cnt, &appplogireply,
-	    sizeof(struct app_plogi_reply));
+	bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							       bsg_job->reply_payload.sg_cnt,
+							       &appplogireply,
+							       sizeof(struct app_plogi_reply));
 
 	return rval;
 }
@@ -873,7 +816,7 @@
 
 		if (qla_ini_mode_enabled(fcport->vha)) {
 			fcport->send_els_logo = 1;
-			qla_edif_reset_auth_wait(fcport, DSC_LOGIN_PEND, 0);
+			qlt_schedule_sess_for_deletion(fcport);
 		}
 	}
 
@@ -891,7 +834,7 @@
 qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 {
 	int32_t			rval = 0;
-	int32_t			num_cnt;
+	int32_t			pcnt = 0;
 	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
 	struct app_pinfo_req	app_req;
 	struct app_pinfo_reply	*app_reply;
@@ -903,16 +846,14 @@
 	    bsg_job->request_payload.sg_cnt, &app_req,
 	    sizeof(struct app_pinfo_req));
 
-	num_cnt = app_req.num_ports;	/* num of ports alloc'd by app */
-
 	app_reply = kzalloc((sizeof(struct app_pinfo_reply) +
-	    sizeof(struct app_pinfo) * num_cnt), GFP_KERNEL);
+	    sizeof(struct app_pinfo) * app_req.num_ports), GFP_KERNEL);
+
 	if (!app_reply) {
 		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
 		rval = -1;
 	} else {
 		struct fc_port	*fcport = NULL, *tf;
-		uint32_t	pcnt = 0;
 
 		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
 			if (!(fcport->flags & FCF_FCSP_DEVICE))
@@ -924,7 +865,7 @@
 			    "APP request entry - portid=%06x.\n", tdid.b24);
 
 			/* Ran out of space */
-			if (pcnt > app_req.num_ports)
+			if (pcnt >= app_req.num_ports)
 				break;
 
 			if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24)
@@ -981,9 +922,11 @@
 		SET_DID_STATUS(bsg_reply->result, DID_OK);
 	}
 
-	sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
-	    bsg_job->reply_payload.sg_cnt, app_reply,
-	    sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * num_cnt);
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
+	bsg_reply->reply_payload_rcv_len = sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
+							       bsg_job->reply_payload.sg_cnt,
+							       app_reply,
+							       sizeof(struct app_pinfo_reply) + sizeof(struct app_pinfo) * pcnt);
 
 	kfree(app_reply);
 
@@ -1000,10 +943,11 @@
 {
 	int32_t			rval = 0;
 	struct fc_bsg_reply	*bsg_reply = bsg_job->reply;
-	uint32_t ret_size, size;
+	uint32_t size;
 
 	struct app_sinfo_req	app_req;
 	struct app_stats_reply	*app_reply;
+	uint32_t pcnt = 0;
 
 	sg_copy_to_buffer(bsg_job->request_payload.sg_list,
 	    bsg_job->request_payload.sg_cnt, &app_req,
@@ -1019,18 +963,12 @@
 	size = sizeof(struct app_stats_reply) +
 	    (sizeof(struct app_sinfo) * app_req.num_ports);
 
-	if (size > bsg_job->reply_payload.payload_len)
-		ret_size = bsg_job->reply_payload.payload_len;
-	else
-		ret_size = size;
-
 	app_reply = kzalloc(size, GFP_KERNEL);
 	if (!app_reply) {
 		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
 		rval = -1;
 	} else {
 		struct fc_port	*fcport = NULL, *tf;
-		uint32_t	pcnt = 0;
 
 		list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
 			if (fcport->edif.enable) {
@@ -1054,9 +992,11 @@
 		SET_DID_STATUS(bsg_reply->result, DID_OK);
 	}
 
+	bsg_job->reply_len = sizeof(struct fc_bsg_reply);
 	bsg_reply->reply_payload_rcv_len =
 	    sg_copy_from_buffer(bsg_job->reply_payload.sg_list,
-	       bsg_job->reply_payload.sg_cnt, app_reply, ret_size);
+	       bsg_job->reply_payload.sg_cnt, app_reply,
+	       sizeof(struct app_stats_reply) + (sizeof(struct app_sinfo) * pcnt));
 
 	kfree(app_reply);
 
@@ -1130,8 +1070,7 @@
 		    __func__,
 		    bsg_request->rqst_data.h_vendor.vendor_cmd[1]);
 		rval = EXT_STATUS_INVALID_PARAM;
-		bsg_job->reply_len = sizeof(struct fc_bsg_reply);
-		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
+		done = false;
 		break;
 	}
 
@@ -1330,7 +1269,7 @@
 		goto done;
 	}
 
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		ql_log(ql_log_warn, vha, 0x70a1, "App not started\n");
 		rval = -EIO;
 		SET_DID_STATUS(bsg_reply->result, DID_ERROR);
@@ -1651,6 +1590,40 @@
 	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
 }
 
+static void qla_enode_clear(scsi_qla_host_t *vha, port_id_t portid)
+{
+	unsigned    long flags;
+	struct enode    *e, *tmp;
+	struct purexevent   *purex;
+	LIST_HEAD(enode_list);
+
+	if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) {
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		       "%s enode not active\n", __func__);
+		return;
+	}
+	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
+	list_for_each_entry_safe(e, tmp, &vha->pur_cinfo.head, list) {
+		purex = &e->u.purexinfo;
+		if (purex->pur_info.pur_sid.b24 == portid.b24) {
+			ql_dbg(ql_dbg_edif, vha, 0x911d,
+			    "%s free ELS sid=%06x. xchg %x, nb=%xh\n",
+			    __func__, portid.b24,
+			    purex->pur_info.pur_rx_xchg_address,
+			    purex->pur_info.pur_bytes_rcvd);
+
+			list_del_init(&e->list);
+			list_add_tail(&e->list, &enode_list);
+		}
+	}
+	spin_unlock_irqrestore(&vha->pur_cinfo.pur_lock, flags);
+
+	list_for_each_entry_safe(e, tmp, &enode_list, list) {
+		list_del_init(&e->list);
+		qla_enode_free(vha, e);
+	}
+}
+
 /*
  *  allocate enode struct and populate buffer
  *  returns: enode pointer with buffers
@@ -1695,41 +1668,25 @@
 qla_enode_find(scsi_qla_host_t *vha, uint32_t ntype, uint32_t p1, uint32_t p2)
 {
 	struct enode		*node_rtn = NULL;
-	struct enode		*list_node = NULL;
+	struct enode		*list_node, *q;
 	unsigned long		flags;
-	struct list_head	*pos, *q;
 	uint32_t		sid;
-	uint32_t		rw_flag;
 	struct purexevent	*purex;
 
 	/* secure the list from moving under us */
 	spin_lock_irqsave(&vha->pur_cinfo.pur_lock, flags);
 
-	list_for_each_safe(pos, q, &vha->pur_cinfo.head) {
-		list_node = list_entry(pos, struct enode, list);
+	list_for_each_entry_safe(list_node, q, &vha->pur_cinfo.head, list) {
 
 		/* node type determines what p1 and p2 are */
 		purex = &list_node->u.purexinfo;
 		sid = p1;
-		rw_flag = p2;
 
 		if (purex->pur_info.pur_sid.b24 == sid) {
-			if (purex->pur_info.pur_pend == 1 &&
-			    rw_flag == PUR_GET) {
-				/*
-				 * if the receive is in progress
-				 * and its a read/get then can't
-				 * transfer yet
-				 */
-				ql_dbg(ql_dbg_edif, vha, 0x9106,
-				    "%s purex xfer in progress for sid=%x\n",
-				    __func__, sid);
-			} else {
-				/* found it and its complete */
-				node_rtn = list_node;
-				list_del(pos);
-				break;
-			}
+			/* found it and it's complete */
+			node_rtn = list_node;
+			list_del(&list_node->list);
+			break;
 		}
 	}
 
@@ -1802,7 +1759,8 @@
 	qla_els_pt_iocb(vha, els_iocb, a);
 
 	ql_dbg(ql_dbg_edif, vha, 0x0183,
-	    "Sending ELS reject...\n");
+	    "Sending ELS reject ox_id %04x s:%06x -> d:%06x\n",
+	    a->ox_id, a->sid.b24, a->did.b24);
 	ql_dump_buffer(ql_dbg_edif + ql_dbg_verbose, vha, 0x0185,
 	    vha->hw->elsrej.c, sizeof(*vha->hw->elsrej.c));
 	/* flush iocb to mem before notifying hw doorbell */
@@ -1814,7 +1772,7 @@
 void
 qla_edb_init(scsi_qla_host_t *vha)
 {
-	if (vha->e_dbell.db_flags == EDB_ACTIVE) {
+	if (DBELL_ACTIVE(vha)) {
 		/* list already init'd - error */
 		ql_dbg(ql_dbg_edif, vha, 0x09102,
 		    "edif db already initialized, cannot reinit\n");
@@ -1850,6 +1808,57 @@
 	node->ntype = N_UNDEF;
 }
 
+static void qla_edb_clear(scsi_qla_host_t *vha, port_id_t portid)
+{
+	unsigned long flags;
+	struct edb_node *e, *tmp;
+	port_id_t sid;
+	LIST_HEAD(edb_list);
+
+	if (DBELL_INACTIVE(vha)) {
+		/* doorbell list not enabled */
+		ql_dbg(ql_dbg_edif, vha, 0x09102,
+		       "%s doorbell not enabled\n", __func__);
+		return;
+	}
+
+	/* grab lock so list doesn't move */
+	spin_lock_irqsave(&vha->e_dbell.db_lock, flags);
+	list_for_each_entry_safe(e, tmp, &vha->e_dbell.head, list) {
+		switch (e->ntype) {
+		case VND_CMD_AUTH_STATE_NEEDED:
+		case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN:
+			sid = e->u.plogi_did;
+			break;
+		case VND_CMD_AUTH_STATE_ELS_RCVD:
+			sid = e->u.els_sid;
+			break;
+		case VND_CMD_AUTH_STATE_SAUPDATE_COMPL:
+			/* app wants to see this  */
+			continue;
+		default:
+			ql_log(ql_log_warn, vha, 0x09102,
+			       "%s unknown node type: %x\n", __func__, e->ntype);
+			sid.b24 = 0;
+			break;
+		}
+		if (sid.b24 == portid.b24) {
+			ql_dbg(ql_dbg_edif, vha, 0x910f,
+			       "%s free doorbell event : node type = %x %p\n",
+			       __func__, e->ntype, e);
+			list_del_init(&e->list);
+			list_add_tail(&e->list, &edb_list);
+		}
+	}
+	spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags);
+
+	list_for_each_entry_safe(e, tmp, &edb_list, list) {
+		qla_edb_node_free(vha, e);
+		list_del_init(&e->list);
+		kfree(e);
+	}
+}
+
 /* function called when app is stopping */
 
 void
@@ -1858,7 +1867,7 @@
 	unsigned long flags;
 	struct edb_node *node, *q;
 
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		/* doorbell list not enabled */
 		ql_dbg(ql_dbg_edif, vha, 0x09102,
 		    "%s doorbell not enabled\n", __func__);
@@ -1909,7 +1918,7 @@
 {
 	unsigned long		flags;
 
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		/* doorbell list not enabled */
 		ql_dbg(ql_dbg_edif, vha, 0x09102,
 		    "%s doorbell not enabled\n", __func__);
@@ -1940,7 +1949,7 @@
 		return;
 	}
 
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		if (fcport)
 			fcport->edif.auth_state = dbtype;
 		/* doorbell list not enabled */
@@ -2035,7 +2044,7 @@
 	struct qla_hw_data *ha = vha->hw;
 
 	if (!vha->vp_idx && N2N_TOPO(ha) && ha->flags.n2n_fw_acc_sec) {
-		if (vha->e_dbell.db_flags != EDB_ACTIVE &&
+		if (DBELL_INACTIVE(vha) &&
 		    ha->edif_post_stop_cnt_down) {
 			ha->edif_post_stop_cnt_down--;
 
@@ -2073,7 +2082,7 @@
 	sz = 256;
 
 	/* stop new threads from waiting if we're not init'd */
-	if (vha->e_dbell.db_flags != EDB_ACTIVE) {
+	if (DBELL_INACTIVE(vha)) {
 		ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x09122,
 		    "%s error - edif db not enabled\n", __func__);
 		return 0;
@@ -2346,6 +2355,7 @@
 	a.tx_addr = vha->hw->elsrej.cdma;
 	a.vp_idx = vha->vp_idx;
 	a.control_flags = EPD_ELS_RJT;
+	a.ox_id = le16_to_cpu(p->ox_id);
 
 	sid = p->s_id[0] | (p->s_id[1] << 8) | (p->s_id[2] << 16);
 
@@ -2357,7 +2367,7 @@
 		return;
 	}
 
-	if (totlen > MAX_PAYLOAD) {
+	if (totlen > ELS_MAX_PAYLOAD) {
 		ql_dbg(ql_dbg_edif, vha, 0x0910d,
 		    "%s WARNING: verbose ELS frame received (totlen=%x)\n",
 		    __func__, totlen);
@@ -2387,7 +2397,6 @@
 
 	purex = &ptr->u.purexinfo;
 	purex->pur_info.pur_sid = a.did;
-	purex->pur_info.pur_pend = 0;
 	purex->pur_info.pur_bytes_rcvd = totlen;
 	purex->pur_info.pur_rx_xchg_address = le32_to_cpu(p->rx_xchg_addr);
 	purex->pur_info.pur_nphdl = le16_to_cpu(p->nport_handle);
@@ -2396,6 +2405,8 @@
 	purex->pur_info.pur_did.b.al_pa =  p->d_id[0];
 	purex->pur_info.vp_idx = p->vp_idx;
 
+	a.sid = purex->pur_info.pur_did;
+
 	rc = __qla_copy_purex_to_buffer(vha, pkt, rsp, purex->msgp,
 		purex->msgp_len);
 	if (rc) {
@@ -2419,7 +2430,7 @@
 
 	fcport = qla2x00_find_fcport_by_pid(host, &purex->pur_info.pur_sid);
 
-	if (host->e_dbell.db_flags != EDB_ACTIVE ||
+	if (DBELL_INACTIVE(vha) ||
 	    (fcport && EDIF_SESSION_DOWN(fcport))) {
 		ql_dbg(ql_dbg_edif, host, 0x0910c, "%s e_dbell.db_flags =%x %06x\n",
 		    __func__, host->e_dbell.db_flags,
@@ -2436,7 +2447,7 @@
 	ql_dbg(ql_dbg_edif, host, 0x0910c,
 	    "%s COMPLETE purex->pur_info.pur_bytes_rcvd =%xh s:%06x -> d:%06x xchg=%xh\n",
 	    __func__, purex->pur_info.pur_bytes_rcvd, purex->pur_info.pur_sid.b24,
-	    purex->pur_info.pur_did.b24, p->rx_xchg_addr);
+	    purex->pur_info.pur_did.b24, purex->pur_info.pur_rx_xchg_address);
 
 	qla_edb_eventcreate(host, VND_CMD_AUTH_STATE_ELS_RCVD, sid, 0, NULL);
 }
@@ -3139,18 +3150,14 @@
 /* release any sadb entries -- only done at teardown */
 void qla_edif_sadb_release(struct qla_hw_data *ha)
 {
-	struct list_head *pos;
-	struct list_head *tmp;
-	struct edif_sa_index_entry *entry;
+	struct edif_sa_index_entry *entry, *tmp;
 
-	list_for_each_safe(pos, tmp, &ha->sadb_rx_index_list) {
-		entry = list_entry(pos, struct edif_sa_index_entry, next);
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_rx_index_list, next) {
 		list_del(&entry->next);
 		kfree(entry);
 	}
 
-	list_for_each_safe(pos, tmp, &ha->sadb_tx_index_list) {
-		entry = list_entry(pos, struct edif_sa_index_entry, next);
+	list_for_each_entry_safe(entry, tmp, &ha->sadb_tx_index_list, next) {
 		list_del(&entry->next);
 		kfree(entry);
 	}
@@ -3449,7 +3456,7 @@
 
 void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess)
 {
-	if (sess->edif.app_sess_online && vha->e_dbell.db_flags & EDB_ACTIVE) {
+	if (sess->edif.app_sess_online && DBELL_ACTIVE(vha)) {
 		ql_dbg(ql_dbg_disc, vha, 0xf09c,
 			"%s: sess %8phN send port_offline event\n",
 			__func__, sess->port_name);
@@ -3459,3 +3466,12 @@
 		qla2x00_post_aen_work(vha, FCH_EVT_PORT_OFFLINE, sess->d_id.b24);
 	}
 }
+
+void qla_edif_clear_appdata(struct scsi_qla_host *vha, struct fc_port *fcport)
+{
+	if (!(fcport->flags & FCF_FCSP_DEVICE))
+		return;
+
+	qla_edb_clear(vha, fcport->d_id);
+	qla_enode_clear(vha, fcport->d_id);
+}
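
qla_enode_clear() and qla_edb_clear() above share a locking pattern worth
naming: entries that match the departing port are unlinked onto a
stack-local list while the spinlock is held, and only freed after the lock
is dropped, so the free path never runs under the list lock. A minimal
sketch of the pattern; the node type, match callback, and names are
hypothetical, not the driver's:

  #include <linux/list.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>

  struct node {			/* stand-in for enode / edb_node */
  	struct list_head list;
  };

  static void clear_matching(struct list_head *head, spinlock_t *lock,
  			   bool (*match)(struct node *))
  {
  	struct node *n, *tmp;
  	unsigned long flags;
  	LIST_HEAD(reap);

  	/* Phase 1: unlink matches under the lock; no freeing yet. */
  	spin_lock_irqsave(lock, flags);
  	list_for_each_entry_safe(n, tmp, head, list) {
  		if (match(n)) {
  			list_del_init(&n->list);
  			list_add_tail(&n->list, &reap);
  		}
  	}
  	spin_unlock_irqrestore(lock, flags);

  	/* Phase 2: free outside the lock. */
  	list_for_each_entry_safe(n, tmp, &reap, list) {
  		list_del_init(&n->list);
  		kfree(n);
  	}
  }
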
diff --git a/drivers/scsi/qla2xxx/qla_edif.h b/drivers/scsi/qla2xxx/qla_edif.h
index 9e8f28d..a965ca8 100644
--- a/drivers/scsi/qla2xxx/qla_edif.h
+++ b/drivers/scsi/qla2xxx/qla_edif.h
@@ -41,9 +41,12 @@
 };
 
 enum db_flags_t {
-	EDB_ACTIVE = 0x1,
+	EDB_ACTIVE = BIT_0,
 };
 
+#define DBELL_ACTIVE(_v) (_v->e_dbell.db_flags & EDB_ACTIVE)
+#define DBELL_INACTIVE(_v) (!(_v->e_dbell.db_flags & EDB_ACTIVE))
+
 struct edif_dbell {
 	enum db_flags_t		db_flags;
 	spinlock_t		db_lock;
@@ -93,7 +96,6 @@
 };
 
 #define        NUM_ENTRIES     256
-#define        MAX_PAYLOAD     1024
 #define        PUR_GET         1
 
 struct dinfo {
@@ -102,7 +104,6 @@
 };
 
 struct pur_ninfo {
-	unsigned int	pur_pend:1;
 	port_id_t       pur_sid;
 	port_id_t	pur_did;
 	uint8_t		vp_idx;
@@ -128,9 +129,15 @@
 	} u;
 };
 
+#define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
+
 #define EDIF_SESSION_DOWN(_s) \
 	(qla_ini_mode_enabled(_s->vha) && (_s->disc_state == DSC_DELETE_PEND || \
 	 _s->disc_state == DSC_DELETED || \
 	 !_s->edif.app_sess_online))
 
+#define EDIF_NEGOTIATION_PENDING(_fcport) \
+	(DBELL_ACTIVE(_fcport->vha) && \
+	 (_fcport->disc_state == DSC_LOGIN_AUTH_PEND))
+
 #endif	/* __QLA_EDIF_H */
diff --git a/drivers/scsi/qla2xxx/qla_edif_bsg.h b/drivers/scsi/qla2xxx/qla_edif_bsg.h
index 58b718d..53026d8 100644
--- a/drivers/scsi/qla2xxx/qla_edif_bsg.h
+++ b/drivers/scsi/qla2xxx/qla_edif_bsg.h
@@ -8,7 +8,7 @@
 #define __QLA_EDIF_BSG_H
 
 /* BSG Vendor specific commands */
-#define	ELS_MAX_PAYLOAD		1024
+#define	ELS_MAX_PAYLOAD		2112
 #ifndef	WWN_SIZE
 #define WWN_SIZE		8
 #endif
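
The jump from 1024 to 2112 is not arbitrary: 2112 bytes is the maximum
data-field size of a Fibre Channel frame, so ELS pass-through buffers can
now carry a full frame payload. The same constant is programmed into the
firmware's frame_payload_size (see the qla_init.c hunk below) and used to
size the purex DMA pool in qla_os.c, keeping all three in step.
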
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 3c4fa8b..8d8503a 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -142,6 +142,8 @@
 void qla2x00_release_all_sadb(struct scsi_qla_host *vha, struct fc_port *fcport);
 int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsgjob);
 void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess);
+void qla_edif_clear_appdata(struct scsi_qla_host *vha,
+			    struct fc_port *fcport);
 const char *sc_to_str(uint16_t cmd);
 
 /*
@@ -171,7 +173,6 @@
 extern int ql2xgffidenable;
 extern int ql2xenabledif;
 extern int ql2xenablehba_err_chk;
-extern int ql2xtargetreset;
 extern int ql2xdontresethba;
 extern uint64_t ql2xmaxlun;
 extern int ql2xmdcapmask;
@@ -818,7 +819,6 @@
 extern void qlafx00_fxdisc_iocb(srb_t *, struct fxdisc_entry_fx00 *);
 extern void qlafx00_timer_routine(scsi_qla_host_t *);
 extern int qlafx00_rescan_isp(scsi_qla_host_t *);
-extern int qlafx00_loop_reset(scsi_qla_host_t *vha);
 
 /* qla82xx related functions */
 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 908a72e..070b636 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -330,12 +330,9 @@
 		lio->u.logio.flags |= SRB_LOGIN_PRLI_ONLY;
 	} else {
 		if (vha->hw->flags.edif_enabled &&
-		    vha->e_dbell.db_flags & EDB_ACTIVE) {
+		    DBELL_ACTIVE(vha)) {
 			lio->u.logio.flags |=
 				(SRB_LOGIN_FCSP | SRB_LOGIN_SKIP_PRLI);
-			ql_dbg(ql_dbg_disc, vha, 0x2072,
-			    "Async-login: w/ FCSP %8phC hdl=%x, loopid=%x portid=%06x\n",
-			    fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b24);
 		} else {
 			lio->u.logio.flags |= SRB_LOGIN_COND_PLOGI;
 		}
@@ -344,12 +341,14 @@
 	if (NVME_TARGET(vha->hw, fcport))
 		lio->u.logio.flags |= SRB_LOGIN_SKIP_PRLI;
 
-	ql_dbg(ql_dbg_disc, vha, 0x2072,
-	       "Async-login - %8phC hdl=%x, loopid=%x portid=%06x retries=%d.\n",
-	       fcport->port_name, sp->handle, fcport->loop_id,
-	       fcport->d_id.b24, fcport->login_retry);
-
 	rval = qla2x00_start_sp(sp);
+
+	ql_dbg(ql_dbg_disc, vha, 0x2072,
+	       "Async-login - %8phC hdl=%x, loopid=%x portid=%06x retries=%d %s.\n",
+	       fcport->port_name, sp->handle, fcport->loop_id,
+	       fcport->d_id.b24, fcport->login_retry,
+	       lio->u.logio.flags & SRB_LOGIN_FCSP ? "FCSP" : "");
+
 	if (rval != QLA_SUCCESS) {
 		fcport->flags |= FCF_LOGIN_NEEDED;
 		set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
@@ -862,7 +861,7 @@
 				break;
 			case DSC_LS_PLOGI_COMP:
 				if (vha->hw->flags.edif_enabled &&
-				    vha->e_dbell.db_flags & EDB_ACTIVE) {
+				    DBELL_ACTIVE(vha)) {
 					/* check to see if App support secure or not */
 					qla24xx_post_gpdb_work(vha, fcport, 0);
 					break;
@@ -987,8 +986,6 @@
 	    sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1],
 	    sp->u.iocb_cmd.u.mbx.in_mb[2]);
 
-	if (res == QLA_FUNCTION_TIMEOUT)
-		return;
 
 	sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
 	memset(&ea, 0, sizeof(ea));
@@ -1026,8 +1023,8 @@
 	spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 
 	list_for_each_entry_safe(fcport, tf, &h, gnl_entry) {
-		list_del_init(&fcport->gnl_entry);
 		spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags);
+		list_del_init(&fcport->gnl_entry);
 		fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
 		spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
 		ea.fcport = fcport;
@@ -1454,7 +1451,7 @@
 			qla2x00_post_aen_work(vha, FCH_EVT_PORT_ONLINE,
 			    fcport->d_id.b24);
 
-			if (vha->e_dbell.db_flags ==  EDB_ACTIVE) {
+			if (DBELL_ACTIVE(vha)) {
 				ql_dbg(ql_dbg_disc, vha, 0x20ef,
 				    "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n",
 				    __func__, __LINE__, fcport->port_name);
@@ -1786,16 +1783,72 @@
 	fc_port_t *fcport;
 	unsigned long flags;
 
-	fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
-	if (fcport) {
-		if (fcport->flags & FCF_FCP2_DEVICE) {
-			ql_dbg(ql_dbg_disc, vha, 0x2115,
-			       "Delaying session delete for FCP2 portid=%06x %8phC ",
-			       fcport->d_id.b24, fcport->port_name);
-			return;
+	switch (ea->id.b.rsvd_1) {
+	case RSCN_PORT_ADDR:
+		fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+		if (fcport) {
+			if (fcport->flags & FCF_FCP2_DEVICE) {
+				ql_dbg(ql_dbg_disc, vha, 0x2115,
+				       "Delaying session delete for FCP2 portid=%06x %8phC ",
+					fcport->d_id.b24, fcport->port_name);
+				return;
+			}
+
+			if (vha->hw->flags.edif_enabled && DBELL_ACTIVE(vha)) {
+				/*
+				 * On ipsec start by remote port, Target port
+				 * may use RSCN to trigger initiator to
+				 * relogin. If driver is already in the
+				 * process of a relogin, then ignore the RSCN
+				 * and allow the current relogin to continue.
+				 * This reduces thrashing of the connection.
+				 */
+				if (atomic_read(&fcport->state) == FCS_ONLINE) {
+					/*
+					 * If state = online, then set scan_needed=1 to do relogin.
+					 * Otherwise we're already in the middle of a relogin
+					 */
+					fcport->scan_needed = 1;
+					fcport->rscn_gen++;
+				}
+			} else {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
 		}
-		fcport->scan_needed = 1;
-		fcport->rscn_gen++;
+		break;
+	case RSCN_AREA_ADDR:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE)
+				continue;
+
+			if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
+		}
+		break;
+	case RSCN_DOM_ADDR:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE)
+				continue;
+
+			if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) {
+				fcport->scan_needed = 1;
+				fcport->rscn_gen++;
+			}
+		}
+		break;
+	case RSCN_FAB_ADDR:
+	default:
+		list_for_each_entry(fcport, &vha->vp_fcports, list) {
+			if (fcport->flags & FCF_FCP2_DEVICE)
+				continue;
+
+			fcport->scan_needed = 1;
+			fcport->rscn_gen++;
+		}
+		break;
 	}
 
 	spin_lock_irqsave(&vha->work_lock, flags);
@@ -4187,7 +4240,7 @@
 		 * fw shall not send PRLI after PLOGI Acc
 		 */
 		if (ha->flags.edif_enabled &&
-		    vha->e_dbell.db_flags & EDB_ACTIVE) {
+		    DBELL_ACTIVE(vha)) {
 			ha->fw_options[3] |= BIT_15;
 			ha->flags.n2n_fw_acc_sec = 1;
 		} else {
@@ -4433,6 +4486,10 @@
 		    (ha->flags.fawwpn_enabled) ? "enabled" : "disabled");
 	}
 
+	/* ELS pass through payload is limited by frame size. */
+	if (ha->flags.edif_enabled)
+		mid_init_cb->init_cb.frame_payload_size = cpu_to_le16(ELS_MAX_PAYLOAD);
+
 	rval = qla2x00_init_firmware(vha, ha->init_cb_size);
 next_check:
 	if (rval) {
@@ -5339,8 +5396,7 @@
 			 * use link up to wake up app to get ready for
 			 * authentication.
 			 */
-			if (ha->flags.edif_enabled &&
-			    !(vha->e_dbell.db_flags & EDB_ACTIVE))
+			if (ha->flags.edif_enabled && DBELL_INACTIVE(vha))
 				qla2x00_post_aen_work(vha, FCH_EVT_LINKUP,
 						      ha->link_data_rate);
 
@@ -5833,6 +5889,10 @@
 
 	qla2x00_update_fcport(fcport->vha, fcport);
 
+	ql_dbg(ql_dbg_disc, fcport->vha, 0x911e,
+	       "%s rscn gen %d/%d next DS %d\n", __func__,
+	       rscn_gen, fcport->rscn_gen, fcport->next_disc_state);
+
 	if (rscn_gen != fcport->rscn_gen) {
 		/* RSCN(s) came in while registration */
 		switch (fcport->next_disc_state) {
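
The RSCN handler above now honors the affected-address format carried in
ea->id.b.rsvd_1: a port-address RSCN matches one 24-bit N_Port ID, an
area-address RSCN matches on the top 16 bits, a domain-address RSCN on the
top 8, and a fabric RSCN touches every session. A hedged helper condensing
the comparisons, assuming the driver's RSCN_* constants; the function name
is hypothetical:

  static bool rscn_matches(u8 fmt, u32 rscn_id, u32 port_id)
  {
  	switch (fmt) {
  	case RSCN_PORT_ADDR:	/* exact N_Port ID */
  		return rscn_id == port_id;
  	case RSCN_AREA_ADDR:	/* domain + area */
  		return (rscn_id & 0xffff00) == (port_id & 0xffff00);
  	case RSCN_DOM_ADDR:	/* domain only */
  		return (rscn_id & 0xff0000) == (port_id & 0xff0000);
  	case RSCN_FAB_ADDR:	/* whole fabric */
  	default:
  		return true;
  	}
  }
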
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 9d4ad1d..ed604f2 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -3034,8 +3034,7 @@
 	elsio->u.els_plogi.els_cmd = els_opcode;
 	elsio->u.els_plogi.els_plogi_pyld->opcode = els_opcode;
 
-	if (els_opcode == ELS_DCMD_PLOGI && vha->hw->flags.edif_enabled &&
-	    vha->e_dbell.db_flags & EDB_ACTIVE) {
+	if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
 		struct fc_els_flogi *p = ptr;
 
 		p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index b26f269..aaf6504 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2233,6 +2233,10 @@
 				}
 
 			} else if (comp_status == CS_PORT_LOGGED_OUT) {
+				ql_dbg(ql_dbg_disc, vha, 0x911e,
+				       "%s %d schedule session deletion\n",
+				       __func__, __LINE__);
+
 				els->u.els_plogi.len = 0;
 				res = DID_IMM_RETRY << 16;
 				qlt_schedule_sess_for_deletion(sp->fcport);
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 73a3531..10d2655 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1695,10 +1695,8 @@
 		mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10;
 	if (IS_FWI2_CAPABLE(vha->hw))
 		mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16;
-	if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) {
-		mcp->in_mb |= MBX_15;
-		mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23;
-	}
+	if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw))
+		mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23;
 
 	mcp->tov = MBX_TOV_SECONDS;
 	mcp->flags = 0;
diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 6e920da..350b0c4 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -739,29 +739,6 @@
 }
 
 int
-qlafx00_loop_reset(scsi_qla_host_t *vha)
-{
-	int ret;
-	struct fc_port *fcport;
-	struct qla_hw_data *ha = vha->hw;
-
-	if (ql2xtargetreset) {
-		list_for_each_entry(fcport, &vha->vp_fcports, list) {
-			if (fcport->port_type != FCT_TARGET)
-				continue;
-
-			ret = ha->isp_ops->target_reset(fcport, 0, 0);
-			if (ret != QLA_SUCCESS) {
-				ql_dbg(ql_dbg_taskm, vha, 0x803d,
-				    "Bus Reset failed: Reset=%d "
-				    "d_id=%x.\n", ret, fcport->d_id.b24);
-			}
-		}
-	}
-	return QLA_SUCCESS;
-}
-
-int
 qlafx00_iospace_config(struct qla_hw_data *ha)
 {
 	if (pci_request_selected_regions(ha->pdev, ha->bars,
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index c3ff50f..abcd309 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -202,12 +202,6 @@
 		" 0 -- Regular doorbell.\n"
 		" 1 -- CAMRAM doorbell (faster).\n");
 
-int ql2xtargetreset = 1;
-module_param(ql2xtargetreset, int, S_IRUGO);
-MODULE_PARM_DESC(ql2xtargetreset,
-		 "Enable target reset."
-		 "Default is 1 - use hw defaults.");
-
 int ql2xgffidenable;
 module_param(ql2xgffidenable, int, S_IRUGO);
 MODULE_PARM_DESC(ql2xgffidenable,
@@ -1695,27 +1689,10 @@
 qla2x00_loop_reset(scsi_qla_host_t *vha)
 {
 	int ret;
-	struct fc_port *fcport;
 	struct qla_hw_data *ha = vha->hw;
 
-	if (IS_QLAFX00(ha)) {
-		return qlafx00_loop_reset(vha);
-	}
-
-	if (ql2xtargetreset == 1 && ha->flags.enable_target_reset) {
-		list_for_each_entry(fcport, &vha->vp_fcports, list) {
-			if (fcport->port_type != FCT_TARGET)
-				continue;
-
-			ret = ha->isp_ops->target_reset(fcport, 0, 0);
-			if (ret != QLA_SUCCESS) {
-				ql_dbg(ql_dbg_taskm, vha, 0x802c,
-				    "Bus Reset failed: Reset=%d "
-				    "d_id=%x.\n", ret, fcport->d_id.b24);
-			}
-		}
-	}
-
+	if (IS_QLAFX00(ha))
+		return QLA_SUCCESS;
 
 	if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) {
 		atomic_set(&vha->loop_state, LOOP_DOWN);
@@ -3908,13 +3885,13 @@
 static inline void
 qla24xx_free_purex_list(struct purex_list *list)
 {
-	struct list_head *item, *next;
+	struct purex_item *item, *next;
 	ulong flags;
 
 	spin_lock_irqsave(&list->lock, flags);
-	list_for_each_safe(item, next, &list->head) {
-		list_del(item);
-		kfree(list_entry(item, struct purex_item, list));
+	list_for_each_entry_safe(item, next, &list->head, list) {
+		list_del(&item->list);
+		kfree(item);
 	}
 	spin_unlock_irqrestore(&list->lock, flags);
 }
@@ -4375,7 +4352,7 @@
 
 	/* allocate the purex dma pool */
 	ha->purex_dma_pool = dma_pool_create(name, &ha->pdev->dev,
-	    MAX_PAYLOAD, 8, 0);
+	    ELS_MAX_PAYLOAD, 8, 0);
 
 	if (!ha->purex_dma_pool) {
 		ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011b,
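
The teardown loop converted in this file, like qla_edif_sadb_release()
earlier, now uses the entry-typed iterator: list_for_each_entry_safe()
folds the list_entry() conversion into the loop header, so the iteration
variables are the container structs themselves and the raw list_head
cursors disappear. Behavior is unchanged; the _safe variant still caches
the next entry so the current one can be freed mid-loop.
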
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 7d8242c..8993d43 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1003,6 +1003,7 @@
 					"%s bypassing release_all_sadb\n",
 					__func__);
 			}
+			qla_edif_clear_appdata(vha, sess);
 			qla_edif_sess_down(vha, sess);
 		}
 		qla2x00_mark_device_lost(vha, sess, 0);
@@ -4812,7 +4813,7 @@
 	}
 
 	if (vha->hw->flags.edif_enabled) {
-		if (!(vha->e_dbell.db_flags & EDB_ACTIVE)) {
+		if (DBELL_INACTIVE(vha)) {
 			ql_dbg(ql_dbg_disc, vha, 0xffff,
 			       "%s %d Term INOT due to app not started lid=%d, NportID %06X ",
 			       __func__, __LINE__, loop_id, port_id.b24);
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 4b11716..27e440f 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -6,9 +6,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "10.02.07.100-k"
+#define QLA2XXX_VERSION      "10.02.07.200-k"
 
 #define QLA_DRIVER_MAJOR_VER	10
 #define QLA_DRIVER_MINOR_VER	2
 #define QLA_DRIVER_PATCH_VER	7
-#define QLA_DRIVER_BETA_VER	100
+#define QLA_DRIVER_BETA_VER	200
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 06e7266..3c0da37 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1189,7 +1189,7 @@
 		 __func__, off_dst, scsi_bufflen(scp), act_len,
 		 scsi_get_resid(scp));
 	n = scsi_bufflen(scp) - (off_dst + act_len);
-	scsi_set_resid(scp, min_t(int, scsi_get_resid(scp), n));
+	scsi_set_resid(scp, min_t(u32, scsi_get_resid(scp), n));
 	return 0;
 }
 
@@ -1562,7 +1562,8 @@
 	unsigned char pq_pdt;
 	unsigned char *arr;
 	unsigned char *cmd = scp->cmnd;
-	int alloc_len, n, ret;
+	u32 alloc_len, n;
+	int ret;
 	bool have_wlun, is_disk, is_zbc, is_disk_zbc;
 
 	alloc_len = get_unaligned_be16(cmd + 3);
@@ -1585,7 +1586,8 @@
 		kfree(arr);
 		return check_condition_result;
 	} else if (0x1 & cmd[1]) {  /* EVPD bit set */
-		int lu_id_num, port_group_id, target_dev_id, len;
+		int lu_id_num, port_group_id, target_dev_id;
+		u32 len;
 		char lu_id_str[6];
 		int host_no = devip->sdbg_host->shost->host_no;
 		
@@ -1676,9 +1678,9 @@
 			kfree(arr);
 			return check_condition_result;
 		}
-		len = min(get_unaligned_be16(arr + 2) + 4, alloc_len);
+		len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
 		ret = fill_from_dev_buffer(scp, arr,
-			    min(len, SDEBUG_MAX_INQ_ARR_SZ));
+			    min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
 		kfree(arr);
 		return ret;
 	}
@@ -1714,7 +1716,7 @@
 	}
 	put_unaligned_be16(0x2100, arr + n);	/* SPL-4 no version claimed */
 	ret = fill_from_dev_buffer(scp, arr,
-			    min_t(int, alloc_len, SDEBUG_LONG_INQ_SZ));
+			    min_t(u32, alloc_len, SDEBUG_LONG_INQ_SZ));
 	kfree(arr);
 	return ret;
 }
@@ -1729,8 +1731,8 @@
 	unsigned char *cmd = scp->cmnd;
 	unsigned char arr[SCSI_SENSE_BUFFERSIZE];	/* assume >= 18 bytes */
 	bool dsense = !!(cmd[1] & 1);
-	int alloc_len = cmd[4];
-	int len = 18;
+	u32 alloc_len = cmd[4];
+	u32 len = 18;
 	int stopped_state = atomic_read(&devip->stopped);
 
 	memset(arr, 0, sizeof(arr));
@@ -1774,7 +1776,7 @@
 			arr[7] = 0xa;
 		}
 	}
-	return fill_from_dev_buffer(scp, arr, min_t(int, len, alloc_len));
+	return fill_from_dev_buffer(scp, arr, min_t(u32, len, alloc_len));
 }
 
 static int resp_start_stop(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
@@ -2312,7 +2314,8 @@
 {
 	int pcontrol, pcode, subpcode, bd_len;
 	unsigned char dev_spec;
-	int alloc_len, offset, len, target_dev_id;
+	u32 alloc_len, offset, len;
+	int target_dev_id;
 	int target = scp->device->id;
 	unsigned char *ap;
 	unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
@@ -2468,7 +2471,7 @@
 		arr[0] = offset - 1;
 	else
 		put_unaligned_be16((offset - 2), arr + 0);
-	return fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, offset));
+	return fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, offset));
 }
 
 #define SDEBUG_MAX_MSELECT_SZ 512
@@ -2499,11 +2502,11 @@
 			    __func__, param_len, res);
 	md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2);
 	bd_len = mselect6 ? arr[3] : get_unaligned_be16(arr + 6);
-	if (md_len > 2) {
+	off = bd_len + (mselect6 ? 4 : 8);
+	if (md_len > 2 || off >= res) {
 		mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1);
 		return check_condition_result;
 	}
-	off = bd_len + (mselect6 ? 4 : 8);
 	mpage = arr[off] & 0x3f;
 	ps = !!(arr[off] & 0x80);
 	if (ps) {
@@ -2583,7 +2586,8 @@
 static int resp_log_sense(struct scsi_cmnd *scp,
 			  struct sdebug_dev_info *devip)
 {
-	int ppc, sp, pcode, subpcode, alloc_len, len, n;
+	int ppc, sp, pcode, subpcode;
+	u32 alloc_len, len, n;
 	unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
 	unsigned char *cmd = scp->cmnd;
 
@@ -2653,9 +2657,9 @@
 		mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
 		return check_condition_result;
 	}
-	len = min_t(int, get_unaligned_be16(arr + 2) + 4, alloc_len);
+	len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
 	return fill_from_dev_buffer(scp, arr,
-		    min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ));
+		    min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
 }
 
 static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip)
@@ -4259,6 +4263,8 @@
 		mk_sense_invalid_opcode(scp);
 		return check_condition_result;
 	}
+	if (vnum == 0)
+		return 0;	/* not an error */
 	a_num = is_bytchk3 ? 1 : vnum;
 	/* Treat following check like one for read (i.e. no write) access */
 	ret = check_device_access_params(scp, lba, a_num, false);
@@ -4322,6 +4328,8 @@
 	}
 	zs_lba = get_unaligned_be64(cmd + 2);
 	alloc_len = get_unaligned_be32(cmd + 10);
+	if (alloc_len == 0)
+		return 0;	/* not an error */
 	rep_opts = cmd[14] & 0x3f;
 	partial = cmd[14] & 0x80;
 
@@ -4426,7 +4434,7 @@
 	put_unaligned_be64(sdebug_capacity - 1, arr + 8);
 
 	rep_len = (unsigned long)desc - (unsigned long)arr;
-	ret = fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, rep_len));
+	ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len));
 
 fini:
 	read_unlock(macc_lckp);
@@ -4649,6 +4657,7 @@
 			 struct sdeb_zone_state *zsp)
 {
 	enum sdebug_z_cond zc;
+	struct sdeb_store_info *sip = devip2sip(devip, false);
 
 	if (zbc_zone_is_conv(zsp))
 		return;
@@ -4660,6 +4669,10 @@
 	if (zsp->z_cond == ZC4_CLOSED)
 		devip->nr_closed--;
 
+	if (zsp->z_wp > zsp->z_start)
+		memset(sip->storep + zsp->z_start * sdebug_sector_size, 0,
+		       (zsp->z_wp - zsp->z_start) * sdebug_sector_size);
+
 	zsp->z_non_seq_resource = false;
 	zsp->z_wp = zsp->z_start;
 	zsp->z_cond = ZC1_EMPTY;
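
The min_t(int, ...) to min_t(u32, ...) conversions scattered through this
file all close the same hole: allocation lengths arrive from the CDB as
unsigned 16- or 32-bit fields, and pushing them through a signed compare
misbehaves at the top of the range. A sketch of the failure mode on the
32-bit REPORT ZONES path; the helper name is hypothetical and a kernel
build context is assumed:

  #include <linux/minmax.h>
  #include <asm/unaligned.h>

  /* cmd points at the CDB; rep_len is the bytes actually generated. */
  static u32 clamp_report_len(const u8 *cmd, u32 rep_len)
  {
  	u32 alloc_len = get_unaligned_be32(cmd + 10);	/* may be 0xffffffff */

  	/*
  	 * min_t(int, ...) would turn 0xffffffff into -1; the signed
  	 * compare picks it, and the bogus "length" gets re-widened into
  	 * a huge value on its way to fill_from_dev_buffer(). An unsigned
  	 * compare stays in the domain the CDB field actually has.
  	 */
  	return min_t(u32, alloc_len, rep_len);
  }
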
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a531336..2371edb 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -133,6 +133,23 @@
 	return true;
 }
 
+static void scsi_eh_complete_abort(struct scsi_cmnd *scmd, struct Scsi_Host *shost)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	list_del_init(&scmd->eh_entry);
+	/*
+	 * If the abort succeeds, and there is no further
+	 * EH action, clear the ->last_reset time.
+	 */
+	if (list_empty(&shost->eh_abort_list) &&
+	    list_empty(&shost->eh_cmd_q))
+		if (shost->eh_deadline != -1)
+			shost->last_reset = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
 /**
  * scmd_eh_abort_handler - Handle command aborts
  * @work:	command to be aborted.
@@ -150,6 +167,7 @@
 		container_of(work, struct scsi_cmnd, abort_work.work);
 	struct scsi_device *sdev = scmd->device;
 	enum scsi_disposition rtn;
+	unsigned long flags;
 
 	if (scsi_host_eh_past_deadline(sdev->host)) {
 		SCSI_LOG_ERROR_RECOVERY(3,
@@ -173,12 +191,14 @@
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "retry aborted command\n"));
+				scsi_eh_complete_abort(scmd, sdev->host);
 				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
 				return;
 			} else {
 				SCSI_LOG_ERROR_RECOVERY(3,
 					scmd_printk(KERN_WARNING, scmd,
 						    "finish aborted command\n"));
+				scsi_eh_complete_abort(scmd, sdev->host);
 				scsi_finish_command(scmd);
 				return;
 			}
@@ -191,6 +211,9 @@
 		}
 	}
 
+	spin_lock_irqsave(sdev->host->host_lock, flags);
+	list_del_init(&scmd->eh_entry);
+	spin_unlock_irqrestore(sdev->host->host_lock, flags);
 	scsi_eh_scmd_add(scmd);
 }
 
@@ -221,6 +244,8 @@
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->eh_deadline != -1 && !shost->last_reset)
 		shost->last_reset = jiffies;
+	BUG_ON(!list_empty(&scmd->eh_entry));
+	list_add_tail(&scmd->eh_entry, &shost->eh_abort_list);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
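
The new shost->eh_abort_list gives the host a record of commands whose
asynchronous abort is still in flight: scsi_abort_command() adds the
command when it schedules abort_work, and every exit path of
scmd_eh_abort_handler() now removes it under host_lock.
scsi_eh_complete_abort() additionally clears ->last_reset once both
eh_abort_list and eh_cmd_q are empty, so a successful abort with no further
error handling pending does not leave a stale EH deadline behind for the
next cycle.
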
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 34412ea..400df33 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -347,6 +347,8 @@
 {
 	struct scsi_request *req = scsi_req(rq);
 
+	if (hdr->cmd_len < 6)
+		return -EMSGSIZE;
 	if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
 	if (!scsi_cmd_allowed(req->cmd, mode))
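
Six bytes is the size of a Group 0 CDB, the shortest the SCSI spec defines;
rejecting a user-supplied cmd_len below that before the copy keeps later
opcode-based parsing, which assumes at least a Group 0 command, from
looking at bytes that were never copied in.
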
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b731c29..621d841 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1153,6 +1153,7 @@
 	cmd->sense_buffer = buf;
 	cmd->prot_sdb = prot;
 	cmd->flags = flags;
+	INIT_LIST_HEAD(&cmd->eh_entry);
 	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
 	cmd->jiffies_at_alloc = jiffies_at_alloc;
 	cmd->retries = retries;
@@ -1184,8 +1185,6 @@
 	}
 
 	cmd->cmd_len = scsi_req(req)->cmd_len;
-	if (cmd->cmd_len == 0)
-		cmd->cmd_len = scsi_command_size(cmd->cmnd);
 	cmd->cmnd = scsi_req(req)->cmd;
 	cmd->transfersize = blk_rq_bytes(req);
 	cmd->allowed = scsi_req(req)->retries;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index d3d3622..d4edce9 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -792,6 +792,7 @@
 	int i, ret;
 	struct scsi_device *sdev = to_scsi_device(dev);
 	enum scsi_device_state state = 0;
+	bool rescan_dev = false;
 
 	for (i = 0; i < ARRAY_SIZE(sdev_states); i++) {
 		const int len = strlen(sdev_states[i].name);
@@ -810,20 +811,27 @@
 	}
 
 	mutex_lock(&sdev->state_mutex);
-	ret = scsi_device_set_state(sdev, state);
-	/*
-	 * If the device state changes to SDEV_RUNNING, we need to
-	 * run the queue to avoid I/O hang, and rescan the device
-	 * to revalidate it. Running the queue first is necessary
-	 * because another thread may be waiting inside
-	 * blk_mq_freeze_queue_wait() and because that call may be
-	 * waiting for pending I/O to finish.
-	 */
-	if (ret == 0 && state == SDEV_RUNNING) {
+	if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) {
+		ret = 0;
+	} else {
+		ret = scsi_device_set_state(sdev, state);
+		if (ret == 0 && state == SDEV_RUNNING)
+			rescan_dev = true;
+	}
+	mutex_unlock(&sdev->state_mutex);
+
+	if (rescan_dev) {
+		/*
+		 * If the device state changes to SDEV_RUNNING, we need to
+		 * run the queue to avoid I/O hang, and rescan the device
+		 * to revalidate it. Running the queue first is necessary
+		 * because another thread may be waiting inside
+		 * blk_mq_freeze_queue_wait() and because that call may be
+		 * waiting for pending I/O to finish.
+		 */
 		blk_mq_run_hw_queues(sdev->request_queue, true);
 		scsi_rescan_device(dev);
 	}
-	mutex_unlock(&sdev->state_mutex);
 
 	return ret == 0 ? count : -EINVAL;
 }
@@ -1383,6 +1391,7 @@
 			 * We're treating error on bsg register as non-fatal, so
 			 * pretend nothing went wrong.
 			 */
+			error = PTR_ERR(sdev->bsg_dev);
 			sdev_printk(KERN_INFO, sdev,
 				    "Failed to register bsg queue, errno=%d\n",
 				    error);
@@ -1580,7 +1589,6 @@
 
 void scsi_sysfs_device_initialize(struct scsi_device *sdev)
 {
-	int i, j = 0;
 	unsigned long flags;
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_host_template *hostt = shost->hostt;
@@ -1592,15 +1600,7 @@
 	scsi_enable_async_suspend(&sdev->sdev_gendev);
 	dev_set_name(&sdev->sdev_gendev, "%d:%d:%d:%llu",
 		     sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
-	sdev->gendev_attr_groups[j++] = &scsi_sdev_attr_group;
-	if (hostt->sdev_groups) {
-		for (i = 0; hostt->sdev_groups[i] &&
-			     j < ARRAY_SIZE(sdev->gendev_attr_groups);
-		     i++, j++) {
-			sdev->gendev_attr_groups[j] = hostt->sdev_groups[i];
-		}
-	}
-	WARN_ON_ONCE(j >= ARRAY_SIZE(sdev->gendev_attr_groups));
+	sdev->sdev_gendev.groups = hostt->sdev_groups;
 
 	device_initialize(&sdev->sdev_dev);
 	sdev->sdev_dev.parent = get_device(&sdev->sdev_gendev);
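
The store_state reshuffle above is the classic decide-under-lock,
act-outside-lock split: the state transition is judged while state_mutex is
held, but the follow-up work of running the hardware queues and rescanning
happens only after the mutex drops, since both can block (the queue run may
be exactly what a concurrent blk_mq_freeze_queue_wait() is waiting on) and
the rescan path may want the mutex itself. The shape of the pattern, with
hypothetical names throughout:

  #include <linux/mutex.h>

  struct obj {
  	struct mutex lock;
  	/* ... guarded state ... */
  };

  bool transition_allowed(struct obj *o);	/* cheap, called under lock */
  void do_blocking_followup(struct obj *o);	/* may sleep, may retake lock */

  static void set_state_and_kick(struct obj *o)
  {
  	bool act = false;

  	mutex_lock(&o->lock);
  	if (transition_allowed(o))	/* record the decision only */
  		act = true;
  	mutex_unlock(&o->lock);

  	if (act)
  		do_blocking_followup(o);
  }
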
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 78343d3..554b6f7 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1899,12 +1899,12 @@
 	}
 	spin_unlock_irqrestore(&session->lock, flags);
 
-	if (session->transport->session_recovery_timedout)
-		session->transport->session_recovery_timedout(session);
-
 	ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n");
 	scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE);
 	ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n");
+
+	if (session->transport->session_recovery_timedout)
+		session->transport->session_recovery_timedout(session);
 }
 
 static void __iscsi_unblock_session(struct work_struct *work)
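
Reordering the unblock ahead of the transport callback means commands held
on the blocked queues are flushed out with SDEV_TRANSPORT_OFFLINE before
session_recovery_timedout() runs; invoking the callback while the target
was still blocked could leave the transport waiting on I/O that would only
be failed afterwards.
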
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 50f8dc9..8e4af11 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -693,7 +693,6 @@
 	cd->device = sdev;
 	cd->disk = disk;
 	cd->driver = &sr_template;
-	cd->disk = disk;
 	cd->capacity = 0x1fffff;
 	cd->device->changed = 1;	/* force recheck CD type */
 	cd->media_present = 1;
diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c
index 4e1ff20..4a0bbcf 100644
--- a/drivers/scsi/ufs/ufs-debugfs.c
+++ b/drivers/scsi/ufs/ufs-debugfs.c
@@ -8,6 +8,18 @@
 
 static struct dentry *ufs_debugfs_root;
 
+struct ufs_debugfs_attr {
+	const char			*name;
+	mode_t				mode;
+	const struct file_operations	*fops;
+};
+
+/* @file corresponds to a debugfs attribute in directory hba->debugfs_root. */
+static inline struct ufs_hba *hba_from_file(const struct file *file)
+{
+	return d_inode(file->f_path.dentry->d_parent)->i_private;
+}
+
 void __init ufs_debugfs_init(void)
 {
 	ufs_debugfs_root = debugfs_create_dir("ufshcd", NULL);
@@ -20,7 +32,7 @@
 
 static int ufs_debugfs_stats_show(struct seq_file *s, void *data)
 {
-	struct ufs_hba *hba = s->private;
+	struct ufs_hba *hba = hba_from_file(s->file);
 	struct ufs_event_hist *e = hba->ufs_stats.event;
 
 #define PRT(fmt, typ) \
@@ -126,13 +138,93 @@
 	ufs_debugfs_put_user_access(hba);
 }
 
+static int ufs_saved_err_show(struct seq_file *s, void *data)
+{
+	struct ufs_debugfs_attr *attr = s->private;
+	struct ufs_hba *hba = hba_from_file(s->file);
+	const int *p;
+
+	if (strcmp(attr->name, "saved_err") == 0) {
+		p = &hba->saved_err;
+	} else if (strcmp(attr->name, "saved_uic_err") == 0) {
+		p = &hba->saved_uic_err;
+	} else {
+		return -ENOENT;
+	}
+
+	seq_printf(s, "%d\n", *p);
+	return 0;
+}
+
+static ssize_t ufs_saved_err_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct ufs_debugfs_attr *attr = file->f_inode->i_private;
+	struct ufs_hba *hba = hba_from_file(file);
+	char val_str[16] = { };
+	int val, ret;
+
+	if (count > sizeof(val_str))
+		return -EINVAL;
+	if (copy_from_user(val_str, buf, count))
+		return -EFAULT;
+	ret = kstrtoint(val_str, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irq(hba->host->host_lock);
+	if (strcmp(attr->name, "saved_err") == 0) {
+		hba->saved_err = val;
+	} else if (strcmp(attr->name, "saved_uic_err") == 0) {
+		hba->saved_uic_err = val;
+	} else {
+		ret = -ENOENT;
+	}
+	if (ret == 0)
+		ufshcd_schedule_eh_work(hba);
+	spin_unlock_irq(hba->host->host_lock);
+
+	return ret < 0 ? ret : count;
+}
+
+static int ufs_saved_err_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ufs_saved_err_show, inode->i_private);
+}
+
+static const struct file_operations ufs_saved_err_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ufs_saved_err_open,
+	.read		= seq_read,
+	.write		= ufs_saved_err_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct ufs_debugfs_attr ufs_attrs[] = {
+	{ "stats", 0400, &ufs_debugfs_stats_fops },
+	{ "saved_err", 0600, &ufs_saved_err_fops },
+	{ "saved_uic_err", 0600, &ufs_saved_err_fops },
+	{ }
+};
+
 void ufs_debugfs_hba_init(struct ufs_hba *hba)
 {
+	const struct ufs_debugfs_attr *attr;
+	struct dentry *root;
+
 	/* Set default exception event rate limit period to 20ms */
 	hba->debugfs_ee_rate_limit_ms = 20;
 	INIT_DELAYED_WORK(&hba->debugfs_ee_work, ufs_debugfs_restart_ee);
-	hba->debugfs_root = debugfs_create_dir(dev_name(hba->dev), ufs_debugfs_root);
-	debugfs_create_file("stats", 0400, hba->debugfs_root, hba, &ufs_debugfs_stats_fops);
+
+	root = debugfs_create_dir(dev_name(hba->dev), ufs_debugfs_root);
+	if (IS_ERR_OR_NULL(root))
+		return;
+	hba->debugfs_root = root;
+	d_inode(root)->i_private = hba;
+	for (attr = ufs_attrs; attr->name; attr++)
+		debugfs_create_file(attr->name, attr->mode, root, (void *)attr,
+				    attr->fops);
 	debugfs_create_file("exception_event_mask", 0600, hba->debugfs_root,
 			    hba, &ee_usr_mask_fops);
 	debugfs_create_u32("exception_event_rate_limit_ms", 0600, hba->debugfs_root,
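
With several attributes now sharing a directory, the hba pointer moves from
each file's i_private into the directory inode's i_private, and
hba_from_file() recovers it by walking to the parent dentry. Each file's
own i_private instead carries its struct ufs_debugfs_attr, which is how the
shared ufs_saved_err_fops distinguishes saved_err from saved_uic_err by
name. The IS_ERR_OR_NULL check on the directory also means the attribute
files are simply skipped when debugfs is unavailable.
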
diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c
index 30d0c1a..cd26bc8 100644
--- a/drivers/scsi/ufs/ufs-exynos.c
+++ b/drivers/scsi/ufs/ufs-exynos.c
@@ -12,8 +12,10 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/mfd/syscon.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
@@ -48,10 +50,11 @@
 #define HCI_ERR_EN_T_LAYER	0x84
 #define HCI_ERR_EN_DME_LAYER	0x88
 #define HCI_CLKSTOP_CTRL	0xB0
+#define REFCLKOUT_STOP		BIT(4)
 #define REFCLK_STOP		BIT(2)
 #define UNIPRO_MCLK_STOP	BIT(1)
 #define UNIPRO_PCLK_STOP	BIT(0)
-#define CLK_STOP_MASK		(REFCLK_STOP |\
+#define CLK_STOP_MASK		(REFCLKOUT_STOP | REFCLK_STOP |\
 				 UNIPRO_MCLK_STOP |\
 				 UNIPRO_PCLK_STOP)
 #define HCI_MISC		0xB4
@@ -74,6 +77,52 @@
 				 UIC_TRANSPORT_NO_CONNECTION_RX |\
 				 UIC_TRANSPORT_BAD_TC)
 
+/* FSYS UFS Shareability */
+#define UFS_WR_SHARABLE		BIT(2)
+#define UFS_RD_SHARABLE		BIT(1)
+#define UFS_SHARABLE		(UFS_WR_SHARABLE | UFS_RD_SHARABLE)
+#define UFS_SHAREABILITY_OFFSET	0x710
+
+/* Multi-host registers */
+#define MHCTRL			0xC4
+#define MHCTRL_EN_VH_MASK	(0xE)
+#define MHCTRL_EN_VH(vh)	(vh << 1)
+#define PH2VH_MBOX		0xD8
+
+#define MH_MSG_MASK		(0xFF)
+
+#define MH_MSG(id, msg)		((id << 8) | (msg & 0xFF))
+#define MH_MSG_PH_READY		0x1
+#define MH_MSG_VH_READY		0x2
+
+#define ALLOW_INQUIRY		BIT(25)
+#define ALLOW_MODE_SELECT	BIT(24)
+#define ALLOW_MODE_SENSE	BIT(23)
+#define ALLOW_PRE_FETCH		GENMASK(22, 21)
+#define ALLOW_READ_CMD_ALL	GENMASK(20, 18)	/* read_6/10/16 */
+#define ALLOW_READ_BUFFER	BIT(17)
+#define ALLOW_READ_CAPACITY	GENMASK(16, 15)
+#define ALLOW_REPORT_LUNS	BIT(14)
+#define ALLOW_REQUEST_SENSE	BIT(13)
+#define ALLOW_SYNCHRONIZE_CACHE	GENMASK(8, 7)
+#define ALLOW_TEST_UNIT_READY	BIT(6)
+#define ALLOW_UNMAP		BIT(5)
+#define ALLOW_VERIFY		BIT(4)
+#define ALLOW_WRITE_CMD_ALL	GENMASK(3, 1)	/* write_6/10/16 */
+
+#define ALLOW_TRANS_VH_DEFAULT	(ALLOW_INQUIRY | ALLOW_MODE_SELECT | \
+				 ALLOW_MODE_SENSE | ALLOW_PRE_FETCH | \
+				 ALLOW_READ_CMD_ALL | ALLOW_READ_BUFFER | \
+				 ALLOW_READ_CAPACITY | ALLOW_REPORT_LUNS | \
+				 ALLOW_REQUEST_SENSE | ALLOW_SYNCHRONIZE_CACHE | \
+				 ALLOW_TEST_UNIT_READY | ALLOW_UNMAP | \
+				 ALLOW_VERIFY | ALLOW_WRITE_CMD_ALL)
+
+#define HCI_MH_ALLOWABLE_TRAN_OF_VH		0x30C
+#define HCI_MH_IID_IN_TASK_TAG			0x308
+
+#define PH_READY_TIMEOUT_MS			(5 * MSEC_PER_SEC)
+
 enum {
 	UNIPRO_L1_5 = 0,/* PHY Adapter */
 	UNIPRO_L2,	/* Data Link */
@@ -149,6 +198,117 @@
 	return 0;
 }
 
+static int exynosauto_ufs_drv_init(struct device *dev, struct exynos_ufs *ufs)
+{
+	struct exynos_ufs_uic_attr *attr = ufs->drv_data->uic_attr;
+
+	/* IO Coherency setting */
+	if (ufs->sysreg) {
+		return regmap_update_bits(ufs->sysreg,
+					  ufs->shareability_reg_offset,
+					  UFS_SHARABLE, UFS_SHARABLE);
+	}
+
+	attr->tx_dif_p_nsec = 3200000;
+
+	return 0;
+}
+
+static int exynosauto_ufs_post_hce_enable(struct exynos_ufs *ufs)
+{
+	struct ufs_hba *hba = ufs->hba;
+
+	/* Enable Virtual Host #1 */
+	ufshcd_rmwl(hba, MHCTRL_EN_VH_MASK, MHCTRL_EN_VH(1), MHCTRL);
+	/* Default VH Transfer permissions */
+	hci_writel(ufs, ALLOW_TRANS_VH_DEFAULT, HCI_MH_ALLOWABLE_TRAN_OF_VH);
+	/* IID information is replaced in TASKTAG[7:5] instead of IID in UCD */
+	hci_writel(ufs, 0x1, HCI_MH_IID_IN_TASK_TAG);
+
+	return 0;
+}
+
+static int exynosauto_ufs_pre_link(struct exynos_ufs *ufs)
+{
+	struct ufs_hba *hba = ufs->hba;
+	int i;
+	u32 tx_line_reset_period, rx_line_reset_period;
+
+	rx_line_reset_period = (RX_LINE_RESET_TIME * ufs->mclk_rate) / NSEC_PER_MSEC;
+	tx_line_reset_period = (TX_LINE_RESET_TIME * ufs->mclk_rate) / NSEC_PER_MSEC;
+
+	ufshcd_dme_set(hba, UIC_ARG_MIB(0x200), 0x40);
+	for_each_ufs_rx_lane(ufs, i) {
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD, i),
+			       DIV_ROUND_UP(NSEC_PER_SEC, ufs->mclk_rate));
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD_EN, i), 0x0);
+
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE2, i),
+			       (rx_line_reset_period >> 16) & 0xFF);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE1, i),
+			       (rx_line_reset_period >> 8) & 0xFF);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE0, i),
+			       (rx_line_reset_period) & 0xFF);
+
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x2f, i), 0x79);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x84, i), 0x1);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x25, i), 0xf6);
+	}
+
+	for_each_ufs_tx_lane(ufs, i) {
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD, i),
+			       DIV_ROUND_UP(NSEC_PER_SEC, ufs->mclk_rate));
+		/* Keep VND_TX_LINERESET_PVALUE from affecting VND_TX_CLK_PRD */
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD_EN, i),
+			       0x02);
+
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE2, i),
+			       (tx_line_reset_period >> 16) & 0xFF);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE1, i),
+			       (tx_line_reset_period >> 8) & 0xFF);
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE0, i),
+			       (tx_line_reset_period) & 0xFF);
+
+		/* TX PWM Gear Capability / PWM_G1_ONLY */
+		ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x04, i), 0x1);
+	}
+
+	ufshcd_dme_set(hba, UIC_ARG_MIB(0x200), 0x0);
+
+	ufshcd_dme_set(hba, UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE), 0x0);
+
+	ufshcd_dme_set(hba, UIC_ARG_MIB(0xa011), 0x8000);
+
+	return 0;
+}
+
+static int exynosauto_ufs_pre_pwr_change(struct exynos_ufs *ufs,
+					 struct ufs_pa_layer_attr *pwr)
+{
+	struct ufs_hba *hba = ufs->hba;
+
+	/* Parameters for PACP_PWR_req, delivered to the remote DME */
+	ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 12000);
+	ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 32000);
+	ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 16000);
+
+	return 0;
+}
+
+static int exynosauto_ufs_post_pwr_change(struct exynos_ufs *ufs,
+					  struct ufs_pa_layer_attr *pwr)
+{
+	struct ufs_hba *hba = ufs->hba;
+	u32 enabled_vh;
+
+	enabled_vh = ufshcd_readl(hba, MHCTRL) & MHCTRL_EN_VH_MASK;
+
+	/* Send physical host ready message to virtual hosts */
+	ufshcd_writel(hba, MH_MSG(enabled_vh, MH_MSG_PH_READY), PH2VH_MBOX);
+
+	return 0;
+}
+
 static int exynos7_ufs_pre_link(struct exynos_ufs *ufs)
 {
 	struct ufs_hba *hba = ufs->hba;
@@ -793,6 +953,27 @@
 	}
 }
 
+static int exynos_ufs_setup_clocks(struct ufs_hba *hba, bool on,
+				   enum ufs_notify_change_status status)
+{
+	struct exynos_ufs *ufs = ufshcd_get_variant(hba);
+
+	if (!ufs)
+		return 0;
+
+	if (on && status == PRE_CHANGE) {
+		if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL)
+			exynos_ufs_disable_auto_ctrl_hcc(ufs);
+		exynos_ufs_ungate_clks(ufs);
+	} else if (!on && status == POST_CHANGE) {
+		exynos_ufs_gate_clks(ufs);
+		if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL)
+			exynos_ufs_enable_auto_ctrl_hcc(ufs);
+	}
+
+	return 0;
+}
+
 static int exynos_ufs_pre_link(struct ufs_hba *hba)
 {
 	struct exynos_ufs *ufs = ufshcd_get_variant(hba);
@@ -808,8 +989,12 @@
 
 	/* m-phy */
 	exynos_ufs_phy_init(ufs);
-	exynos_ufs_config_phy_time_attr(ufs);
-	exynos_ufs_config_phy_cap_attr(ufs);
+	if (!(ufs->opts & EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR)) {
+		exynos_ufs_config_phy_time_attr(ufs);
+		exynos_ufs_config_phy_cap_attr(ufs);
+	}
+
+	exynos_ufs_setup_clocks(hba, true, PRE_CHANGE);
 
 	if (ufs->drv_data->pre_link)
 		ufs->drv_data->pre_link(ufs);
@@ -893,17 +1078,10 @@
 static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs)
 {
 	struct device_node *np = dev->of_node;
-	struct exynos_ufs_drv_data *drv_data = &exynos_ufs_drvs;
 	struct exynos_ufs_uic_attr *attr;
 	int ret = 0;
 
-	while (drv_data->compatible) {
-		if (of_device_is_compatible(np, drv_data->compatible)) {
-			ufs->drv_data = drv_data;
-			break;
-		}
-		drv_data++;
-	}
+	ufs->drv_data = device_get_match_data(dev);
 
 	if (ufs->drv_data && ufs->drv_data->uic_attr) {
 		attr = ufs->drv_data->uic_attr;
@@ -913,6 +1091,17 @@
 		goto out;
 	}
 
+	ufs->sysreg = syscon_regmap_lookup_by_phandle(np, "samsung,sysreg");
+	if (IS_ERR(ufs->sysreg))
+		ufs->sysreg = NULL;
+	else {
+		if (of_property_read_u32_index(np, "samsung,sysreg", 1,
+					       &ufs->shareability_reg_offset)) {
+			dev_warn(dev, "can't get an offset from sysreg. Set to default value\n");
+			ufs->shareability_reg_offset = UFS_SHAREABILITY_OFFSET;
+		}
+	}
+
 	ufs->pclk_avail_min = PCLK_AVAIL_MIN;
 	ufs->pclk_avail_max = PCLK_AVAIL_MAX;
 
@@ -927,6 +1116,18 @@
 	return ret;
 }
 
+static inline void exynos_ufs_priv_init(struct ufs_hba *hba,
+					struct exynos_ufs *ufs)
+{
+	ufs->hba = hba;
+	ufs->opts = ufs->drv_data->opts;
+	ufs->rx_sel_idx = PA_MAXDATALANES;
+	if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX)
+		ufs->rx_sel_idx = 0;
+	hba->priv = (void *)ufs;
+	hba->quirks = ufs->drv_data->quirks;
+}
+
 static int exynos_ufs_init(struct ufs_hba *hba)
 {
 	struct device *dev = hba->dev;
@@ -976,13 +1177,8 @@
 	if (ret)
 		goto phy_off;
 
-	ufs->hba = hba;
-	ufs->opts = ufs->drv_data->opts;
-	ufs->rx_sel_idx = PA_MAXDATALANES;
-	if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX)
-		ufs->rx_sel_idx = 0;
-	hba->priv = (void *)ufs;
-	hba->quirks = ufs->drv_data->quirks;
+	exynos_ufs_priv_init(hba, ufs);
+
 	if (ufs->drv_data->drv_init) {
 		ret = ufs->drv_data->drv_init(dev, ufs);
 		if (ret) {
@@ -1110,6 +1306,12 @@
 
 	switch (status) {
 	case PRE_CHANGE:
+		if (ufs->drv_data->pre_hce_enable) {
+			ret = ufs->drv_data->pre_hce_enable(ufs);
+			if (ret)
+				return ret;
+		}
+
 		ret = exynos_ufs_host_reset(hba);
 		if (ret)
 			return ret;
@@ -1119,6 +1321,10 @@
 		exynos_ufs_calc_pwm_clk_div(ufs);
 		if (!(ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL))
 			exynos_ufs_enable_auto_ctrl_hcc(ufs);
+
+		if (ufs->drv_data->post_hce_enable)
+			ret = ufs->drv_data->post_hce_enable(ufs);
+
 		break;
 	}
 
@@ -1202,12 +1408,77 @@
 	return 0;
 }
 
+static int exynosauto_ufs_vh_link_startup_notify(struct ufs_hba *hba,
+						 enum ufs_notify_change_status status)
+{
+	if (status == POST_CHANGE) {
+		ufshcd_set_link_active(hba);
+		ufshcd_set_ufs_dev_active(hba);
+	}
+
+	return 0;
+}
+
+static int exynosauto_ufs_vh_wait_ph_ready(struct ufs_hba *hba)
+{
+	u32 mbox;
+	ktime_t start, stop;
+
+	start = ktime_get();
+	stop = ktime_add(start, ms_to_ktime(PH_READY_TIMEOUT_MS));
+
+	do {
+		mbox = ufshcd_readl(hba, PH2VH_MBOX);
+		/* TODO: Mailbox message protocols between the PH and VHs are
+		 * not implemented yet. This will be supported later
+		 */
+		if ((mbox & MH_MSG_MASK) == MH_MSG_PH_READY)
+			return 0;
+
+		usleep_range(40, 50);
+	} while (ktime_before(ktime_get(), stop));
+
+	return -ETIME;
+}
+
+static int exynosauto_ufs_vh_init(struct ufs_hba *hba)
+{
+	struct device *dev = hba->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct exynos_ufs *ufs;
+	int ret;
+
+	ufs = devm_kzalloc(dev, sizeof(*ufs), GFP_KERNEL);
+	if (!ufs)
+		return -ENOMEM;
+
+	/* exynos-specific hci */
+	ufs->reg_hci = devm_platform_ioremap_resource_byname(pdev, "vs_hci");
+	if (IS_ERR(ufs->reg_hci)) {
+		dev_err(dev, "cannot ioremap for hci vendor register\n");
+		return PTR_ERR(ufs->reg_hci);
+	}
+
+	ret = exynosauto_ufs_vh_wait_ph_ready(hba);
+	if (ret)
+		return ret;
+
+	ufs->drv_data = device_get_match_data(dev);
+	if (!ufs->drv_data)
+		return -ENODEV;
+
+	exynos_ufs_priv_init(hba, ufs);
+
+	return 0;
+}
+
 static struct ufs_hba_variant_ops ufs_hba_exynos_ops = {
 	.name				= "exynos_ufs",
 	.init				= exynos_ufs_init,
 	.hce_enable_notify		= exynos_ufs_hce_enable_notify,
 	.link_startup_notify		= exynos_ufs_link_startup_notify,
 	.pwr_change_notify		= exynos_ufs_pwr_change_notify,
+	.setup_clocks			= exynos_ufs_setup_clocks,
 	.setup_xfer_req			= exynos_ufs_specify_nexus_t_xfer_req,
 	.setup_task_mgmt		= exynos_ufs_specify_nexus_t_tm_req,
 	.hibern8_notify			= exynos_ufs_hibern8_notify,
@@ -1215,12 +1486,24 @@
 	.resume				= exynos_ufs_resume,
 };
 
+static struct ufs_hba_variant_ops ufs_hba_exynosauto_vh_ops = {
+	.name				= "exynosauto_ufs_vh",
+	.init				= exynosauto_ufs_vh_init,
+	.link_startup_notify		= exynosauto_ufs_vh_link_startup_notify,
+};
+
 static int exynos_ufs_probe(struct platform_device *pdev)
 {
 	int err;
 	struct device *dev = &pdev->dev;
+	const struct ufs_hba_variant_ops *vops = &ufs_hba_exynos_ops;
+	const struct exynos_ufs_drv_data *drv_data =
+		device_get_match_data(dev);
 
-	err = ufshcd_pltfrm_init(pdev, &ufs_hba_exynos_ops);
+	if (drv_data && drv_data->vops)
+		vops = drv_data->vops;
+
+	err = ufshcd_pltfrm_init(pdev, vops);
 	if (err)
 		dev_err(dev, "ufshcd_pltfrm_init() failed %d\n", err);
 
@@ -1261,8 +1544,35 @@
 	.pa_dbg_option_suite		= 0x30103,
 };
 
+static struct exynos_ufs_drv_data exynosauto_ufs_drvs = {
+	.uic_attr		= &exynos7_uic_attr,
+	.quirks			= UFSHCD_QUIRK_PRDT_BYTE_GRAN |
+				  UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR |
+				  UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR |
+				  UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING,
+	.opts			= EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL |
+				  EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR |
+				  EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX,
+	.drv_init		= exynosauto_ufs_drv_init,
+	.post_hce_enable	= exynosauto_ufs_post_hce_enable,
+	.pre_link		= exynosauto_ufs_pre_link,
+	.pre_pwr_change		= exynosauto_ufs_pre_pwr_change,
+	.post_pwr_change	= exynosauto_ufs_post_pwr_change,
+};
+
+static struct exynos_ufs_drv_data exynosauto_ufs_vh_drvs = {
+	.vops			= &ufs_hba_exynosauto_vh_ops,
+	.quirks			= UFSHCD_QUIRK_PRDT_BYTE_GRAN |
+				  UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR |
+				  UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR |
+				  UFSHCI_QUIRK_BROKEN_HCE |
+				  UFSHCD_QUIRK_BROKEN_UIC_CMD |
+				  UFSHCD_QUIRK_SKIP_PH_CONFIGURATION |
+				  UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING,
+	.opts			= EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX,
+};
+
 static struct exynos_ufs_drv_data exynos_ufs_drvs = {
-	.compatible		= "samsung,exynos7-ufs",
 	.uic_attr		= &exynos7_uic_attr,
 	.quirks			= UFSHCD_QUIRK_PRDT_BYTE_GRAN |
 				  UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR |
@@ -1287,6 +1597,10 @@
 static const struct of_device_id exynos_ufs_of_match[] = {
 	{ .compatible = "samsung,exynos7-ufs",
 	  .data	      = &exynos_ufs_drvs },
+	{ .compatible = "samsung,exynosautov9-ufs",
+	  .data	      = &exynosauto_ufs_drvs },
+	{ .compatible = "samsung,exynosautov9-ufs-vh",
+	  .data	      = &exynosauto_ufs_vh_drvs },
 	{},
 };
 
diff --git a/drivers/scsi/ufs/ufs-exynos.h b/drivers/scsi/ufs/ufs-exynos.h
index dadf4fd..1c33e54 100644
--- a/drivers/scsi/ufs/ufs-exynos.h
+++ b/drivers/scsi/ufs/ufs-exynos.h
@@ -56,6 +56,22 @@
 #define TX_GRAN_NVAL_10_08	0x0296
 #define TX_GRAN_NVAL_H(v)	(((v) >> 8) & 0x3)
 
+#define VND_TX_CLK_PRD		0xAA
+#define VND_TX_CLK_PRD_EN	0xA9
+#define VND_TX_LINERESET_PVALUE0	0xAD
+#define VND_TX_LINERESET_PVALUE1	0xAC
+#define VND_TX_LINERESET_PVALUE2	0xAB
+
+#define TX_LINE_RESET_TIME	3200
+
+#define VND_RX_CLK_PRD		0x12
+#define VND_RX_CLK_PRD_EN	0x11
+#define VND_RX_LINERESET_VALUE0	0x1D
+#define VND_RX_LINERESET_VALUE1	0x1C
+#define VND_RX_LINERESET_VALUE2	0x1B
+
+#define RX_LINE_RESET_TIME	1000
+
 #define RX_FILLER_ENABLE	0x0316
 #define RX_FILLER_EN		(1 << 1)
 #define RX_LINERESET_VAL	0x0317
@@ -99,7 +115,7 @@
 #define PA_HIBERN8TIME_VAL	0x20
 
 #define PCLK_AVAIL_MIN	70000000
-#define PCLK_AVAIL_MAX	133000000
+#define PCLK_AVAIL_MAX	167000000
 
 struct exynos_ufs_uic_attr {
 	/* TX Attributes */
@@ -142,7 +158,7 @@
 };
 
 struct exynos_ufs_drv_data {
-	char *compatible;
+	const struct ufs_hba_variant_ops *vops;
 	struct exynos_ufs_uic_attr *uic_attr;
 	unsigned int quirks;
 	unsigned int opts;
@@ -154,6 +170,8 @@
 				struct ufs_pa_layer_attr *pwr);
 	int (*post_pwr_change)(struct exynos_ufs *ufs,
 				struct ufs_pa_layer_attr *pwr);
+	int (*pre_hce_enable)(struct exynos_ufs *ufs);
+	int (*post_hce_enable)(struct exynos_ufs *ufs);
 };
 
 struct ufs_phy_time_cfg {
@@ -191,7 +209,9 @@
 	struct ufs_pa_layer_attr dev_req_params;
 	struct ufs_phy_time_cfg t_cfg;
 	ktime_t entry_hibern8_t;
-	struct exynos_ufs_drv_data *drv_data;
+	const struct exynos_ufs_drv_data *drv_data;
+	struct regmap *sysreg;
+	u32 shareability_reg_offset;
 
 	u32 opts;
 #define EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL		BIT(0)
@@ -199,6 +219,7 @@
 #define EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL	BIT(2)
 #define EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX	BIT(3)
 #define EXYNOS_UFS_OPT_USE_SW_HIBERN8_TIMER	BIT(4)
+#define EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR	BIT(5)
 };
 
 #define for_each_ufs_rx_lane(ufs, i) \
diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index fc5b214..5393b5c 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -1189,6 +1189,7 @@
 	}
 	link = device_link_add(dev, &reset_pdev->dev,
 		DL_FLAG_AUTOPROBE_CONSUMER);
+	put_device(&reset_pdev->dev);
 	if (!link) {
 		dev_notice(dev, "add reset device_link fail\n");
 		goto skip_reset;
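
device_link_add() keeps its own hold on the devices it links, so the
reference obtained when reset_pdev was looked up (platform-device lookups
return the device with a reference held) is no longer needed once the link
exists; without the added put_device() that reference leaked on every
probe.
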
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5c6a58a..13c09db 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -132,6 +132,14 @@
 	UFSHCD_CAN_QUEUE	= 32,
 };
 
+static const char *const ufshcd_state_name[] = {
+	[UFSHCD_STATE_RESET]			= "reset",
+	[UFSHCD_STATE_OPERATIONAL]		= "operational",
+	[UFSHCD_STATE_ERROR]			= "error",
+	[UFSHCD_STATE_EH_SCHEDULED_FATAL]	= "eh_fatal",
+	[UFSHCD_STATE_EH_SCHEDULED_NON_FATAL]	= "eh_non_fatal",
+};
+
 /* UFSHCD error handling flags */
 enum {
 	UFSHCD_EH_IN_PROGRESS = (1 << 0),
@@ -236,7 +244,6 @@
 static irqreturn_t ufshcd_intr(int irq, void *__hba);
 static int ufshcd_change_power_mode(struct ufs_hba *hba,
 			     struct ufs_pa_layer_attr *pwr_mode);
-static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
 static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
@@ -711,7 +718,7 @@
  * This function is used to get the OCS field from UTRD
  * Returns the OCS field in the UTRD
  */
-static inline int ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
+static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
 {
 	return le32_to_cpu(lrbp->utr_descriptor_ptr->header.dword_2) & MASK_OCS;
 }
@@ -2323,6 +2330,9 @@
 	int ret;
 	unsigned long flags;
 
+	if (hba->quirks & UFSHCD_QUIRK_BROKEN_UIC_CMD)
+		return 0;
+
 	ufshcd_hold(hba, false);
 	mutex_lock(&hba->uic_cmd_mutex);
 	ufshcd_add_delay_before_dme_cmd(hba);
@@ -2367,17 +2377,24 @@
 					sizeof(struct ufshcd_sg_entry)));
 		else
 			lrbp->utr_descriptor_ptr->prd_table_length =
-				cpu_to_le16((u16) (sg_segments));
+				cpu_to_le16(sg_segments);
 
-		prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr;
+		prd_table = lrbp->ucd_prdt_ptr;
 
 		scsi_for_each_sg(cmd, sg, sg_segments, i) {
-			prd_table[i].size  =
-				cpu_to_le32(((u32) sg_dma_len(sg))-1);
-			prd_table[i].base_addr =
-				cpu_to_le32(lower_32_bits(sg->dma_address));
-			prd_table[i].upper_addr =
-				cpu_to_le32(upper_32_bits(sg->dma_address));
+			const unsigned int len = sg_dma_len(sg);
+
+			/*
+			 * From the UFSHCI spec: "Data Byte Count (DBC): A '0'
+			 * based value that indicates the length, in bytes, of
+			 * the data block. A maximum of length of 256KB may
+			 * exist for any entry. Bits 1:0 of this field shall be
+			 * 11b to indicate Dword granularity. A value of '3'
+			 * indicates 4 bytes, '7' indicates 8 bytes, etc."
+			 */
+			WARN_ONCE(len > 256 * 1024, "len = %#x\n", len);
+			prd_table[i].size = cpu_to_le32(len - 1);
+			prd_table[i].addr = cpu_to_le64(sg->dma_address);
 			prd_table[i].reserved = 0;
 		}
 	} else {
@@ -2661,7 +2678,7 @@
 	lrb->ucd_req_dma_addr = cmd_desc_element_addr;
 	lrb->ucd_rsp_ptr = (struct utp_upiu_rsp *)cmd_descp[i].response_upiu;
 	lrb->ucd_rsp_dma_addr = cmd_desc_element_addr + response_offset;
-	lrb->ucd_prdt_ptr = (struct ufshcd_sg_entry *)cmd_descp[i].prd_table;
+	lrb->ucd_prdt_ptr = cmd_descp[i].prd_table;
 	lrb->ucd_prdt_dma_addr = cmd_desc_element_addr + prdt_offset;
 }
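
Taking the two PRDT hunks together: each scatter-gather segment now stores one 64-bit little-endian address, and the size field holds the zero-based Data Byte Count quoted from the spec above. A worked encoding of a single segment (example_fill_prd is illustrative):

static void example_fill_prd(struct ufshcd_sg_entry *e, u64 dma_addr, u32 len)
{
	/* e.g. a 4 KiB segment: len = 4096, so DBC = 0xFFF (bits 1:0 = 11b) */
	e->addr = cpu_to_le64(dma_addr);
	e->reserved = 0;
	e->size = cpu_to_le32(len - 1);	/* zero-based; len must be <= 256 KiB */
}
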
 
@@ -5084,7 +5101,7 @@
 {
 	int result = 0;
 	int scsi_status;
-	int ocs;
+	enum utp_ocs ocs;
 
 	/* overall command status of utrd */
 	ocs = ufshcd_get_tr_ocs(lrbp);
@@ -5243,11 +5260,9 @@
  * __ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
  * @completed_reqs: bitmask that indicates which requests to complete
- * @retry_requests: whether to ask the SCSI core to retry completed requests
  */
 static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
-					unsigned long completed_reqs,
-					bool retry_requests)
+					unsigned long completed_reqs)
 {
 	struct ufshcd_lrb *lrbp;
 	struct scsi_cmnd *cmd;
@@ -5263,8 +5278,7 @@
 			if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
 				ufshcd_update_monitor(hba, lrbp);
 			ufshcd_add_command_trace(hba, index, UFS_CMD_COMP);
-			result = retry_requests ? DID_BUS_BUSY << 16 :
-				ufshcd_transfer_rsp_status(hba, lrbp);
+			result = ufshcd_transfer_rsp_status(hba, lrbp);
 			scsi_dma_unmap(cmd);
 			cmd->result = result;
 			/* Mark completed command as NULL in LRB */
@@ -5290,14 +5304,12 @@
 /**
  * ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
- * @retry_requests: whether or not to ask to retry requests
  *
  * Returns
  *  IRQ_HANDLED - If interrupt is valid
  *  IRQ_NONE    - If invalid interrupt
  */
-static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba,
-					     bool retry_requests)
+static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 {
 	unsigned long completed_reqs, flags;
 	u32 tr_doorbell;
@@ -5326,8 +5338,7 @@
 	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
 
 	if (completed_reqs) {
-		__ufshcd_transfer_req_compl(hba, completed_reqs,
-					    retry_requests);
+		__ufshcd_transfer_req_compl(hba, completed_reqs);
 		return IRQ_HANDLED;
 	} else {
 		return IRQ_NONE;
@@ -5826,13 +5837,7 @@
 /* Complete requests that have door-bell cleared */
 static void ufshcd_complete_requests(struct ufs_hba *hba)
 {
-	ufshcd_transfer_req_compl(hba, /*retry_requests=*/false);
-	ufshcd_tmc_handler(hba);
-}
-
-static void ufshcd_retry_aborted_requests(struct ufs_hba *hba)
-{
-	ufshcd_transfer_req_compl(hba, /*retry_requests=*/true);
+	ufshcd_transfer_req_compl(hba);
 	ufshcd_tmc_handler(hba);
 }
 
@@ -5914,9 +5919,10 @@
 	       (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK));
 }
 
-/* host lock must be held before calling this func */
-static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
+void ufshcd_schedule_eh_work(struct ufs_hba *hba)
 {
+	lockdep_assert_held(hba->host->host_lock);
+
 	/* handle fatal errors only when link is not in error state */
 	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
 		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
@@ -6076,6 +6082,13 @@
 
 	hba = container_of(work, struct ufs_hba, eh_work);
 
+	dev_info(hba->dev,
+		 "%s started; HBA state %s; powered %d; shutting down %d; saved_err = %d; saved_uic_err = %d; force_reset = %d%s\n",
+		 __func__, ufshcd_state_name[hba->ufshcd_state],
+		 hba->is_powered, hba->shutting_down, hba->saved_err,
+		 hba->saved_uic_err, hba->force_reset,
+		 ufshcd_is_link_broken(hba) ? "; link is broken" : "");
+
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (ufshcd_err_handling_should_stop(hba)) {
@@ -6170,6 +6183,8 @@
 			err_xfer = true;
 			goto lock_skip_pending_xfer_clear;
 		}
+		dev_err(hba->dev, "Aborted tag %d / CDB %#02x\n", tag,
+			hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1);
 	}
 
 	/* Clear pending task management requests */
@@ -6181,7 +6196,8 @@
 	}
 
 lock_skip_pending_xfer_clear:
-	ufshcd_retry_aborted_requests(hba);
+	/* Complete the requests that are cleared by s/w */
+	ufshcd_complete_requests(hba);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->silence_err_logs = false;
@@ -6249,6 +6265,9 @@
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	ufshcd_err_handling_unprepare(hba);
 	up(&hba->host_sem);
+
+	dev_info(hba->dev, "%s finished; HBA state %s\n", __func__,
+		 ufshcd_state_name[hba->ufshcd_state]);
 }
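
Two things worth noting in the error-handler hunks above: ufshcd_schedule_eh_work() trades its "host lock must be held" comment for lockdep_assert_held(), which makes the locking rule checkable under lockdep instead of advisory, and the handler now logs its state on entry and exit. The assertion idiom in isolation (example_requires_host_lock is hypothetical):

static void example_requires_host_lock(struct ufs_hba *hba)
{
	lockdep_assert_held(hba->host->host_lock);	/* splats if a caller forgets */

	/* ... touch state that is only consistent under host_lock ... */
}
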
 
 /**
@@ -6434,9 +6453,8 @@
 	irqreturn_t ret = IRQ_NONE;
 	int tag;
 
-	pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-
 	spin_lock_irqsave(hba->host->host_lock, flags);
+	pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
 	issued = hba->outstanding_tasks & ~pending;
 	for_each_set_bit(tag, &issued, hba->nutmrs) {
 		struct request *req = hba->tmf_rqs[tag];
@@ -6473,7 +6491,7 @@
 		retval |= ufshcd_tmc_handler(hba);
 
 	if (intr_status & UTP_TRANSFER_REQ_COMPL)
-		retval |= ufshcd_transfer_req_compl(hba, /*retry_requests=*/false);
+		retval |= ufshcd_transfer_req_compl(hba);
 
 	return retval;
 }
@@ -6545,6 +6563,10 @@
 	err = ufshcd_wait_for_register(hba,
 			REG_UTP_TASK_REQ_DOOR_BELL,
 			mask, 0, 1000, 1000);
+
+	dev_err(hba->dev, "Clearing task management function with tag %d %s\n",
+		tag, err ? "failed" : "succeeded");
+
 out:
 	return err;
 }
@@ -6593,11 +6615,6 @@
 	err = wait_for_completion_io_timeout(&wait,
 			msecs_to_jiffies(TM_CMD_TIMEOUT));
 	if (!err) {
-		/*
-		 * Make sure that ufshcd_compl_tm() does not trigger a
-		 * use-after-free.
-		 */
-		req->end_io_data = NULL;
 		ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_ERR);
 		dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
 				__func__, tm_function);
@@ -6637,7 +6654,8 @@
 		u8 tm_function, u8 *tm_response)
 {
 	struct utp_task_req_desc treq = { { 0 }, };
-	int ocs_value, err;
+	enum utp_ocs ocs_value;
+	int err;
 
 	/* Configure task request descriptor */
 	treq.header.dword_0 = cpu_to_le32(UTP_REQ_DESC_INT_CMD);
@@ -6815,7 +6833,7 @@
 	int err;
 	enum dev_cmd_type cmd_type = DEV_CMD_TYPE_QUERY;
 	struct utp_task_req_desc treq = { { 0 }, };
-	int ocs_value;
+	enum utp_ocs ocs_value;
 	u8 tm_f = be32_to_cpu(req_upiu->header.dword_1) >> 16 & MASK_TM_FUNC;
 
 	switch (msgcode) {
@@ -6893,7 +6911,7 @@
 			err = ufshcd_clear_cmd(hba, pos);
 			if (err)
 				break;
-			__ufshcd_transfer_req_compl(hba, 1U << pos, false);
+			__ufshcd_transfer_req_compl(hba, 1U << pos);
 		}
 	}
 
@@ -7055,7 +7073,7 @@
 		dev_err(hba->dev,
 		"%s: cmd was completed, but without a notifying intr, tag = %d",
 		__func__, tag);
-		__ufshcd_transfer_req_compl(hba, 1UL << tag, /*retry_requests=*/false);
+		__ufshcd_transfer_req_compl(hba, 1UL << tag);
 		goto release;
 	}
 
@@ -7092,6 +7110,7 @@
 		goto release;
 	}
 
+	lrbp->cmd = NULL;
 	err = SUCCESS;
 
 release:
@@ -7121,7 +7140,7 @@
 	ufshpb_reset_host(hba);
 	ufshcd_hba_stop(hba);
 	hba->silence_err_logs = true;
-	ufshcd_retry_aborted_requests(hba);
+	ufshcd_complete_requests(hba);
 	hba->silence_err_logs = false;
 
 	/* scale up clocks to max frequency before full reinitialization */
@@ -8002,6 +8021,9 @@
 	if (ret)
 		goto out;
 
+	if (hba->quirks & UFSHCD_QUIRK_SKIP_PH_CONFIGURATION)
+		goto out;
+
 	/* Debug counters initialization */
 	ufshcd_clear_dbg_ufs_stats(hba);
 
@@ -9772,6 +9794,11 @@
 {
 	int ret;
 
+	/* Verify that there are no gaps in struct utp_transfer_cmd_desc. */
+	static_assert(sizeof(struct utp_transfer_cmd_desc) ==
+		      2 * ALIGNED_UPIU_SIZE +
+			      SG_ALL * sizeof(struct ufshcd_sg_entry));
+
 	ufs_debugfs_init();
 
 	ret = scsi_register_driver(&ufs_dev_wlun_template.gendrv);
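
The static_assert() above catches any compiler-inserted padding in utp_transfer_cmd_desc now that ufshcd_sg_entry contains a __le64. The same compile-time technique on a made-up descriptor (example_desc is not a real UFS structure; static_assert and offsetof come from linux/build_bug.h and linux/stddef.h):

struct example_desc {
	__le32	header[4];	/* 16 bytes */
	__le64	addr;		/* lands at offset 16 on all architectures */
};

static_assert(sizeof(struct example_desc) == 24);
static_assert(offsetof(struct example_desc, addr) == 16);
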
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index dd76586..54750d7 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -589,6 +589,18 @@
 	 * This quirk allows only sg entries aligned with page size.
 	 */
 	UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE		= 1 << 14,
+
+	/*
+	 * This quirk needs to be enabled if the host controller does not
+	 * support UIC command
+	 */
+	UFSHCD_QUIRK_BROKEN_UIC_CMD			= 1 << 15,
+
+	/*
+	 * This quirk needs to be enabled if the host controller cannot
+	 * support physical host configuration.
+	 */
+	UFSHCD_QUIRK_SKIP_PH_CONFIGURATION		= 1 << 16,
 };
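
Both new quirks are plain opt-in bits in hba->quirks, intended for host controllers (such as a virtualized UFS host) that cannot issue UIC commands or run the PHY setup themselves. A hypothetical variant hook enabling them (in this series the Exynos Auto "virtual host" variant appears to be the consumer, via its driver data):

static int example_variant_init(struct ufs_hba *hba)
{
	/* No UIC commands and no physical-host configuration here */
	hba->quirks |= UFSHCD_QUIRK_BROKEN_UIC_CMD |
		       UFSHCD_QUIRK_SKIP_PH_CONFIGURATION;
	return 0;
}
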
 
 enum ufshcd_caps {
@@ -1023,6 +1035,7 @@
 void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk);
 void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val);
 void ufshcd_hba_stop(struct ufs_hba *hba);
+void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 
 static inline void check_upiu_size(void)
 {
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index de95be5..6a295c8 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -389,7 +389,7 @@
 };
 
 /* Overall command status values */
-enum {
+enum utp_ocs {
 	OCS_SUCCESS			= 0x0,
 	OCS_INVALID_CMD_TABLE_ATTR	= 0x1,
 	OCS_INVALID_PRDT_ATTR		= 0x2,
@@ -402,6 +402,9 @@
 	OCS_INVALID_CRYPTO_CONFIG	= 0x9,
 	OCS_GENERAL_CRYPTO_ERROR	= 0xA,
 	OCS_INVALID_COMMAND_STATUS	= 0x0F,
+};
+
+enum {
 	MASK_OCS			= 0x0F,
 };
 
@@ -412,20 +415,18 @@
 
 /**
  * struct ufshcd_sg_entry - UFSHCI PRD Entry
- * @base_addr: Lower 32bit physical address DW-0
- * @upper_addr: Upper 32bit physical address DW-1
+ * @addr: Physical address; DW-0 and DW-1.
  * @reserved: Reserved for future use DW-2
  * @size: size of physical segment DW-3
  */
 struct ufshcd_sg_entry {
-	__le32    base_addr;
-	__le32    upper_addr;
+	__le64    addr;
 	__le32    reserved;
 	__le32    size;
 };
 
 /**
- * struct utp_transfer_cmd_desc - UFS Command Descriptor structure
+ * struct utp_transfer_cmd_desc - UTP Command Descriptor (UCD)
  * @command_upiu: Command UPIU Frame address
  * @response_upiu: Response UPIU Frame address
  * @prd_table: Physical Region Descriptor
@@ -451,7 +452,7 @@
 };
 
 /**
- * struct utp_transfer_req_desc - UTRD structure
+ * struct utp_transfer_req_desc - UTP Transfer Request Descriptor (UTRD)
  * @header: UTRD header DW-0 to DW-3
  * @command_desc_base_addr_lo: UCD base address low DW-4
  * @command_desc_base_addr_hi: UCD base address high DW-5
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index f4eca441..ded5ba9 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -331,7 +331,7 @@
 	cdb[0] = UFSHPB_READ;
 
 	if (hba->dev_quirks & UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ)
-		ppn_tmp = swab64(ppn);
+		ppn_tmp = (__force __be64)swab64((__force u64)ppn);
 
 	/* ppn value is stored as big-endian in the host memory */
 	memcpy(&cdb[6], &ppn_tmp, sizeof(__be64));
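
The double __force cast above is aimed at sparse, not the CPU: ppn is annotated __be64 while swab64() operates on plain u64, so both directions of the conversion are marked as deliberate instead of weakening the type. The idiom in generic form (example_swap is illustrative):

static __be64 example_swap(__be64 v)
{
	return (__force __be64)swab64((__force u64)v);
}
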
@@ -394,8 +394,6 @@
 	if (!ufshpb_is_supported_chunk(hpb, transfer_len))
 		return 0;
 
-	WARN_ON_ONCE(transfer_len > HPB_MULTI_CHUNK_HIGH);
-
 	if (hpb->is_hcm) {
 		/*
 		 * in host control mode, reads are the main source for
@@ -1572,7 +1570,7 @@
 	if (ufshpb_is_legacy(hba))
 		hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH;
 	else
-		hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH;
+		hpb->pre_req_max_tr_len = hpb_dev_info->max_hpb_single_cmd;
 
 	hpb->lu_pinned_start = hpb_lu_info->pinned_start;
 	hpb->lu_pinned_end = hpb_lu_info->num_pinned ?
@@ -2582,7 +2580,7 @@
 {
 	struct ufshpb_dev_info *hpb_dev_info = &hba->ufshpb_dev;
 	int version, ret;
-	u32 max_hpb_single_cmd = HPB_MULTI_CHUNK_LOW;
+	int max_single_cmd;
 
 	hpb_dev_info->control_mode = desc_buf[DEVICE_DESC_PARAM_HPB_CONTROL];
 
@@ -2598,18 +2596,22 @@
 	if (version == HPB_SUPPORT_LEGACY_VERSION)
 		hpb_dev_info->is_legacy = true;
 
-	ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
-		QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_hpb_single_cmd);
-	if (ret)
-		dev_err(hba->dev, "%s: idn: read max size of single hpb cmd query request failed",
-			__func__);
-	hpb_dev_info->max_hpb_single_cmd = max_hpb_single_cmd;
-
 	/*
 	 * Get the number of user logical unit to check whether all
 	 * scsi_device finish initialization
 	 */
 	hpb_dev_info->num_lu = desc_buf[DEVICE_DESC_PARAM_NUM_LU];
+
+	if (hpb_dev_info->is_legacy)
+		return;
+
+	ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR,
+		QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_single_cmd);
+
+	if (ret)
+		hpb_dev_info->max_hpb_single_cmd = HPB_LEGACY_CHUNK_HIGH;
+	else
+		hpb_dev_info->max_hpb_single_cmd = min(max_single_cmd + 1, HPB_MULTI_CHUNK_HIGH);
 }
 
 void ufshpb_init(struct ufs_hba *hba)
diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h
index f15d8fd..b475dbd7 100644
--- a/drivers/scsi/ufs/ufshpb.h
+++ b/drivers/scsi/ufs/ufshpb.h
@@ -31,7 +31,6 @@
 
 /* hpb support chunk size */
 #define HPB_LEGACY_CHUNK_HIGH			1
-#define HPB_MULTI_CHUNK_LOW			7
 #define HPB_MULTI_CHUNK_HIGH			255
 
 /* hpb vender defined opcode */
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 19f7d7b..28e1d98 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -977,7 +977,6 @@
 static struct virtio_driver virtio_scsi_driver = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
-	.suppress_used_validation = true,
 	.driver.name = KBUILD_MODNAME,
 	.driver.owner = THIS_MODULE,
 	.id_table = id_table,
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index bd0fbcd..e24e220 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -834,8 +834,10 @@
 
 	maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN);
 
-	if (!maple_queue_cache)
+	if (!maple_queue_cache) {
+		retval = -ENOMEM;
 		goto cleanup_bothirqs;
+	}
 
 	INIT_LIST_HEAD(&maple_waitq);
 	INIT_LIST_HEAD(&maple_sentq);
@@ -848,6 +850,7 @@
 		if (!mdev[i]) {
 			while (i-- > 0)
 				maple_free_dev(mdev[i]);
+			retval = -ENOMEM;
 			goto cleanup_cache;
 		}
 		baseunits[i] = mdev[i];
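
Both maple hunks fix the same bug class: jumping to the cleanup label without setting retval, so the function returned whatever the variable last held (a stale 0, i.e. success). The restored idiom, sketched generically:

static int example_init(void)
{
	int retval = 0;
	void *buf;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf) {
		retval = -ENOMEM;	/* set the errno before jumping */
		goto out;
	}

	/* ... use buf ... */
	kfree(buf);
out:
	return retval;
}
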
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 8b3d268..b808c94 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -37,6 +37,7 @@
 #define CQSPI_NEEDS_WR_DELAY		BIT(0)
 #define CQSPI_DISABLE_DAC_MODE		BIT(1)
 #define CQSPI_SUPPORT_EXTERNAL_DMA	BIT(2)
+#define CQSPI_NO_SUPPORT_WR_COMPLETION	BIT(3)
 
 /* Capabilities */
 #define CQSPI_SUPPORTS_OCTAL		BIT(0)
@@ -86,6 +87,7 @@
 	struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT];
 	bool			use_dma_read;
 	u32			pd_dev_id;
+	bool			wr_completion;
 };
 
 struct cqspi_driver_platdata {
@@ -996,9 +998,11 @@
 	 * polling on the controller's side. spinand and spi-nor will take
 	 * care of polling the status register.
 	 */
-	reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
-	reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
-	writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
+	if (cqspi->wr_completion) {
+		reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
+		reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
+		writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
+	}
 
 	reg = readl(reg_base + CQSPI_REG_SIZE);
 	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
@@ -1736,6 +1740,10 @@
 
 	cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk);
 	master->max_speed_hz = cqspi->master_ref_clk_hz;
+
+	/* write completion is supported by default */
+	cqspi->wr_completion = true;
+
 	ddata  = of_device_get_match_data(dev);
 	if (ddata) {
 		if (ddata->quirks & CQSPI_NEEDS_WR_DELAY)
@@ -1747,6 +1755,8 @@
 			cqspi->use_direct_mode = true;
 		if (ddata->quirks & CQSPI_SUPPORT_EXTERNAL_DMA)
 			cqspi->use_dma_read = true;
+		if (ddata->quirks & CQSPI_NO_SUPPORT_WR_COMPLETION)
+			cqspi->wr_completion = false;
 
 		if (of_device_is_compatible(pdev->dev.of_node,
 					    "xlnx,versal-ospi-1.0"))
@@ -1859,6 +1869,10 @@
 	.quirks = CQSPI_DISABLE_DAC_MODE,
 };
 
+static const struct cqspi_driver_platdata socfpga_qspi = {
+	.quirks = CQSPI_NO_SUPPORT_WR_COMPLETION,
+};
+
 static const struct cqspi_driver_platdata versal_ospi = {
 	.hwcaps_mask = CQSPI_SUPPORTS_OCTAL,
 	.quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_SUPPORT_EXTERNAL_DMA,
@@ -1887,6 +1901,10 @@
 		.compatible = "xlnx,versal-ospi-1.0",
 		.data = (void *)&versal_ospi,
 	},
+	{
+		.compatible = "intel,socfpga-qspi",
+		.data = (void *)&socfpga_qspi,
+	},
 	{ /* end of table */ }
 };
 
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 5d98611..c72e501 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -912,7 +912,7 @@
 
 	ret = devm_spi_register_controller(&pdev->dev, controller);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_controller error.\n");
+		dev_err_probe(&pdev->dev, ret, "spi_register_controller error: %i\n", ret);
 		goto out_pm_get;
 	}
 
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 27a446f..e2affaee 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -491,22 +491,26 @@
 	int ret;
 
 	mas->tx = dma_request_chan(mas->dev, "tx");
-	ret = dev_err_probe(mas->dev, IS_ERR(mas->tx), "Failed to get tx DMA ch\n");
-	if (ret < 0)
+	if (IS_ERR(mas->tx)) {
+		ret = dev_err_probe(mas->dev, PTR_ERR(mas->tx),
+				    "Failed to get tx DMA ch\n");
 		goto err_tx;
+	}
 
 	mas->rx = dma_request_chan(mas->dev, "rx");
-	ret = dev_err_probe(mas->dev, IS_ERR(mas->rx), "Failed to get rx DMA ch\n");
-	if (ret < 0)
+	if (IS_ERR(mas->rx)) {
+		ret = dev_err_probe(mas->dev, PTR_ERR(mas->rx),
+				    "Failed to get rx DMA ch\n");
 		goto err_rx;
+	}
 
 	return 0;
 
 err_rx:
-	dma_release_channel(mas->tx);
-	mas->tx = NULL;
-err_tx:
 	mas->rx = NULL;
+	dma_release_channel(mas->tx);
+err_tx:
+	mas->tx = NULL;
 	return ret;
 }
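
The bug fixed above is subtle: dev_err_probe()'s second argument is the errno it logs and returns, but the old code passed IS_ERR(), which is only 0 or 1, so ret was never negative and the failure branches never ran. The corrected shape on its own (example names only):

static int example_request_tx(struct device *dev, struct dma_chan **chan)
{
	*chan = dma_request_chan(dev, "tx");
	if (IS_ERR(*chan))
		return dev_err_probe(dev, PTR_ERR(*chan),
				     "Failed to get tx DMA ch\n");
	return 0;
}
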
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index b23e675..fdd530b 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3099,12 +3099,6 @@
 
 	device_del(&ctlr->dev);
 
-	/* Release the last reference on the controller if its driver
-	 * has not yet been converted to devm_spi_alloc_master/slave().
-	 */
-	if (!ctlr->devm_allocated)
-		put_device(&ctlr->dev);
-
 	/* free bus id */
 	mutex_lock(&board_lock);
 	if (found == ctlr)
@@ -3113,6 +3107,12 @@
 
 	if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
 		mutex_unlock(&ctlr->add_lock);
+
+	/* Release the last reference on the controller if its driver
+	 * has not yet been converted to devm_spi_alloc_master/slave().
+	 */
+	if (!ctlr->devm_allocated)
+		put_device(&ctlr->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_controller);
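
The reordering above is a lifetime fix: the final put_device() may free the controller, and add_lock is embedded in that controller, so the unlock has to happen before the reference is dropped. Schematically (a sketch, not the driver code):

/* Before (use-after-free risk): put_device() may free ctlr, yet
 * mutex_unlock() still dereferences ctlr->add_lock afterwards. */
put_device(&ctlr->dev);
mutex_unlock(&ctlr->add_lock);

/* After: finish every access to ctlr, then drop the reference last. */
mutex_unlock(&ctlr->add_lock);
put_device(&ctlr->dev);
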
 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 59af251..7fec869 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -66,8 +66,6 @@
 
 source "drivers/staging/fwserial/Kconfig"
 
-source "drivers/staging/netlogic/Kconfig"
-
 source "drivers/staging/gs_fpgaboot/Kconfig"
 
 source "drivers/staging/unisys/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 76f4134..e66e19c 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -10,7 +10,6 @@
 obj-$(CONFIG_R8712U)		+= rtl8712/
 obj-$(CONFIG_R8188EU)		+= r8188eu/
 obj-$(CONFIG_RTS5208)		+= rts5208/
-obj-$(CONFIG_NETLOGIC_XLR_NET)	+= netlogic/
 obj-$(CONFIG_OCTEON_ETHERNET)	+= octeon/
 obj-$(CONFIG_OCTEON_USB)	+= octeon-usb/
 obj-$(CONFIG_VT6655)		+= vt6655/
diff --git a/drivers/staging/fbtft/fb_ssd1351.c b/drivers/staging/fbtft/fb_ssd1351.c
index cf263a5..6fd549a 100644
--- a/drivers/staging/fbtft/fb_ssd1351.c
+++ b/drivers/staging/fbtft/fb_ssd1351.c
@@ -187,7 +187,6 @@
 	},
 };
 
-#ifdef CONFIG_FB_BACKLIGHT
 static int update_onboard_backlight(struct backlight_device *bd)
 {
 	struct fbtft_par *par = bl_get_data(bd);
@@ -231,9 +230,6 @@
 	if (!par->fbtftops.unregister_backlight)
 		par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
 }
-#else
-static void register_onboard_backlight(struct fbtft_par *par) { };
-#endif
 
 FBTFT_REGISTER_DRIVER(DRVNAME, "solomon,ssd1351", &display);
 
diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c
index ecb5f75f..f2684d2 100644
--- a/drivers/staging/fbtft/fbtft-core.c
+++ b/drivers/staging/fbtft/fbtft-core.c
@@ -128,7 +128,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_FB_BACKLIGHT
 static int fbtft_backlight_update_status(struct backlight_device *bd)
 {
 	struct fbtft_par *par = bl_get_data(bd);
@@ -161,6 +160,7 @@
 		par->info->bl_dev = NULL;
 	}
 }
+EXPORT_SYMBOL(fbtft_unregister_backlight);
 
 static const struct backlight_ops fbtft_bl_ops = {
 	.get_brightness	= fbtft_backlight_get_brightness,
@@ -198,12 +198,7 @@
 	if (!par->fbtftops.unregister_backlight)
 		par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
 }
-#else
-void fbtft_register_backlight(struct fbtft_par *par) { };
-void fbtft_unregister_backlight(struct fbtft_par *par) { };
-#endif
 EXPORT_SYMBOL(fbtft_register_backlight);
-EXPORT_SYMBOL(fbtft_unregister_backlight);
 
 static void fbtft_set_addr_win(struct fbtft_par *par, int xs, int ys, int xe,
 			       int ye)
@@ -853,13 +848,11 @@
 		 fb_info->fix.smem_len >> 10, text1,
 		 HZ / fb_info->fbdefio->delay, text2);
 
-#ifdef CONFIG_FB_BACKLIGHT
 	/* Turn on backlight if available */
 	if (fb_info->bl_dev) {
 		fb_info->bl_dev->props.power = FB_BLANK_UNBLANK;
 		fb_info->bl_dev->ops->update_status(fb_info->bl_dev);
 	}
-#endif
 
 	return 0;
 
diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c
index 1ed4772..8437606 100644
--- a/drivers/staging/greybus/audio_helper.c
+++ b/drivers/staging/greybus/audio_helper.c
@@ -192,7 +192,11 @@
 				      unsigned int num_controls)
 {
 	struct snd_card *card = component->card->snd_card;
+	int err;
 
-	return gbaudio_remove_controls(card, component->dev, controls,
-				       num_controls, component->name_prefix);
+	down_write(&card->controls_rwsem);
+	err = gbaudio_remove_controls(card, component->dev, controls,
+				      num_controls, component->name_prefix);
+	up_write(&card->controls_rwsem);
+	return err;
 }
diff --git a/drivers/staging/netlogic/Kconfig b/drivers/staging/netlogic/Kconfig
deleted file mode 100644
index e171260..0000000
--- a/drivers/staging/netlogic/Kconfig
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config NETLOGIC_XLR_NET
-	tristate "Netlogic XLR/XLS network device"
-	depends on CPU_XLR
-	depends on NETDEVICES
-	select PHYLIB
-	help
-	This driver support Netlogic XLR/XLS on chip gigabit
-	Ethernet.
diff --git a/drivers/staging/netlogic/Makefile b/drivers/staging/netlogic/Makefile
deleted file mode 100644
index 7e2902a..0000000
--- a/drivers/staging/netlogic/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_NETLOGIC_XLR_NET) += xlr_net.o platform_net.o
diff --git a/drivers/staging/netlogic/TODO b/drivers/staging/netlogic/TODO
deleted file mode 100644
index 20e22ec..0000000
--- a/drivers/staging/netlogic/TODO
+++ /dev/null
@@ -1,11 +0,0 @@
-* Implementing 64bit stat counter in software
-* All memory allocation should be changed to DMA allocations
-* Changing comments into linux standard format
-
-Please send patches
-To:
-Ganesan Ramalingam <ganesanr@broadcom.com>
-Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc:
-Jayachandran Chandrashekaran Nair <jchandra@broadcom.com>
-
diff --git a/drivers/staging/netlogic/platform_net.c b/drivers/staging/netlogic/platform_net.c
deleted file mode 100644
index 8be9d0b..0000000
--- a/drivers/staging/netlogic/platform_net.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/resource.h>
-#include <linux/phy.h>
-
-#include <asm/netlogic/haldefs.h>
-#include <asm/netlogic/common.h>
-#include <asm/netlogic/xlr/fmn.h>
-#include <asm/netlogic/xlr/xlr.h>
-#include <asm/netlogic/psb-bootinfo.h>
-#include <asm/netlogic/xlr/pic.h>
-#include <asm/netlogic/xlr/iomap.h>
-
-#include "platform_net.h"
-
-/* Linux Net */
-#define MAX_NUM_GMAC		8
-#define MAX_NUM_XLS_GMAC	8
-#define MAX_NUM_XLR_GMAC	4
-
-static u32 xlr_gmac_offsets[] = {
-	NETLOGIC_IO_GMAC_0_OFFSET, NETLOGIC_IO_GMAC_1_OFFSET,
-	NETLOGIC_IO_GMAC_2_OFFSET, NETLOGIC_IO_GMAC_3_OFFSET,
-	NETLOGIC_IO_GMAC_4_OFFSET, NETLOGIC_IO_GMAC_5_OFFSET,
-	NETLOGIC_IO_GMAC_6_OFFSET, NETLOGIC_IO_GMAC_7_OFFSET
-};
-
-static u32 xlr_gmac_irqs[] = { PIC_GMAC_0_IRQ, PIC_GMAC_1_IRQ,
-	PIC_GMAC_2_IRQ, PIC_GMAC_3_IRQ,
-	PIC_GMAC_4_IRQ, PIC_GMAC_5_IRQ,
-	PIC_GMAC_6_IRQ, PIC_GMAC_7_IRQ
-};
-
-static struct resource xlr_net0_res[8];
-static struct resource xlr_net1_res[8];
-static u32 __iomem *gmac4_addr;
-static u32 __iomem *gpio_addr;
-
-static void xlr_resource_init(struct resource *res, int offset, int irq)
-{
-	res->name = "gmac";
-
-	res->start = CPHYSADDR(nlm_mmio_base(offset));
-	res->end = res->start + 0xfff;
-	res->flags = IORESOURCE_MEM;
-
-	res++;
-	res->name = "gmac";
-	res->start = irq;
-	res->end = irq;
-	res->flags = IORESOURCE_IRQ;
-}
-
-static struct platform_device *gmac_controller2_init(void *gmac0_addr)
-{
-	int mac;
-	static struct xlr_net_data ndata1 = {
-		.phy_interface	= PHY_INTERFACE_MODE_SGMII,
-		.rfr_station	= FMN_STNID_GMAC1_FR_0,
-		.bucket_size	= xlr_board_fmn_config.bucket_size,
-		.gmac_fmn_info	= &xlr_board_fmn_config.gmac[1],
-	};
-
-	static struct platform_device xlr_net_dev1 = {
-		.name		= "xlr-net",
-		.id		= 1,
-		.dev.platform_data = &ndata1,
-	};
-
-	gmac4_addr =
-		ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_4_OFFSET)),
-			0xfff);
-	ndata1.serdes_addr = gmac4_addr;
-	ndata1.pcs_addr	= gmac4_addr;
-	ndata1.mii_addr	= gmac0_addr;
-	ndata1.gpio_addr = gpio_addr;
-	ndata1.cpu_mask = nlm_current_node()->coremask;
-
-	xlr_net_dev1.resource = xlr_net1_res;
-
-	for (mac = 0; mac < 4; mac++) {
-		ndata1.tx_stnid[mac] = FMN_STNID_GMAC1_TX0 + mac;
-		ndata1.phy_addr[mac] = mac + 4 + 0x10;
-
-		xlr_resource_init(&xlr_net1_res[mac * 2],
-				  xlr_gmac_offsets[mac + 4],
-				  xlr_gmac_irqs[mac + 4]);
-	}
-	xlr_net_dev1.num_resources = 8;
-
-	return &xlr_net_dev1;
-}
-
-static void xls_gmac_init(void)
-{
-	int mac;
-	struct platform_device *xlr_net_dev1;
-	void __iomem *gmac0_addr =
-		ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_0_OFFSET)),
-			0xfff);
-
-	static struct xlr_net_data ndata0 = {
-		.rfr_station	= FMN_STNID_GMACRFR_0,
-		.bucket_size	= xlr_board_fmn_config.bucket_size,
-		.gmac_fmn_info	= &xlr_board_fmn_config.gmac[0],
-	};
-
-	static struct platform_device xlr_net_dev0 = {
-		.name		= "xlr-net",
-		.id		= 0,
-	};
-	xlr_net_dev0.dev.platform_data = &ndata0;
-	ndata0.serdes_addr = gmac0_addr;
-	ndata0.pcs_addr	= gmac0_addr;
-	ndata0.mii_addr	= gmac0_addr;
-
-	/* Passing GPIO base for serdes init. Only needed on sgmii ports */
-	gpio_addr =
-		ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GPIO_OFFSET)),
-			0xfff);
-	ndata0.gpio_addr = gpio_addr;
-	ndata0.cpu_mask = nlm_current_node()->coremask;
-
-	xlr_net_dev0.resource = xlr_net0_res;
-
-	switch (nlm_prom_info.board_major_version) {
-	case 12:
-		/* first block RGMII or XAUI, use RGMII */
-		ndata0.phy_interface = PHY_INTERFACE_MODE_RGMII;
-		ndata0.tx_stnid[0] = FMN_STNID_GMAC0_TX0;
-		ndata0.phy_addr[0] = 0;
-
-		xlr_net_dev0.num_resources = 2;
-
-		xlr_resource_init(&xlr_net0_res[0], xlr_gmac_offsets[0],
-				  xlr_gmac_irqs[0]);
-		platform_device_register(&xlr_net_dev0);
-
-		/* second block is XAUI, not supported yet */
-		break;
-	default:
-		/* default XLS config, all ports SGMII */
-		ndata0.phy_interface = PHY_INTERFACE_MODE_SGMII;
-		for (mac = 0; mac < 4; mac++) {
-			ndata0.tx_stnid[mac] = FMN_STNID_GMAC0_TX0 + mac;
-			ndata0.phy_addr[mac] = mac + 0x10;
-
-			xlr_resource_init(&xlr_net0_res[mac * 2],
-					  xlr_gmac_offsets[mac],
-					xlr_gmac_irqs[mac]);
-		}
-		xlr_net_dev0.num_resources = 8;
-		platform_device_register(&xlr_net_dev0);
-
-		xlr_net_dev1 = gmac_controller2_init(gmac0_addr);
-		platform_device_register(xlr_net_dev1);
-	}
-}
-
-static void xlr_gmac_init(void)
-{
-	int mac;
-
-	/* assume all GMACs for now */
-	static struct xlr_net_data ndata0 = {
-		.phy_interface	= PHY_INTERFACE_MODE_RGMII,
-		.serdes_addr	= NULL,
-		.pcs_addr	= NULL,
-		.rfr_station	= FMN_STNID_GMACRFR_0,
-		.bucket_size	= xlr_board_fmn_config.bucket_size,
-		.gmac_fmn_info	= &xlr_board_fmn_config.gmac[0],
-		.gpio_addr	= NULL,
-	};
-
-	static struct platform_device xlr_net_dev0 = {
-		.name		= "xlr-net",
-		.id		= 0,
-		.dev.platform_data = &ndata0,
-	};
-	ndata0.mii_addr =
-		ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_0_OFFSET)),
-			0xfff);
-
-	ndata0.cpu_mask = nlm_current_node()->coremask;
-
-	for (mac = 0; mac < MAX_NUM_XLR_GMAC; mac++) {
-		ndata0.tx_stnid[mac] = FMN_STNID_GMAC0_TX0 + mac;
-		ndata0.phy_addr[mac] = mac;
-		xlr_resource_init(&xlr_net0_res[mac * 2], xlr_gmac_offsets[mac],
-				  xlr_gmac_irqs[mac]);
-	}
-	xlr_net_dev0.num_resources = 8;
-	xlr_net_dev0.resource = xlr_net0_res;
-
-	platform_device_register(&xlr_net_dev0);
-}
-
-static int __init xlr_net_init(void)
-{
-	if (nlm_chip_is_xls())
-		xls_gmac_init();
-	else
-		xlr_gmac_init();
-
-	return 0;
-}
-
-arch_initcall(xlr_net_init);
diff --git a/drivers/staging/netlogic/platform_net.h b/drivers/staging/netlogic/platform_net.h
deleted file mode 100644
index c8d4c13..0000000
--- a/drivers/staging/netlogic/platform_net.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#define PORTS_PER_CONTROLLER		4
-
-struct xlr_net_data {
-	int cpu_mask;
-	u32 __iomem *mii_addr;
-	u32 __iomem *serdes_addr;
-	u32 __iomem *pcs_addr;
-	u32 __iomem *gpio_addr;
-	int phy_interface;
-	int rfr_station;
-	int tx_stnid[PORTS_PER_CONTROLLER];
-	int *bucket_size;
-	int phy_addr[PORTS_PER_CONTROLLER];
-	struct xlr_fmn_info *gmac_fmn_info;
-};
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
deleted file mode 100644
index 69ea61f..0000000
--- a/drivers/staging/netlogic/xlr_net.c
+++ /dev/null
@@ -1,1080 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#include <linux/phy.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/smp.h>
-#include <linux/ethtool.h>
-#include <linux/module.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/jiffies.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-
-#include <asm/mipsregs.h>
-/*
- * fmn.h - For FMN credit configuration and registering fmn_handler.
- * FMN is communication mechanism that allows processing agents within
- * XLR/XLS to communicate each other.
- */
-#include <asm/netlogic/xlr/fmn.h>
-
-#include "platform_net.h"
-#include "xlr_net.h"
-
-/*
- * The readl/writel implementation byteswaps on XLR/XLS, so
- * we need to use __raw_ IO to read the NAE registers
- * because they are in the big-endian MMIO area on the SoC.
- */
-static inline void xlr_nae_wreg(u32 __iomem *base, unsigned int reg, u32 val)
-{
-	__raw_writel(val, base + reg);
-}
-
-static inline u32 xlr_nae_rdreg(u32 __iomem *base, unsigned int reg)
-{
-	return __raw_readl(base + reg);
-}
-
-static inline void xlr_reg_update(u32 *base_addr, u32 off, u32 val, u32 mask)
-{
-	u32 tmp;
-
-	tmp = xlr_nae_rdreg(base_addr, off);
-	xlr_nae_wreg(base_addr, off, (tmp & ~mask) | (val & mask));
-}
-
-#define MAC_SKB_BACK_PTR_SIZE SMP_CACHE_BYTES
-
-static int send_to_rfr_fifo(struct xlr_net_priv *priv, void *addr)
-{
-	struct nlm_fmn_msg msg;
-	int ret = 0, num_try = 0, stnid;
-	unsigned long paddr, mflags;
-
-	paddr = virt_to_bus(addr);
-	msg.msg0 = (u64)paddr & 0xffffffffe0ULL;
-	msg.msg1 = 0;
-	msg.msg2 = 0;
-	msg.msg3 = 0;
-	stnid = priv->nd->rfr_station;
-	do {
-		mflags = nlm_cop2_enable_irqsave();
-		ret = nlm_fmn_send(1, 0, stnid, &msg);
-		nlm_cop2_disable_irqrestore(mflags);
-		if (ret == 0)
-			return 0;
-	} while (++num_try < 10000);
-
-	netdev_err(priv->ndev, "Send to RFR failed in RX path\n");
-	return ret;
-}
-
-static inline unsigned char *xlr_alloc_skb(void)
-{
-	struct sk_buff *skb;
-	int buf_len = sizeof(struct sk_buff *);
-	unsigned char *skb_data;
-
-	/* skb->data is cache aligned */
-	skb = alloc_skb(XLR_RX_BUF_SIZE, GFP_ATOMIC);
-	if (!skb)
-		return NULL;
-	skb_data = skb->data;
-	skb_reserve(skb, MAC_SKB_BACK_PTR_SIZE);
-	memcpy(skb_data, &skb, buf_len);
-
-	return skb->data;
-}
-
-static void xlr_net_fmn_handler(int bkt, int src_stnid, int size, int code,
-				struct nlm_fmn_msg *msg, void *arg)
-{
-	struct sk_buff *skb;
-	void *skb_data = NULL;
-	struct net_device *ndev;
-	struct xlr_net_priv *priv;
-	u32 port, length;
-	unsigned char *addr;
-	struct xlr_adapter *adapter = arg;
-
-	length = (msg->msg0 >> 40) & 0x3fff;
-	if (length == 0) {
-		addr = bus_to_virt(msg->msg0 & 0xffffffffffULL);
-		addr = addr - MAC_SKB_BACK_PTR_SIZE;
-		skb = (struct sk_buff *)(*(unsigned long *)addr);
-		dev_kfree_skb_any((struct sk_buff *)addr);
-	} else {
-		addr = (unsigned char *)
-			bus_to_virt(msg->msg0 & 0xffffffffe0ULL);
-		length = length - BYTE_OFFSET - MAC_CRC_LEN;
-		port = ((int)msg->msg0) & 0x0f;
-		addr = addr - MAC_SKB_BACK_PTR_SIZE;
-		skb = (struct sk_buff *)(*(unsigned long *)addr);
-		skb->dev = adapter->netdev[port];
-		if (!skb->dev)
-			return;
-		ndev = skb->dev;
-		priv = netdev_priv(ndev);
-
-		/* 16 byte IP header align */
-		skb_reserve(skb, BYTE_OFFSET);
-		skb_put(skb, length);
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		netif_rx(skb);
-		/* Fill rx ring */
-		skb_data = xlr_alloc_skb();
-		if (skb_data)
-			send_to_rfr_fifo(priv, skb_data);
-	}
-}
-
-static struct phy_device *xlr_get_phydev(struct xlr_net_priv *priv)
-{
-	return mdiobus_get_phy(priv->mii_bus, priv->phy_addr);
-}
-
-/*
- * Ethtool operation
- */
-static int xlr_get_link_ksettings(struct net_device *ndev,
-				  struct ethtool_link_ksettings *ecmd)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = xlr_get_phydev(priv);
-
-	if (!phydev)
-		return -ENODEV;
-
-	phy_ethtool_ksettings_get(phydev, ecmd);
-
-	return 0;
-}
-
-static int xlr_set_link_ksettings(struct net_device *ndev,
-				  const struct ethtool_link_ksettings *ecmd)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = xlr_get_phydev(priv);
-
-	if (!phydev)
-		return -ENODEV;
-	return phy_ethtool_ksettings_set(phydev, ecmd);
-}
-
-static const struct ethtool_ops xlr_ethtool_ops = {
-	.get_link_ksettings = xlr_get_link_ksettings,
-	.set_link_ksettings = xlr_set_link_ksettings,
-};
-
-/*
- * Net operations
- */
-static int xlr_net_fill_rx_ring(struct net_device *ndev)
-{
-	void *skb_data;
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	int i;
-
-	for (i = 0; i < MAX_FRIN_SPILL / 4; i++) {
-		skb_data = xlr_alloc_skb();
-		if (!skb_data)
-			return -ENOMEM;
-		send_to_rfr_fifo(priv, skb_data);
-	}
-	netdev_info(ndev, "Rx ring setup done\n");
-	return 0;
-}
-
-static int xlr_net_open(struct net_device *ndev)
-{
-	u32 err;
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = xlr_get_phydev(priv);
-
-	/* schedule a link state check */
-	phy_start(phydev);
-
-	err = phy_start_aneg(phydev);
-	if (err) {
-		pr_err("Autoneg failed\n");
-		return err;
-	}
-	/* Setup the speed from PHY to internal reg*/
-	xlr_set_gmac_speed(priv);
-
-	netif_tx_start_all_queues(ndev);
-
-	return 0;
-}
-
-static int xlr_net_stop(struct net_device *ndev)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = xlr_get_phydev(priv);
-
-	phy_stop(phydev);
-	netif_tx_stop_all_queues(ndev);
-	return 0;
-}
-
-static void xlr_make_tx_desc(struct nlm_fmn_msg *msg, unsigned long addr,
-			     struct sk_buff *skb)
-{
-	unsigned long physkb = virt_to_phys(skb);
-	int cpu_core = nlm_core_id();
-	int fr_stn_id = cpu_core * 8 + XLR_FB_STN;	/* FB to 6th bucket */
-
-	msg->msg0 = (((u64)1 << 63)	|	/* End of packet descriptor */
-		((u64)127 << 54)	|	/* No Free back */
-		(u64)skb->len << 40	|	/* Length of data */
-		((u64)addr));
-	msg->msg1 = (((u64)1 << 63)	|
-		((u64)fr_stn_id << 54)	|	/* Free back id */
-		(u64)0 << 40		|	/* Set len to 0 */
-		((u64)physkb  & 0xffffffff));	/* 32bit address */
-	msg->msg2 = 0;
-	msg->msg3 = 0;
-}
-
-static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
-				      struct net_device *ndev)
-{
-	struct nlm_fmn_msg msg;
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	int ret;
-	u32 flags;
-
-	xlr_make_tx_desc(&msg, virt_to_phys(skb->data), skb);
-	flags = nlm_cop2_enable_irqsave();
-	ret = nlm_fmn_send(2, 0, priv->tx_stnid, &msg);
-	nlm_cop2_disable_irqrestore(flags);
-	if (ret)
-		dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
-}
-
-static void xlr_hw_set_mac_addr(struct net_device *ndev)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-
-	/* set mac station address */
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR0,
-		     ((ndev->dev_addr[5] << 24) | (ndev->dev_addr[4] << 16) |
-		     (ndev->dev_addr[3] << 8) | (ndev->dev_addr[2])));
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR0 + 1,
-		     ((ndev->dev_addr[1] << 24) | (ndev->dev_addr[0] << 16)));
-
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK2, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK2 + 1, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK3, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK3 + 1, 0xffffffff);
-
-	xlr_nae_wreg(priv->base_addr, R_MAC_FILTER_CONFIG,
-		     (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) |
-		     (1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) |
-		     (1 << O_MAC_FILTER_CONFIG__MAC_ADDR0_VALID));
-
-	if (priv->nd->phy_interface == PHY_INTERFACE_MODE_RGMII ||
-	    priv->nd->phy_interface == PHY_INTERFACE_MODE_SGMII)
-		xlr_reg_update(priv->base_addr, R_IPG_IFG, MAC_B2B_IPG, 0x7f);
-}
-
-static int xlr_net_set_mac_addr(struct net_device *ndev, void *data)
-{
-	int err;
-
-	err = eth_mac_addr(ndev, data);
-	if (err)
-		return err;
-	xlr_hw_set_mac_addr(ndev);
-	return 0;
-}
-
-static void xlr_set_rx_mode(struct net_device *ndev)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	u32 regval;
-
-	regval = xlr_nae_rdreg(priv->base_addr, R_MAC_FILTER_CONFIG);
-
-	if (ndev->flags & IFF_PROMISC) {
-		regval |= (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) |
-		(1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) |
-		(1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) |
-		(1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN);
-	} else {
-		regval &= ~((1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) |
-		(1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN));
-	}
-
-	xlr_nae_wreg(priv->base_addr, R_MAC_FILTER_CONFIG, regval);
-}
-
-static void xlr_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-
-	stats->rx_packets = xlr_nae_rdreg(priv->base_addr, RX_PACKET_COUNTER);
-	stats->tx_packets = xlr_nae_rdreg(priv->base_addr, TX_PACKET_COUNTER);
-	stats->rx_bytes = xlr_nae_rdreg(priv->base_addr, RX_BYTE_COUNTER);
-	stats->tx_bytes = xlr_nae_rdreg(priv->base_addr, TX_BYTE_COUNTER);
-	stats->tx_errors = xlr_nae_rdreg(priv->base_addr, TX_FCS_ERROR_COUNTER);
-	stats->rx_dropped = xlr_nae_rdreg(priv->base_addr,
-					  RX_DROP_PACKET_COUNTER);
-	stats->tx_dropped = xlr_nae_rdreg(priv->base_addr,
-					  TX_DROP_FRAME_COUNTER);
-
-	stats->multicast = xlr_nae_rdreg(priv->base_addr,
-					 RX_MULTICAST_PACKET_COUNTER);
-	stats->collisions = xlr_nae_rdreg(priv->base_addr,
-					  TX_TOTAL_COLLISION_COUNTER);
-
-	stats->rx_length_errors = xlr_nae_rdreg(priv->base_addr,
-						RX_FRAME_LENGTH_ERROR_COUNTER);
-	stats->rx_over_errors = xlr_nae_rdreg(priv->base_addr,
-					      RX_DROP_PACKET_COUNTER);
-	stats->rx_crc_errors = xlr_nae_rdreg(priv->base_addr,
-					     RX_FCS_ERROR_COUNTER);
-	stats->rx_frame_errors = xlr_nae_rdreg(priv->base_addr,
-					       RX_ALIGNMENT_ERROR_COUNTER);
-
-	stats->rx_fifo_errors = xlr_nae_rdreg(priv->base_addr,
-					      RX_DROP_PACKET_COUNTER);
-	stats->rx_missed_errors = xlr_nae_rdreg(priv->base_addr,
-						RX_CARRIER_SENSE_ERROR_COUNTER);
-
-	stats->rx_errors = (stats->rx_over_errors + stats->rx_crc_errors +
-			    stats->rx_frame_errors + stats->rx_fifo_errors +
-			    stats->rx_missed_errors);
-
-	stats->tx_aborted_errors = xlr_nae_rdreg(priv->base_addr,
-						 TX_EXCESSIVE_COLLISION_PACKET_COUNTER);
-	stats->tx_carrier_errors = xlr_nae_rdreg(priv->base_addr,
-						 TX_DROP_FRAME_COUNTER);
-	stats->tx_fifo_errors = xlr_nae_rdreg(priv->base_addr,
-					      TX_DROP_FRAME_COUNTER);
-}
-
-static const struct net_device_ops xlr_netdev_ops = {
-	.ndo_open = xlr_net_open,
-	.ndo_stop = xlr_net_stop,
-	.ndo_start_xmit = xlr_net_start_xmit,
-	.ndo_select_queue = dev_pick_tx_cpu_id,
-	.ndo_set_mac_address = xlr_net_set_mac_addr,
-	.ndo_set_rx_mode = xlr_set_rx_mode,
-	.ndo_get_stats64 = xlr_stats,
-};
-
-/*
- * Gmac init
- */
-static void *xlr_config_spill(struct xlr_net_priv *priv, int reg_start_0,
-			      int reg_start_1, int reg_size, int size)
-{
-	void *spill;
-	u32 *base;
-	unsigned long phys_addr;
-	u32 spill_size;
-
-	base = priv->base_addr;
-	spill_size = size;
-	spill = kmalloc(spill_size + SMP_CACHE_BYTES, GFP_KERNEL);
-	if (!spill)
-		return ZERO_SIZE_PTR;
-
-	spill = PTR_ALIGN(spill, SMP_CACHE_BYTES);
-	phys_addr = virt_to_phys(spill);
-	dev_dbg(&priv->ndev->dev, "Allocated spill %d bytes at %lx\n",
-		size, phys_addr);
-	xlr_nae_wreg(base, reg_start_0, (phys_addr >> 5) & 0xffffffff);
-	xlr_nae_wreg(base, reg_start_1, ((u64)phys_addr >> 37) & 0x07);
-	xlr_nae_wreg(base, reg_size, spill_size);
-
-	return spill;
-}
-
-/*
- * Configure the 6 FIFO's that are used by the network accelarator to
- * communicate with the rest of the XLx device. 4 of the FIFO's are for
- * packets from NA --> cpu (called Class FIFO's) and 2 are for feeding
- * the NA with free descriptors.
- */
-static void xlr_config_fifo_spill_area(struct xlr_net_priv *priv)
-{
-	priv->frin_spill = xlr_config_spill(priv,
-					    R_REG_FRIN_SPILL_MEM_START_0,
-					    R_REG_FRIN_SPILL_MEM_START_1,
-					    R_REG_FRIN_SPILL_MEM_SIZE,
-					    MAX_FRIN_SPILL * sizeof(u64));
-	priv->frout_spill = xlr_config_spill(priv,
-					     R_FROUT_SPILL_MEM_START_0,
-					     R_FROUT_SPILL_MEM_START_1,
-					     R_FROUT_SPILL_MEM_SIZE,
-					     MAX_FROUT_SPILL * sizeof(u64));
-	priv->class_0_spill = xlr_config_spill(priv,
-					       R_CLASS0_SPILL_MEM_START_0,
-					       R_CLASS0_SPILL_MEM_START_1,
-					       R_CLASS0_SPILL_MEM_SIZE,
-					       MAX_CLASS_0_SPILL * sizeof(u64));
-	priv->class_1_spill = xlr_config_spill(priv,
-					       R_CLASS1_SPILL_MEM_START_0,
-					       R_CLASS1_SPILL_MEM_START_1,
-					       R_CLASS1_SPILL_MEM_SIZE,
-					       MAX_CLASS_1_SPILL * sizeof(u64));
-	priv->class_2_spill = xlr_config_spill(priv,
-					       R_CLASS2_SPILL_MEM_START_0,
-					       R_CLASS2_SPILL_MEM_START_1,
-					       R_CLASS2_SPILL_MEM_SIZE,
-					       MAX_CLASS_2_SPILL * sizeof(u64));
-	priv->class_3_spill = xlr_config_spill(priv,
-					       R_CLASS3_SPILL_MEM_START_0,
-					       R_CLASS3_SPILL_MEM_START_1,
-					       R_CLASS3_SPILL_MEM_SIZE,
-					       MAX_CLASS_3_SPILL * sizeof(u64));
-}
-
-/*
- * Configure PDE to Round-Robin distribution of packets to the
- * available cpu
- */
-static void xlr_config_pde(struct xlr_net_priv *priv)
-{
-	int i = 0;
-	u64 bkt_map = 0;
-
-	/* Each core has 8 buckets(station) */
-	for (i = 0; i < hweight32(priv->nd->cpu_mask); i++)
-		bkt_map |= (0xff << (i * 8));
-
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_0, (bkt_map & 0xffffffff));
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_0 + 1,
-		     ((bkt_map >> 32) & 0xffffffff));
-
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_1, (bkt_map & 0xffffffff));
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_1 + 1,
-		     ((bkt_map >> 32) & 0xffffffff));
-
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_2, (bkt_map & 0xffffffff));
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_2 + 1,
-		     ((bkt_map >> 32) & 0xffffffff));
-
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_3, (bkt_map & 0xffffffff));
-	xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_3 + 1,
-		     ((bkt_map >> 32) & 0xffffffff));
-}
-
-/*
- * Setup the Message ring credits, bucket size and other
- * common configuration
- */
-static int xlr_config_common(struct xlr_net_priv *priv)
-{
-	struct xlr_fmn_info *gmac = priv->nd->gmac_fmn_info;
-	int start_stn_id = gmac->start_stn_id;
-	int end_stn_id = gmac->end_stn_id;
-	int *bucket_size = priv->nd->bucket_size;
-	int i, j, err;
-
-	/* Setting non-core MsgBktSize(0x321 - 0x325) */
-	for (i = start_stn_id; i <= end_stn_id; i++) {
-		xlr_nae_wreg(priv->base_addr,
-			     R_GMAC_RFR0_BUCKET_SIZE + i - start_stn_id,
-			     bucket_size[i]);
-	}
-
-	/*
-	 * Setting non-core Credit counter register
-	 * Distributing Gmac's credit to CPU's
-	 */
-	for (i = 0; i < 8; i++) {
-		for (j = 0; j < 8; j++)
-			xlr_nae_wreg(priv->base_addr,
-				     (R_CC_CPU0_0 + (i * 8)) + j,
-				     gmac->credit_config[(i * 8) + j]);
-	}
-
-	xlr_nae_wreg(priv->base_addr, R_MSG_TX_THRESHOLD, 3);
-	xlr_nae_wreg(priv->base_addr, R_DMACR0, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_DMACR1, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_DMACR2, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_DMACR3, 0xffffffff);
-	xlr_nae_wreg(priv->base_addr, R_FREEQCARVE, 0);
-
-	err = xlr_net_fill_rx_ring(priv->ndev);
-	if (err)
-		return err;
-	nlm_register_fmn_handler(start_stn_id, end_stn_id, xlr_net_fmn_handler,
-				 priv->adapter);
-	return 0;
-}
-
-static void xlr_config_translate_table(struct xlr_net_priv *priv)
-{
-	u32 cpu_mask;
-	u32 val;
-	int bkts[32]; /* one bucket is assumed for each cpu */
-	int b1, b2, c1, c2, i, j, k;
-	int use_bkt;
-
-	use_bkt = 0;
-	cpu_mask = priv->nd->cpu_mask;
-
-	pr_info("Using %s-based distribution\n",
-		(use_bkt) ? "bucket" : "class");
-	j = 0;
-	for (i = 0; i < 32; i++) {
-		if ((1 << i) & cpu_mask) {
-			/* for each cpu, mark the 4+threadid bucket */
-			bkts[j] = ((i / 4) * 8) + (i % 4);
-			j++;
-		}
-	}
-
-	/*configure the 128 * 9 Translation table to send to available buckets*/
-	k = 0;
-	c1 = 3;
-	c2 = 0;
-	for (i = 0; i < 64; i++) {
-		/*
-		 * On use_bkt set the b0, b1 are used, else
-		 * the 4 classes are used, here implemented
-		 * a logic to distribute the packets to the
-		 * buckets equally or based on the class
-		 */
-		c1 = (c1 + 1) & 3;
-		c2 = (c1 + 1) & 3;
-		b1 = bkts[k];
-		k = (k + 1) % j;
-		b2 = bkts[k];
-		k = (k + 1) % j;
-
-		val = ((c1 << 23) | (b1 << 17) | (use_bkt << 16) |
-				(c2 << 7) | (b2 << 1) | (use_bkt << 0));
-		dev_dbg(&priv->ndev->dev, "Table[%d] b1=%d b2=%d c1=%d c2=%d\n",
-			i, b1, b2, c1, c2);
-		xlr_nae_wreg(priv->base_addr, R_TRANSLATETABLE + i, val);
-		c1 = c2;
-	}
-}
-
-static void xlr_config_parser(struct xlr_net_priv *priv)
-{
-	u32 val;
-
-	/* Mark it as ETHERNET type */
-	xlr_nae_wreg(priv->base_addr, R_L2TYPE_0, 0x01);
-
-	/* Use 7bit CRChash for flow classification with 127 as CRC polynomial*/
-	xlr_nae_wreg(priv->base_addr, R_PARSERCONFIGREG,
-		     ((0x7f << 8) | (1 << 1)));
-
-	/* configure the parser : L2 Type is configured in the bootloader */
-	/* extract IP: src, dest protocol */
-	xlr_nae_wreg(priv->base_addr, R_L3CTABLE,
-		     (9 << 20) | (1 << 19) | (1 << 18) | (0x01 << 16) |
-		     (0x0800 << 0));
-	xlr_nae_wreg(priv->base_addr, R_L3CTABLE + 1,
-		     (9 << 25) | (1 << 21) | (12 << 14) | (4 << 10) |
-		     (16 << 4) | 4);
-
-	/* Configure to extract SRC port and Dest port for TCP and UDP pkts */
-	xlr_nae_wreg(priv->base_addr, R_L4CTABLE, 6);
-	xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 2, 17);
-	val = ((0 << 21) | (2 << 17) | (2 << 11) | (2 << 7));
-	xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 1, val);
-	xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 3, val);
-
-	xlr_config_translate_table(priv);
-}
-
-static int xlr_phy_write(u32 *base_addr, int phy_addr, int regnum, u16 val)
-{
-	unsigned long timeout, stoptime, checktime;
-	int timedout;
-
-	/* 100ms timeout*/
-	timeout = msecs_to_jiffies(100);
-	stoptime = jiffies + timeout;
-	timedout = 0;
-
-	xlr_nae_wreg(base_addr, R_MII_MGMT_ADDRESS, (phy_addr << 8) | regnum);
-
-	/* Write the data which starts the write cycle */
-	xlr_nae_wreg(base_addr, R_MII_MGMT_WRITE_DATA, (u32)val);
-
-	/* poll for the read cycle to complete */
-	while (!timedout) {
-		checktime = jiffies;
-		if (xlr_nae_rdreg(base_addr, R_MII_MGMT_INDICATORS) == 0)
-			break;
-		timedout = time_after(checktime, stoptime);
-	}
-	if (timedout) {
-		pr_info("Phy device write err: device busy");
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-static int xlr_phy_read(u32 *base_addr, int phy_addr, int regnum)
-{
-	unsigned long timeout, stoptime, checktime;
-	int timedout;
-
-	/* 100ms timeout*/
-	timeout = msecs_to_jiffies(100);
-	stoptime = jiffies + timeout;
-	timedout = 0;
-
-	/* setup the phy reg to be used */
-	xlr_nae_wreg(base_addr, R_MII_MGMT_ADDRESS,
-		     (phy_addr << 8) | (regnum << 0));
-
-	/* Issue the read command */
-	xlr_nae_wreg(base_addr, R_MII_MGMT_COMMAND,
-		     (1 << O_MII_MGMT_COMMAND__rstat));
-
-	/* poll for the read cycle to complete */
-	while (!timedout) {
-		checktime = jiffies;
-		if (xlr_nae_rdreg(base_addr, R_MII_MGMT_INDICATORS) == 0)
-			break;
-		timedout = time_after(checktime, stoptime);
-	}
-	if (timedout) {
-		pr_info("Phy device read err: device busy");
-		return -EBUSY;
-	}
-
-	/* clear the read cycle */
-	xlr_nae_wreg(base_addr, R_MII_MGMT_COMMAND, 0);
-
-	/* Read the data */
-	return xlr_nae_rdreg(base_addr, R_MII_MGMT_STATUS);
-}
-
-static int xlr_mii_write(struct mii_bus *bus, int phy_addr, int regnum, u16 val)
-{
-	struct xlr_net_priv *priv = bus->priv;
-	int ret;
-
-	ret = xlr_phy_write(priv->mii_addr, phy_addr, regnum, val);
-	dev_dbg(&priv->ndev->dev, "mii_write phy %d : %d <- %x [%x]\n",
-		phy_addr, regnum, val, ret);
-	return ret;
-}
-
-static int xlr_mii_read(struct mii_bus *bus, int phy_addr, int regnum)
-{
-	struct xlr_net_priv *priv = bus->priv;
-	int ret;
-
-	ret =  xlr_phy_read(priv->mii_addr, phy_addr, regnum);
-	dev_dbg(&priv->ndev->dev, "mii_read phy %d : %d [%x]\n",
-		phy_addr, regnum, ret);
-	return ret;
-}
-
-/*
- * XLR ports are RGMII. XLS ports are SGMII mostly except the port0,
- * which can be configured either SGMII or RGMII, considered SGMII
- * by default, if board setup to RGMII the port_type need to set
- * accordingly.Serdes and PCS layer need to configured for SGMII
- */
-static void xlr_sgmii_init(struct xlr_net_priv *priv)
-{
-	int phy;
-
-	xlr_phy_write(priv->serdes_addr, 26, 0, 0x6DB0);
-	xlr_phy_write(priv->serdes_addr, 26, 1, 0xFFFF);
-	xlr_phy_write(priv->serdes_addr, 26, 2, 0xB6D0);
-	xlr_phy_write(priv->serdes_addr, 26, 3, 0x00FF);
-	xlr_phy_write(priv->serdes_addr, 26, 4, 0x0000);
-	xlr_phy_write(priv->serdes_addr, 26, 5, 0x0000);
-	xlr_phy_write(priv->serdes_addr, 26, 6, 0x0005);
-	xlr_phy_write(priv->serdes_addr, 26, 7, 0x0001);
-	xlr_phy_write(priv->serdes_addr, 26, 8, 0x0000);
-	xlr_phy_write(priv->serdes_addr, 26, 9, 0x0000);
-	xlr_phy_write(priv->serdes_addr, 26, 10, 0x0000);
-
-	/* program  GPIO values for serdes init parameters */
-	xlr_nae_wreg(priv->gpio_addr, 0x20, 0x7e6802);
-	xlr_nae_wreg(priv->gpio_addr, 0x10, 0x7104);
-
-	xlr_nae_wreg(priv->gpio_addr, 0x22, 0x7e6802);
-	xlr_nae_wreg(priv->gpio_addr, 0x21, 0x7104);
-
-	/* enable autoneg - more magic */
-	phy = priv->phy_addr % 4 + 27;
-	xlr_phy_write(priv->pcs_addr, phy, 0, 0x1000);
-	xlr_phy_write(priv->pcs_addr, phy, 0, 0x0200);
-}
-
-void xlr_set_gmac_speed(struct xlr_net_priv *priv)
-{
-	struct phy_device *phydev = xlr_get_phydev(priv);
-	int speed;
-
-	if (phydev->interface == PHY_INTERFACE_MODE_SGMII)
-		xlr_sgmii_init(priv);
-
-	if (phydev->speed != priv->phy_speed) {
-		speed = phydev->speed;
-		if (speed == SPEED_1000) {
-			/* Set interface to Byte mode */
-			xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7217);
-			priv->phy_speed = speed;
-		} else if (speed == SPEED_100 || speed == SPEED_10) {
-			/* Set interface to Nibble mode */
-			xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7117);
-			priv->phy_speed = speed;
-		}
-		/* Set SGMII speed in Interface control reg */
-		if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
-			if (speed == SPEED_10)
-				xlr_nae_wreg(priv->base_addr,
-					     R_INTERFACE_CONTROL,
-					     SGMII_SPEED_10);
-			if (speed == SPEED_100)
-				xlr_nae_wreg(priv->base_addr,
-					     R_INTERFACE_CONTROL,
-					     SGMII_SPEED_100);
-			if (speed == SPEED_1000)
-				xlr_nae_wreg(priv->base_addr,
-					     R_INTERFACE_CONTROL,
-					     SGMII_SPEED_1000);
-		}
-		if (speed == SPEED_10)
-			xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x2);
-		if (speed == SPEED_100)
-			xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x1);
-		if (speed == SPEED_1000)
-			xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x0);
-	}
-	pr_info("gmac%d : %dMbps\n", priv->port_id, priv->phy_speed);
-}
-
-static void xlr_gmac_link_adjust(struct net_device *ndev)
-{
-	struct xlr_net_priv *priv = netdev_priv(ndev);
-	struct phy_device *phydev = xlr_get_phydev(priv);
-	u32 intreg;
-
-	intreg = xlr_nae_rdreg(priv->base_addr, R_INTREG);
-	if (phydev->link) {
-		if (phydev->speed != priv->phy_speed) {
-			xlr_set_gmac_speed(priv);
-			pr_info("gmac%d : Link up\n", priv->port_id);
-		}
-	} else {
-		xlr_set_gmac_speed(priv);
-		pr_info("gmac%d : Link down\n", priv->port_id);
-	}
-}
-
-static int xlr_mii_probe(struct xlr_net_priv *priv)
-{
-	struct phy_device *phydev = xlr_get_phydev(priv);
-
-	if (!phydev) {
-		pr_err("no PHY found on phy_addr %d\n", priv->phy_addr);
-		return -ENODEV;
-	}
-
-	/* Attach MAC to PHY */
-	phydev = phy_connect(priv->ndev, phydev_name(phydev),
-			     xlr_gmac_link_adjust, priv->nd->phy_interface);
-
-	if (IS_ERR(phydev)) {
-		pr_err("could not attach PHY\n");
-		return PTR_ERR(phydev);
-	}
-	phydev->supported &= (ADVERTISED_10baseT_Full
-				| ADVERTISED_10baseT_Half
-				| ADVERTISED_100baseT_Full
-				| ADVERTISED_100baseT_Half
-				| ADVERTISED_1000baseT_Full
-				| ADVERTISED_Autoneg
-				| ADVERTISED_MII);
-
-	phydev->advertising = phydev->supported;
-	phy_attached_info(phydev);
-	return 0;
-}
-
-static int xlr_setup_mdio(struct xlr_net_priv *priv,
-			  struct platform_device *pdev)
-{
-	int err;
-
-	priv->mii_bus = mdiobus_alloc();
-	if (!priv->mii_bus) {
-		pr_err("mdiobus alloc failed\n");
-		return -ENOMEM;
-	}
-
-	priv->mii_bus->priv = priv;
-	priv->mii_bus->name = "xlr-mdio";
-	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
-		 priv->mii_bus->name, priv->port_id);
-	priv->mii_bus->read = xlr_mii_read;
-	priv->mii_bus->write = xlr_mii_write;
-	priv->mii_bus->parent = &pdev->dev;
-
-	/* Scan only the enabled address */
-	priv->mii_bus->phy_mask = ~(1 << priv->phy_addr);
-
-	/* setting clock divisor to 54 */
-	xlr_nae_wreg(priv->base_addr, R_MII_MGMT_CONFIG, 0x7);
-
-	err = mdiobus_register(priv->mii_bus);
-	if (err) {
-		mdiobus_free(priv->mii_bus);
-		pr_err("mdio bus registration failed\n");
-		return err;
-	}
-
-	pr_info("Registered mdio bus id : %s\n", priv->mii_bus->id);
-	err = xlr_mii_probe(priv);
-	if (err) {
-		mdiobus_free(priv->mii_bus);
-		return err;
-	}
-	return 0;
-}
-
-static void xlr_port_enable(struct xlr_net_priv *priv)
-{
-	u32 prid = (read_c0_prid() & 0xf000);
-
-	/* Set up the MAC_CONFIG reg if this is an XLS chip in RGMII mode */
-	if ((prid == 0x8000 || prid == 0x4000 || prid == 0xc000) &&
-	    priv->nd->phy_interface == PHY_INTERFACE_MODE_RGMII)
-		xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-			       (1 << O_RX_CONTROL__RGMII),
-			       (1 << O_RX_CONTROL__RGMII));
-
-	/* Rx Tx enable */
-	xlr_reg_update(priv->base_addr, R_MAC_CONFIG_1,
-		       ((1 << O_MAC_CONFIG_1__rxen) |
-			(1 << O_MAC_CONFIG_1__txen) |
-			(1 << O_MAC_CONFIG_1__rxfc) |
-			(1 << O_MAC_CONFIG_1__txfc)),
-		       ((1 << O_MAC_CONFIG_1__rxen) |
-			(1 << O_MAC_CONFIG_1__txen) |
-			(1 << O_MAC_CONFIG_1__rxfc) |
-			(1 << O_MAC_CONFIG_1__txfc)));
-
-	/* Setup tx control reg */
-	xlr_reg_update(priv->base_addr, R_TX_CONTROL,
-		       ((1 << O_TX_CONTROL__TXENABLE) |
-		       (512 << O_TX_CONTROL__TXTHRESHOLD)), 0x3fff);
-
-	/* Setup rx control reg */
-	xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-		       1 << O_RX_CONTROL__RXENABLE,
-		       1 << O_RX_CONTROL__RXENABLE);
-}
-
-static void xlr_port_disable(struct xlr_net_priv *priv)
-{
-	/* Setup MAC_CONFIG reg */
-	/* Rx Tx disable */
-	xlr_reg_update(priv->base_addr, R_MAC_CONFIG_1,
-		       ((1 << O_MAC_CONFIG_1__rxen) |
-			(1 << O_MAC_CONFIG_1__txen) |
-			(1 << O_MAC_CONFIG_1__rxfc) |
-			(1 << O_MAC_CONFIG_1__txfc)), 0x0);
-
-	/* Setup tx control reg */
-	xlr_reg_update(priv->base_addr, R_TX_CONTROL,
-		       ((1 << O_TX_CONTROL__TXENABLE) |
-		       (512 << O_TX_CONTROL__TXTHRESHOLD)), 0);
-
-	/* Setup rx control reg */
-	xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-		       1 << O_RX_CONTROL__RXENABLE, 0);
-}
-
-/*
- * Initialization of gmac
- */
-static int xlr_gmac_init(struct xlr_net_priv *priv,
-			 struct platform_device *pdev)
-{
-	int ret;
-
-	pr_info("Initializing the gmac%d\n", priv->port_id);
-
-	xlr_port_disable(priv);
-
-	xlr_nae_wreg(priv->base_addr, R_DESC_PACK_CTRL,
-		     (1 << O_DESC_PACK_CTRL__MAXENTRY) |
-		     (BYTE_OFFSET << O_DESC_PACK_CTRL__BYTEOFFSET) |
-		     (1600 << O_DESC_PACK_CTRL__REGULARSIZE));
-
-	ret = xlr_setup_mdio(priv, pdev);
-	if (ret)
-		return ret;
-	xlr_port_enable(priv);
-
-	/* Enable Full-duplex/1000Mbps/CRC */
-	xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7217);
-	/* speed 2.5MHz */
-	xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x02);
-	/* Setup Interrupt mask reg */
-	xlr_nae_wreg(priv->base_addr, R_INTMASK, (1 << O_INTMASK__TXILLEGAL) |
-		     (1 << O_INTMASK__MDINT) | (1 << O_INTMASK__TXFETCHERROR) |
-		     (1 << O_INTMASK__P2PSPILLECC) | (1 << O_INTMASK__TAGFULL) |
-		     (1 << O_INTMASK__UNDERRUN) | (1 << O_INTMASK__ABORT));
-
-	/* Clear all stats */
-	xlr_reg_update(priv->base_addr, R_STATCTRL, 0, 1 << O_STATCTRL__CLRCNT);
-	xlr_reg_update(priv->base_addr, R_STATCTRL, 1 << 2, 1 << 2);
-	return 0;
-}
-
-static int xlr_net_probe(struct platform_device *pdev)
-{
-	struct xlr_net_priv *priv = NULL;
-	struct net_device *ndev;
-	struct resource *res;
-	struct xlr_adapter *adapter;
-	int err, port;
-
-	pr_info("XLR/XLS Ethernet Driver controller %d\n", pdev->id);
-	/*
-	 * Allocate our adapter data structure and attach it to the device.
-	 */
-	adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL);
-	if (!adapter)
-		return -ENOMEM;
-
-	/*
-	 * XLR and XLS have 1 and 2 NAE controllers respectively.
-	 * Each controller has 4 gmac ports; each controller is mapped
-	 * under one parent device, with 4 gmac ports under one device.
-	 */
-	for (port = 0; port < pdev->num_resources / 2; port++) {
-		ndev = alloc_etherdev_mq(sizeof(struct xlr_net_priv), 32);
-		if (!ndev) {
-			dev_err(&pdev->dev,
-				"Allocation of Ethernet device failed\n");
-			return -ENOMEM;
-		}
-
-		priv = netdev_priv(ndev);
-		priv->pdev = pdev;
-		priv->ndev = ndev;
-		priv->port_id = (pdev->id * 4) + port;
-		priv->nd = (struct xlr_net_data *)pdev->dev.platform_data;
-		priv->base_addr = devm_platform_ioremap_resource(pdev, port);
-		if (IS_ERR(priv->base_addr)) {
-			err = PTR_ERR(priv->base_addr);
-			goto err_gmac;
-		}
-		priv->adapter = adapter;
-		adapter->netdev[port] = ndev;
-
-		res = platform_get_resource(pdev, IORESOURCE_IRQ, port);
-		if (!res) {
-			dev_err(&pdev->dev, "No irq resource for MAC %d\n",
-				priv->port_id);
-			err = -ENODEV;
-			goto err_gmac;
-		}
-
-		ndev->irq = res->start;
-
-		priv->phy_addr = priv->nd->phy_addr[port];
-		priv->tx_stnid = priv->nd->tx_stnid[port];
-		priv->mii_addr = priv->nd->mii_addr;
-		priv->serdes_addr = priv->nd->serdes_addr;
-		priv->pcs_addr = priv->nd->pcs_addr;
-		priv->gpio_addr = priv->nd->gpio_addr;
-
-		ndev->netdev_ops = &xlr_netdev_ops;
-		ndev->watchdog_timeo = HZ;
-
-		/* Setup Mac address and Rx mode */
-		eth_hw_addr_random(ndev);
-		xlr_hw_set_mac_addr(ndev);
-		xlr_set_rx_mode(ndev);
-
-		priv->num_rx_desc += MAX_NUM_DESC_SPILL;
-		ndev->ethtool_ops = &xlr_ethtool_ops;
-		SET_NETDEV_DEV(ndev, &pdev->dev);
-
-		xlr_config_fifo_spill_area(priv);
-		/* Configure PDE to Round-Robin pkt distribution */
-		xlr_config_pde(priv);
-		xlr_config_parser(priv);
-
-		/* Call init with respect to port */
-		if (strcmp(res->name, "gmac") == 0) {
-			err = xlr_gmac_init(priv, pdev);
-			if (err) {
-				dev_err(&pdev->dev, "gmac%d init failed\n",
-					priv->port_id);
-				goto err_gmac;
-			}
-		}
-
-		if (priv->port_id == 0 || priv->port_id == 4) {
-			err = xlr_config_common(priv);
-			if (err)
-				goto err_netdev;
-		}
-
-		err = register_netdev(ndev);
-		if (err) {
-			dev_err(&pdev->dev,
-				"Registering netdev failed for gmac%d\n",
-				priv->port_id);
-			goto err_netdev;
-		}
-		platform_set_drvdata(pdev, priv);
-	}
-
-	return 0;
-
-err_netdev:
-	mdiobus_free(priv->mii_bus);
-err_gmac:
-	free_netdev(ndev);
-	return err;
-}
-
-static int xlr_net_remove(struct platform_device *pdev)
-{
-	struct xlr_net_priv *priv = platform_get_drvdata(pdev);
-
-	unregister_netdev(priv->ndev);
-	mdiobus_unregister(priv->mii_bus);
-	mdiobus_free(priv->mii_bus);
-	free_netdev(priv->ndev);
-	return 0;
-}
-
-static struct platform_driver xlr_net_driver = {
-	.probe		= xlr_net_probe,
-	.remove		= xlr_net_remove,
-	.driver		= {
-		.name	= "xlr-net",
-	},
-};
-
-module_platform_driver(xlr_net_driver);
-
-MODULE_AUTHOR("Ganesan Ramalingam <ganesanr@broadcom.com>");
-MODULE_DESCRIPTION("Ethernet driver for Netlogic XLR/XLS");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_ALIAS("platform:xlr-net");
diff --git a/drivers/staging/netlogic/xlr_net.h b/drivers/staging/netlogic/xlr_net.h
deleted file mode 100644
index 8365b74..0000000
--- a/drivers/staging/netlogic/xlr_net.h
+++ /dev/null
@@ -1,1079 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-/* #define MAC_SPLIT_MODE */
-
-#define MAC_SPACING                 0x400
-#define XGMAC_SPACING               0x400
-
-/* PE-MCXMAC register and bit field definitions */
-#define R_MAC_CONFIG_1                                              0x00
-#define   O_MAC_CONFIG_1__srst                                      31
-#define   O_MAC_CONFIG_1__simr                                      30
-#define   O_MAC_CONFIG_1__hrrmc                                     18
-#define   W_MAC_CONFIG_1__hrtmc                                      2
-#define   O_MAC_CONFIG_1__hrrfn                                     16
-#define   W_MAC_CONFIG_1__hrtfn                                      2
-#define   O_MAC_CONFIG_1__intlb                                      8
-#define   O_MAC_CONFIG_1__rxfc                                       5
-#define   O_MAC_CONFIG_1__txfc                                       4
-#define   O_MAC_CONFIG_1__srxen                                      3
-#define   O_MAC_CONFIG_1__rxen                                       2
-#define   O_MAC_CONFIG_1__stxen                                      1
-#define   O_MAC_CONFIG_1__txen                                       0
-#define R_MAC_CONFIG_2                                              0x01
-#define   O_MAC_CONFIG_2__prlen                                     12
-#define   W_MAC_CONFIG_2__prlen                                      4
-#define   O_MAC_CONFIG_2__speed                                      8
-#define   W_MAC_CONFIG_2__speed                                      2
-#define   O_MAC_CONFIG_2__hugen                                      5
-#define   O_MAC_CONFIG_2__flchk                                      4
-#define   O_MAC_CONFIG_2__crce                                       1
-#define   O_MAC_CONFIG_2__fulld                                      0
-#define R_IPG_IFG                                                   0x02
-#define   O_IPG_IFG__ipgr1                                          24
-#define   W_IPG_IFG__ipgr1                                           7
-#define   O_IPG_IFG__ipgr2                                          16
-#define   W_IPG_IFG__ipgr2                                           7
-#define   O_IPG_IFG__mifg                                            8
-#define   W_IPG_IFG__mifg                                            8
-#define   O_IPG_IFG__ipgt                                            0
-#define   W_IPG_IFG__ipgt                                            7
-#define R_HALF_DUPLEX                                               0x03
-#define   O_HALF_DUPLEX__abebt                                      24
-#define   W_HALF_DUPLEX__abebt                                       4
-#define   O_HALF_DUPLEX__abebe                                      19
-#define   O_HALF_DUPLEX__bpnb                                       18
-#define   O_HALF_DUPLEX__nobo                                       17
-#define   O_HALF_DUPLEX__edxsdfr                                    16
-#define   O_HALF_DUPLEX__retry                                      12
-#define   W_HALF_DUPLEX__retry                                       4
-#define   O_HALF_DUPLEX__lcol                                        0
-#define   W_HALF_DUPLEX__lcol                                       10
-#define R_MAXIMUM_FRAME_LENGTH                                      0x04
-#define   O_MAXIMUM_FRAME_LENGTH__maxf                               0
-#define   W_MAXIMUM_FRAME_LENGTH__maxf                              16
-#define R_TEST                                                      0x07
-#define   O_TEST__mbof                                               3
-#define   O_TEST__rthdf                                              2
-#define   O_TEST__tpause                                             1
-#define   O_TEST__sstct                                              0
-#define R_MII_MGMT_CONFIG                                           0x08
-#define   O_MII_MGMT_CONFIG__scinc                                   5
-#define   O_MII_MGMT_CONFIG__spre                                    4
-#define   O_MII_MGMT_CONFIG__clks                                    3
-#define   W_MII_MGMT_CONFIG__clks                                    3
-#define R_MII_MGMT_COMMAND                                          0x09
-#define   O_MII_MGMT_COMMAND__scan                                   1
-#define   O_MII_MGMT_COMMAND__rstat                                  0
-#define R_MII_MGMT_ADDRESS                                          0x0A
-#define   O_MII_MGMT_ADDRESS__fiad                                   8
-#define   W_MII_MGMT_ADDRESS__fiad                                   5
-#define   O_MII_MGMT_ADDRESS__fgad                                   5
-#define   W_MII_MGMT_ADDRESS__fgad                                   0
-#define R_MII_MGMT_WRITE_DATA                                       0x0B
-#define   O_MII_MGMT_WRITE_DATA__ctld                                0
-#define   W_MII_MGMT_WRITE_DATA__ctld                               16
-#define R_MII_MGMT_STATUS                                           0x0C
-#define R_MII_MGMT_INDICATORS                                       0x0D
-#define   O_MII_MGMT_INDICATORS__nvalid                              2
-#define   O_MII_MGMT_INDICATORS__scan                                1
-#define   O_MII_MGMT_INDICATORS__busy                                0
-#define R_INTERFACE_CONTROL                                         0x0E
-#define   O_INTERFACE_CONTROL__hrstint                              31
-#define   O_INTERFACE_CONTROL__tbimode                              27
-#define   O_INTERFACE_CONTROL__ghdmode                              26
-#define   O_INTERFACE_CONTROL__lhdmode                              25
-#define   O_INTERFACE_CONTROL__phymod                               24
-#define   O_INTERFACE_CONTROL__hrrmi                                23
-#define   O_INTERFACE_CONTROL__rspd                                 16
-#define   O_INTERFACE_CONTROL__hr100                                15
-#define   O_INTERFACE_CONTROL__frcq                                 10
-#define   O_INTERFACE_CONTROL__nocfr                                 9
-#define   O_INTERFACE_CONTROL__dlfct                                 8
-#define   O_INTERFACE_CONTROL__enjab                                 0
-#define R_INTERFACE_STATUS                                         0x0F
-#define   O_INTERFACE_STATUS__xsdfr                                  9
-#define   O_INTERFACE_STATUS__ssrr                                   8
-#define   W_INTERFACE_STATUS__ssrr                                   5
-#define   O_INTERFACE_STATUS__miilf                                  3
-#define   O_INTERFACE_STATUS__locar                                  2
-#define   O_INTERFACE_STATUS__sqerr                                  1
-#define   O_INTERFACE_STATUS__jabber                                 0
-#define R_STATION_ADDRESS_LS                                       0x10
-#define R_STATION_ADDRESS_MS                                       0x11
-
-/* A-XGMAC register and bit field definitions */
-#define R_XGMAC_CONFIG_0    0x00
-#define   O_XGMAC_CONFIG_0__hstmacrst               31
-#define   O_XGMAC_CONFIG_0__hstrstrctl              23
-#define   O_XGMAC_CONFIG_0__hstrstrfn               22
-#define   O_XGMAC_CONFIG_0__hstrsttctl              18
-#define   O_XGMAC_CONFIG_0__hstrsttfn               17
-#define   O_XGMAC_CONFIG_0__hstrstmiim              16
-#define   O_XGMAC_CONFIG_0__hstloopback             8
-#define R_XGMAC_CONFIG_1    0x01
-#define   O_XGMAC_CONFIG_1__hsttctlen               31
-#define   O_XGMAC_CONFIG_1__hsttfen                 30
-#define   O_XGMAC_CONFIG_1__hstrctlen               29
-#define   O_XGMAC_CONFIG_1__hstrfen                 28
-#define   O_XGMAC_CONFIG_1__tfen                    26
-#define   O_XGMAC_CONFIG_1__rfen                    24
-#define   O_XGMAC_CONFIG_1__hstrctlshrtp            12
-#define   O_XGMAC_CONFIG_1__hstdlyfcstx             10
-#define   W_XGMAC_CONFIG_1__hstdlyfcstx              2
-#define   O_XGMAC_CONFIG_1__hstdlyfcsrx              8
-#define   W_XGMAC_CONFIG_1__hstdlyfcsrx              2
-#define   O_XGMAC_CONFIG_1__hstppen                  7
-#define   O_XGMAC_CONFIG_1__hstbytswp                6
-#define   O_XGMAC_CONFIG_1__hstdrplt64               5
-#define   O_XGMAC_CONFIG_1__hstprmscrx               4
-#define   O_XGMAC_CONFIG_1__hstlenchk                3
-#define   O_XGMAC_CONFIG_1__hstgenfcs                2
-#define   O_XGMAC_CONFIG_1__hstpadmode               0
-#define   W_XGMAC_CONFIG_1__hstpadmode               2
-#define R_XGMAC_CONFIG_2    0x02
-#define   O_XGMAC_CONFIG_2__hsttctlfrcp             31
-#define   O_XGMAC_CONFIG_2__hstmlnkflth             27
-#define   O_XGMAC_CONFIG_2__hstalnkflth             26
-#define   O_XGMAC_CONFIG_2__rflnkflt                24
-#define   W_XGMAC_CONFIG_2__rflnkflt                 2
-#define   O_XGMAC_CONFIG_2__hstipgextmod            16
-#define   W_XGMAC_CONFIG_2__hstipgextmod             5
-#define   O_XGMAC_CONFIG_2__hstrctlfrcp             15
-#define   O_XGMAC_CONFIG_2__hstipgexten              5
-#define   O_XGMAC_CONFIG_2__hstmipgext               0
-#define   W_XGMAC_CONFIG_2__hstmipgext               5
-#define R_XGMAC_CONFIG_3    0x03
-#define   O_XGMAC_CONFIG_3__hstfltrfrm              31
-#define   W_XGMAC_CONFIG_3__hstfltrfrm              16
-#define   O_XGMAC_CONFIG_3__hstfltrfrmdc            15
-#define   W_XGMAC_CONFIG_3__hstfltrfrmdc            16
-#define R_XGMAC_STATION_ADDRESS_LS      0x04
-#define   O_XGMAC_STATION_ADDRESS_LS__hstmacadr0    0
-#define   W_XGMAC_STATION_ADDRESS_LS__hstmacadr0    32
-#define R_XGMAC_STATION_ADDRESS_MS      0x05
-#define R_XGMAC_MAX_FRAME_LEN           0x08
-#define   O_XGMAC_MAX_FRAME_LEN__hstmxfrmwctx       16
-#define   W_XGMAC_MAX_FRAME_LEN__hstmxfrmwctx       14
-#define   O_XGMAC_MAX_FRAME_LEN__hstmxfrmbcrx        0
-#define   W_XGMAC_MAX_FRAME_LEN__hstmxfrmbcrx       16
-#define R_XGMAC_REV_LEVEL               0x0B
-#define   O_XGMAC_REV_LEVEL__revlvl                  0
-#define   W_XGMAC_REV_LEVEL__revlvl                 15
-#define R_XGMAC_MIIM_COMMAND            0x10
-#define   O_XGMAC_MIIM_COMMAND__hstldcmd             3
-#define   O_XGMAC_MIIM_COMMAND__hstmiimcmd           0
-#define   W_XGMAC_MIIM_COMMAND__hstmiimcmd           3
-#define R_XGMAC_MIIM_FILED              0x11
-#define   O_XGMAC_MIIM_FILED__hststfield            30
-#define   W_XGMAC_MIIM_FILED__hststfield             2
-#define   O_XGMAC_MIIM_FILED__hstopfield            28
-#define   W_XGMAC_MIIM_FILED__hstopfield             2
-#define   O_XGMAC_MIIM_FILED__hstphyadx             23
-#define   W_XGMAC_MIIM_FILED__hstphyadx              5
-#define   O_XGMAC_MIIM_FILED__hstregadx             18
-#define   W_XGMAC_MIIM_FILED__hstregadx              5
-#define   O_XGMAC_MIIM_FILED__hsttafield            16
-#define   W_XGMAC_MIIM_FILED__hsttafield             2
-#define   O_XGMAC_MIIM_FILED__miimrddat              0
-#define   W_XGMAC_MIIM_FILED__miimrddat             16
-#define R_XGMAC_MIIM_CONFIG             0x12
-#define   O_XGMAC_MIIM_CONFIG__hstnopram             7
-#define   O_XGMAC_MIIM_CONFIG__hstclkdiv             0
-#define   W_XGMAC_MIIM_CONFIG__hstclkdiv             7
-#define R_XGMAC_MIIM_LINK_FAIL_VECTOR   0x13
-#define   O_XGMAC_MIIM_LINK_FAIL_VECTOR__miimlfvec   0
-#define   W_XGMAC_MIIM_LINK_FAIL_VECTOR__miimlfvec  32
-#define R_XGMAC_MIIM_INDICATOR          0x14
-#define   O_XGMAC_MIIM_INDICATOR__miimphylf          4
-#define   O_XGMAC_MIIM_INDICATOR__miimmoncplt        3
-#define   O_XGMAC_MIIM_INDICATOR__miimmonvld         2
-#define   O_XGMAC_MIIM_INDICATOR__miimmon            1
-#define   O_XGMAC_MIIM_INDICATOR__miimbusy           0
-
-/* GMAC stats registers */
-#define R_RBYT							    0x27
-#define R_RPKT							    0x28
-#define R_RFCS							    0x29
-#define R_RMCA							    0x2A
-#define R_RBCA							    0x2B
-#define R_RXCF							    0x2C
-#define R_RXPF							    0x2D
-#define R_RXUO							    0x2E
-#define R_RALN							    0x2F
-#define R_RFLR							    0x30
-#define R_RCDE							    0x31
-#define R_RCSE							    0x32
-#define R_RUND							    0x33
-#define R_ROVR							    0x34
-#define R_TBYT							    0x38
-#define R_TPKT							    0x39
-#define R_TMCA							    0x3A
-#define R_TBCA							    0x3B
-#define R_TXPF							    0x3C
-#define R_TDFR							    0x3D
-#define R_TEDF							    0x3E
-#define R_TSCL							    0x3F
-#define R_TMCL							    0x40
-#define R_TLCL							    0x41
-#define R_TXCL							    0x42
-#define R_TNCL							    0x43
-#define R_TJBR							    0x46
-#define R_TFCS							    0x47
-#define R_TXCF							    0x48
-#define R_TOVR							    0x49
-#define R_TUND							    0x4A
-#define R_TFRG							    0x4B
-
-/* Glue logic register and bit field definitions */
-#define R_MAC_ADDR0                                                 0x50
-#define R_MAC_ADDR1                                                 0x52
-#define R_MAC_ADDR2                                                 0x54
-#define R_MAC_ADDR3                                                 0x56
-#define R_MAC_ADDR_MASK2                                            0x58
-#define R_MAC_ADDR_MASK3                                            0x5A
-#define R_MAC_FILTER_CONFIG                                         0x5C
-#define   O_MAC_FILTER_CONFIG__BROADCAST_EN                         10
-#define   O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN                       9
-#define   O_MAC_FILTER_CONFIG__ALL_MCAST_EN                         8
-#define   O_MAC_FILTER_CONFIG__ALL_UCAST_EN                         7
-#define   O_MAC_FILTER_CONFIG__HASH_MCAST_EN                        6
-#define   O_MAC_FILTER_CONFIG__HASH_UCAST_EN                        5
-#define   O_MAC_FILTER_CONFIG__ADDR_MATCH_DISC                      4
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR3_VALID                      3
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR2_VALID                      2
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR1_VALID                      1
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR0_VALID                      0
-#define R_HASH_TABLE_VECTOR                                         0x30
-#define R_TX_CONTROL                                                 0x0A0
-#define   O_TX_CONTROL__TX15HALT                                     31
-#define   O_TX_CONTROL__TX14HALT                                     30
-#define   O_TX_CONTROL__TX13HALT                                     29
-#define   O_TX_CONTROL__TX12HALT                                     28
-#define   O_TX_CONTROL__TX11HALT                                     27
-#define   O_TX_CONTROL__TX10HALT                                     26
-#define   O_TX_CONTROL__TX9HALT                                      25
-#define   O_TX_CONTROL__TX8HALT                                      24
-#define   O_TX_CONTROL__TX7HALT                                      23
-#define   O_TX_CONTROL__TX6HALT                                      22
-#define   O_TX_CONTROL__TX5HALT                                      21
-#define   O_TX_CONTROL__TX4HALT                                      20
-#define   O_TX_CONTROL__TX3HALT                                      19
-#define   O_TX_CONTROL__TX2HALT                                      18
-#define   O_TX_CONTROL__TX1HALT                                      17
-#define   O_TX_CONTROL__TX0HALT                                      16
-#define   O_TX_CONTROL__TXIDLE                                       15
-#define   O_TX_CONTROL__TXENABLE                                     14
-#define   O_TX_CONTROL__TXTHRESHOLD                                  0
-#define   W_TX_CONTROL__TXTHRESHOLD                                  14
-#define R_RX_CONTROL                                                 0x0A1
-#define   O_RX_CONTROL__RGMII                                        10
-#define   O_RX_CONTROL__SOFTRESET                                    2
-#define   O_RX_CONTROL__RXHALT                                       1
-#define   O_RX_CONTROL__RXENABLE                                     0
-#define R_DESC_PACK_CTRL                                            0x0A2
-#define   O_DESC_PACK_CTRL__BYTEOFFSET                              17
-#define   W_DESC_PACK_CTRL__BYTEOFFSET                              3
-#define   O_DESC_PACK_CTRL__PREPADENABLE                            16
-#define   O_DESC_PACK_CTRL__MAXENTRY                                14
-#define   W_DESC_PACK_CTRL__MAXENTRY                                2
-#define   O_DESC_PACK_CTRL__REGULARSIZE                             0
-#define   W_DESC_PACK_CTRL__REGULARSIZE                             14
-#define R_STATCTRL                                                  0x0A3
-#define   O_STATCTRL__OVERFLOWEN                                    4
-#define   O_STATCTRL__GIG                                           3
-#define   O_STATCTRL__STEN                                          2
-#define   O_STATCTRL__CLRCNT                                        1
-#define   O_STATCTRL__AUTOZ                                         0
-#define R_L2ALLOCCTRL                                               0x0A4
-#define   O_L2ALLOCCTRL__TXL2ALLOCATE                               9
-#define   W_L2ALLOCCTRL__TXL2ALLOCATE                               9
-#define   O_L2ALLOCCTRL__RXL2ALLOCATE                               0
-#define   W_L2ALLOCCTRL__RXL2ALLOCATE                               9
-#define R_INTMASK                                                   0x0A5
-#define   O_INTMASK__SPI4TXERROR                                     28
-#define   O_INTMASK__SPI4RXERROR                                     27
-#define   O_INTMASK__RGMIIHALFDUPCOLLISION                           27
-#define   O_INTMASK__ABORT                                           26
-#define   O_INTMASK__UNDERRUN                                        25
-#define   O_INTMASK__DISCARDPACKET                                   24
-#define   O_INTMASK__ASYNCFIFOFULL                                   23
-#define   O_INTMASK__TAGFULL                                         22
-#define   O_INTMASK__CLASS3FULL                                      21
-#define   O_INTMASK__C3EARLYFULL                                     20
-#define   O_INTMASK__CLASS2FULL                                      19
-#define   O_INTMASK__C2EARLYFULL                                     18
-#define   O_INTMASK__CLASS1FULL                                      17
-#define   O_INTMASK__C1EARLYFULL                                     16
-#define   O_INTMASK__CLASS0FULL                                      15
-#define   O_INTMASK__C0EARLYFULL                                     14
-#define   O_INTMASK__RXDATAFULL                                      13
-#define   O_INTMASK__RXEARLYFULL                                     12
-#define   O_INTMASK__RFREEEMPTY                                      9
-#define   O_INTMASK__RFEARLYEMPTY                                    8
-#define   O_INTMASK__P2PSPILLECC                                     7
-#define   O_INTMASK__FREEDESCFULL                                    5
-#define   O_INTMASK__FREEEARLYFULL                                   4
-#define   O_INTMASK__TXFETCHERROR                                    3
-#define   O_INTMASK__STATCARRY                                       2
-#define   O_INTMASK__MDINT                                           1
-#define   O_INTMASK__TXILLEGAL                                       0
-#define R_INTREG                                                    0x0A6
-#define   O_INTREG__SPI4TXERROR                                     28
-#define   O_INTREG__SPI4RXERROR                                     27
-#define   O_INTREG__RGMIIHALFDUPCOLLISION                           27
-#define   O_INTREG__ABORT                                           26
-#define   O_INTREG__UNDERRUN                                        25
-#define   O_INTREG__DISCARDPACKET                                   24
-#define   O_INTREG__ASYNCFIFOFULL                                   23
-#define   O_INTREG__TAGFULL                                         22
-#define   O_INTREG__CLASS3FULL                                      21
-#define   O_INTREG__C3EARLYFULL                                     20
-#define   O_INTREG__CLASS2FULL                                      19
-#define   O_INTREG__C2EARLYFULL                                     18
-#define   O_INTREG__CLASS1FULL                                      17
-#define   O_INTREG__C1EARLYFULL                                     16
-#define   O_INTREG__CLASS0FULL                                      15
-#define   O_INTREG__C0EARLYFULL                                     14
-#define   O_INTREG__RXDATAFULL                                      13
-#define   O_INTREG__RXEARLYFULL                                     12
-#define   O_INTREG__RFREEEMPTY                                      9
-#define   O_INTREG__RFEARLYEMPTY                                    8
-#define   O_INTREG__P2PSPILLECC                                     7
-#define   O_INTREG__FREEDESCFULL                                    5
-#define   O_INTREG__FREEEARLYFULL                                   4
-#define   O_INTREG__TXFETCHERROR                                    3
-#define   O_INTREG__STATCARRY                                       2
-#define   O_INTREG__MDINT                                           1
-#define   O_INTREG__TXILLEGAL                                       0
-#define R_TXRETRY                                                   0x0A7
-#define   O_TXRETRY__COLLISIONRETRY                                 6
-#define   O_TXRETRY__BUSERRORRETRY                                  5
-#define   O_TXRETRY__UNDERRUNRETRY                                  4
-#define   O_TXRETRY__RETRIES                                        0
-#define   W_TXRETRY__RETRIES                                        4
-#define R_CORECONTROL                                               0x0A8
-#define   O_CORECONTROL__ERRORTHREAD                                4
-#define   W_CORECONTROL__ERRORTHREAD                                7
-#define   O_CORECONTROL__SHUTDOWN                                   2
-#define   O_CORECONTROL__SPEED                                      0
-#define   W_CORECONTROL__SPEED                                      2
-#define R_BYTEOFFSET0                                               0x0A9
-#define R_BYTEOFFSET1                                               0x0AA
-#define R_L2TYPE_0                                                  0x0F0
-#define   O_L2TYPE__EXTRAHDRPROTOSIZE                               26
-#define   W_L2TYPE__EXTRAHDRPROTOSIZE                               5
-#define   O_L2TYPE__EXTRAHDRPROTOOFFSET                             20
-#define   W_L2TYPE__EXTRAHDRPROTOOFFSET                             6
-#define   O_L2TYPE__EXTRAHEADERSIZE                                 14
-#define   W_L2TYPE__EXTRAHEADERSIZE                                 6
-#define   O_L2TYPE__PROTOOFFSET                                     8
-#define   W_L2TYPE__PROTOOFFSET                                     6
-#define   O_L2TYPE__L2HDROFFSET                                     2
-#define   W_L2TYPE__L2HDROFFSET                                     6
-#define   O_L2TYPE__L2PROTO                                         0
-#define   W_L2TYPE__L2PROTO                                         2
-#define R_L2TYPE_1                                                  0xF0
-#define R_L2TYPE_2                                                  0xF0
-#define R_L2TYPE_3                                                  0xF0
-#define R_PARSERCONFIGREG                                           0x100
-#define   O_PARSERCONFIGREG__CRCHASHPOLY                            8
-#define   W_PARSERCONFIGREG__CRCHASHPOLY                            7
-#define   O_PARSERCONFIGREG__PREPADOFFSET                           4
-#define   W_PARSERCONFIGREG__PREPADOFFSET                           4
-#define   O_PARSERCONFIGREG__USECAM                                 2
-#define   O_PARSERCONFIGREG__USEHASH                                1
-#define   O_PARSERCONFIGREG__USEPROTO                               0
-#define R_L3CTABLE                                                  0x140
-#define   O_L3CTABLE__OFFSET0                                       25
-#define   W_L3CTABLE__OFFSET0                                       7
-#define   O_L3CTABLE__LEN0                                          21
-#define   W_L3CTABLE__LEN0                                          4
-#define   O_L3CTABLE__OFFSET1                                       14
-#define   W_L3CTABLE__OFFSET1                                       7
-#define   O_L3CTABLE__LEN1                                          10
-#define   W_L3CTABLE__LEN1                                          4
-#define   O_L3CTABLE__OFFSET2                                       4
-#define   W_L3CTABLE__OFFSET2                                       6
-#define   O_L3CTABLE__LEN2                                          0
-#define   W_L3CTABLE__LEN2                                          4
-#define   O_L3CTABLE__L3HDROFFSET                                   26
-#define   W_L3CTABLE__L3HDROFFSET                                   6
-#define   O_L3CTABLE__L4PROTOOFFSET                                 20
-#define   W_L3CTABLE__L4PROTOOFFSET                                 6
-#define   O_L3CTABLE__IPCHKSUMCOMPUTE                               19
-#define   O_L3CTABLE__L4CLASSIFY                                    18
-#define   O_L3CTABLE__L2PROTO                                       16
-#define   W_L3CTABLE__L2PROTO                                       2
-#define   O_L3CTABLE__L3PROTOKEY                                    0
-#define   W_L3CTABLE__L3PROTOKEY                                    16
-#define R_L4CTABLE                                                  0x160
-#define   O_L4CTABLE__OFFSET0                                       21
-#define   W_L4CTABLE__OFFSET0                                       6
-#define   O_L4CTABLE__LEN0                                          17
-#define   W_L4CTABLE__LEN0                                          4
-#define   O_L4CTABLE__OFFSET1                                       11
-#define   W_L4CTABLE__OFFSET1                                       6
-#define   O_L4CTABLE__LEN1                                          7
-#define   W_L4CTABLE__LEN1                                          4
-#define   O_L4CTABLE__TCPCHKSUMENABLE                               0
-#define R_CAM4X128TABLE                                             0x172
-#define   O_CAM4X128TABLE__CLASSID                                  7
-#define   W_CAM4X128TABLE__CLASSID                                  2
-#define   O_CAM4X128TABLE__BUCKETID                                 1
-#define   W_CAM4X128TABLE__BUCKETID                                 6
-#define   O_CAM4X128TABLE__USEBUCKET                                0
-#define R_CAM4X128KEY                                               0x180
-#define R_TRANSLATETABLE                                            0x1A0
-#define R_DMACR0                                                    0x200
-#define   O_DMACR0__DATA0WRMAXCR                                    27
-#define   W_DMACR0__DATA0WRMAXCR                                    3
-#define   O_DMACR0__DATA0RDMAXCR                                    24
-#define   W_DMACR0__DATA0RDMAXCR                                    3
-#define   O_DMACR0__DATA1WRMAXCR                                    21
-#define   W_DMACR0__DATA1WRMAXCR                                    3
-#define   O_DMACR0__DATA1RDMAXCR                                    18
-#define   W_DMACR0__DATA1RDMAXCR                                    3
-#define   O_DMACR0__DATA2WRMAXCR                                    15
-#define   W_DMACR0__DATA2WRMAXCR                                    3
-#define   O_DMACR0__DATA2RDMAXCR                                    12
-#define   W_DMACR0__DATA2RDMAXCR                                    3
-#define   O_DMACR0__DATA3WRMAXCR                                    9
-#define   W_DMACR0__DATA3WRMAXCR                                    3
-#define   O_DMACR0__DATA3RDMAXCR                                    6
-#define   W_DMACR0__DATA3RDMAXCR                                    3
-#define   O_DMACR0__DATA4WRMAXCR                                    3
-#define   W_DMACR0__DATA4WRMAXCR                                    3
-#define   O_DMACR0__DATA4RDMAXCR                                    0
-#define   W_DMACR0__DATA4RDMAXCR                                    3
-#define R_DMACR1                                                    0x201
-#define   O_DMACR1__DATA5WRMAXCR                                    27
-#define   W_DMACR1__DATA5WRMAXCR                                    3
-#define   O_DMACR1__DATA5RDMAXCR                                    24
-#define   W_DMACR1__DATA5RDMAXCR                                    3
-#define   O_DMACR1__DATA6WRMAXCR                                    21
-#define   W_DMACR1__DATA6WRMAXCR                                    3
-#define   O_DMACR1__DATA6RDMAXCR                                    18
-#define   W_DMACR1__DATA6RDMAXCR                                    3
-#define   O_DMACR1__DATA7WRMAXCR                                    15
-#define   W_DMACR1__DATA7WRMAXCR                                    3
-#define   O_DMACR1__DATA7RDMAXCR                                    12
-#define   W_DMACR1__DATA7RDMAXCR                                    3
-#define   O_DMACR1__DATA8WRMAXCR                                    9
-#define   W_DMACR1__DATA8WRMAXCR                                    3
-#define   O_DMACR1__DATA8RDMAXCR                                    6
-#define   W_DMACR1__DATA8RDMAXCR                                    3
-#define   O_DMACR1__DATA9WRMAXCR                                    3
-#define   W_DMACR1__DATA9WRMAXCR                                    3
-#define   O_DMACR1__DATA9RDMAXCR                                    0
-#define   W_DMACR1__DATA9RDMAXCR                                    3
-#define R_DMACR2                                                    0x202
-#define   O_DMACR2__DATA10WRMAXCR                                   27
-#define   W_DMACR2__DATA10WRMAXCR                                   3
-#define   O_DMACR2__DATA10RDMAXCR                                   24
-#define   W_DMACR2__DATA10RDMAXCR                                   3
-#define   O_DMACR2__DATA11WRMAXCR                                   21
-#define   W_DMACR2__DATA11WRMAXCR                                   3
-#define   O_DMACR2__DATA11RDMAXCR                                   18
-#define   W_DMACR2__DATA11RDMAXCR                                   3
-#define   O_DMACR2__DATA12WRMAXCR                                   15
-#define   W_DMACR2__DATA12WRMAXCR                                   3
-#define   O_DMACR2__DATA12RDMAXCR                                   12
-#define   W_DMACR2__DATA12RDMAXCR                                   3
-#define   O_DMACR2__DATA13WRMAXCR                                   9
-#define   W_DMACR2__DATA13WRMAXCR                                   3
-#define   O_DMACR2__DATA13RDMAXCR                                   6
-#define   W_DMACR2__DATA13RDMAXCR                                   3
-#define   O_DMACR2__DATA14WRMAXCR                                   3
-#define   W_DMACR2__DATA14WRMAXCR                                   3
-#define   O_DMACR2__DATA14RDMAXCR                                   0
-#define   W_DMACR2__DATA14RDMAXCR                                   3
-#define R_DMACR3                                                    0x203
-#define   O_DMACR3__DATA15WRMAXCR                                   27
-#define   W_DMACR3__DATA15WRMAXCR                                   3
-#define   O_DMACR3__DATA15RDMAXCR                                   24
-#define   W_DMACR3__DATA15RDMAXCR                                   3
-#define   O_DMACR3__SPCLASSWRMAXCR                                  21
-#define   W_DMACR3__SPCLASSWRMAXCR                                  3
-#define   O_DMACR3__SPCLASSRDMAXCR                                  18
-#define   W_DMACR3__SPCLASSRDMAXCR                                  3
-#define   O_DMACR3__JUMFRINWRMAXCR                                  15
-#define   W_DMACR3__JUMFRINWRMAXCR                                  3
-#define   O_DMACR3__JUMFRINRDMAXCR                                  12
-#define   W_DMACR3__JUMFRINRDMAXCR                                  3
-#define   O_DMACR3__REGFRINWRMAXCR                                  9
-#define   W_DMACR3__REGFRINWRMAXCR                                  3
-#define   O_DMACR3__REGFRINRDMAXCR                                  6
-#define   W_DMACR3__REGFRINRDMAXCR                                  3
-#define   O_DMACR3__FROUTWRMAXCR                                    3
-#define   W_DMACR3__FROUTWRMAXCR                                    3
-#define   O_DMACR3__FROUTRDMAXCR                                    0
-#define   W_DMACR3__FROUTRDMAXCR                                    3
-#define R_REG_FRIN_SPILL_MEM_START_0                                0x204
-#define   O_REG_FRIN_SPILL_MEM_START_0__REGFRINSPILLMEMSTART0        0
-#define   W_REG_FRIN_SPILL_MEM_START_0__REGFRINSPILLMEMSTART0       32
-#define R_REG_FRIN_SPILL_MEM_START_1                                0x205
-#define   O_REG_FRIN_SPILL_MEM_START_1__REGFRINSPILLMEMSTART1        0
-#define   W_REG_FRIN_SPILL_MEM_START_1__REGFRINSPILLMEMSTART1        3
-#define R_REG_FRIN_SPILL_MEM_SIZE                                   0x206
-#define   O_REG_FRIN_SPILL_MEM_SIZE__REGFRINSPILLMEMSIZE             0
-#define   W_REG_FRIN_SPILL_MEM_SIZE__REGFRINSPILLMEMSIZE            32
-#define R_FROUT_SPILL_MEM_START_0                                   0x207
-#define   O_FROUT_SPILL_MEM_START_0__FROUTSPILLMEMSTART0             0
-#define   W_FROUT_SPILL_MEM_START_0__FROUTSPILLMEMSTART0            32
-#define R_FROUT_SPILL_MEM_START_1                                   0x208
-#define   O_FROUT_SPILL_MEM_START_1__FROUTSPILLMEMSTART1             0
-#define   W_FROUT_SPILL_MEM_START_1__FROUTSPILLMEMSTART1             3
-#define R_FROUT_SPILL_MEM_SIZE                                      0x209
-#define   O_FROUT_SPILL_MEM_SIZE__FROUTSPILLMEMSIZE                  0
-#define   W_FROUT_SPILL_MEM_SIZE__FROUTSPILLMEMSIZE                 32
-#define R_CLASS0_SPILL_MEM_START_0                                  0x20A
-#define   O_CLASS0_SPILL_MEM_START_0__CLASS0SPILLMEMSTART0           0
-#define   W_CLASS0_SPILL_MEM_START_0__CLASS0SPILLMEMSTART0          32
-#define R_CLASS0_SPILL_MEM_START_1                                  0x20B
-#define   O_CLASS0_SPILL_MEM_START_1__CLASS0SPILLMEMSTART1           0
-#define   W_CLASS0_SPILL_MEM_START_1__CLASS0SPILLMEMSTART1           3
-#define R_CLASS0_SPILL_MEM_SIZE                                     0x20C
-#define   O_CLASS0_SPILL_MEM_SIZE__CLASS0SPILLMEMSIZE                0
-#define   W_CLASS0_SPILL_MEM_SIZE__CLASS0SPILLMEMSIZE               32
-#define R_JUMFRIN_SPILL_MEM_START_0                                 0x20D
-#define   O_JUMFRIN_SPILL_MEM_START_0__JUMFRINSPILLMEMSTART0          0
-#define   W_JUMFRIN_SPILL_MEM_START_0__JUMFRINSPILLMEMSTART0         32
-#define R_JUMFRIN_SPILL_MEM_START_1                                 0x20E
-#define   O_JUMFRIN_SPILL_MEM_START_1__JUMFRINSPILLMEMSTART1         0
-#define   W_JUMFRIN_SPILL_MEM_START_1__JUMFRINSPILLMEMSTART1         3
-#define R_JUMFRIN_SPILL_MEM_SIZE                                    0x20F
-#define   O_JUMFRIN_SPILL_MEM_SIZE__JUMFRINSPILLMEMSIZE              0
-#define   W_JUMFRIN_SPILL_MEM_SIZE__JUMFRINSPILLMEMSIZE             32
-#define R_CLASS1_SPILL_MEM_START_0                                  0x210
-#define   O_CLASS1_SPILL_MEM_START_0__CLASS1SPILLMEMSTART0           0
-#define   W_CLASS1_SPILL_MEM_START_0__CLASS1SPILLMEMSTART0          32
-#define R_CLASS1_SPILL_MEM_START_1                                  0x211
-#define   O_CLASS1_SPILL_MEM_START_1__CLASS1SPILLMEMSTART1           0
-#define   W_CLASS1_SPILL_MEM_START_1__CLASS1SPILLMEMSTART1           3
-#define R_CLASS1_SPILL_MEM_SIZE                                     0x212
-#define   O_CLASS1_SPILL_MEM_SIZE__CLASS1SPILLMEMSIZE                0
-#define   W_CLASS1_SPILL_MEM_SIZE__CLASS1SPILLMEMSIZE               32
-#define R_CLASS2_SPILL_MEM_START_0                                  0x213
-#define   O_CLASS2_SPILL_MEM_START_0__CLASS2SPILLMEMSTART0           0
-#define   W_CLASS2_SPILL_MEM_START_0__CLASS2SPILLMEMSTART0          32
-#define R_CLASS2_SPILL_MEM_START_1                                  0x214
-#define   O_CLASS2_SPILL_MEM_START_1__CLASS2SPILLMEMSTART1           0
-#define   W_CLASS2_SPILL_MEM_START_1__CLASS2SPILLMEMSTART1           3
-#define R_CLASS2_SPILL_MEM_SIZE                                     0x215
-#define   O_CLASS2_SPILL_MEM_SIZE__CLASS2SPILLMEMSIZE                0
-#define   W_CLASS2_SPILL_MEM_SIZE__CLASS2SPILLMEMSIZE               32
-#define R_CLASS3_SPILL_MEM_START_0                                  0x216
-#define   O_CLASS3_SPILL_MEM_START_0__CLASS3SPILLMEMSTART0           0
-#define   W_CLASS3_SPILL_MEM_START_0__CLASS3SPILLMEMSTART0          32
-#define R_CLASS3_SPILL_MEM_START_1                                  0x217
-#define   O_CLASS3_SPILL_MEM_START_1__CLASS3SPILLMEMSTART1           0
-#define   W_CLASS3_SPILL_MEM_START_1__CLASS3SPILLMEMSTART1           3
-#define R_CLASS3_SPILL_MEM_SIZE                                     0x218
-#define   O_CLASS3_SPILL_MEM_SIZE__CLASS3SPILLMEMSIZE                0
-#define   W_CLASS3_SPILL_MEM_SIZE__CLASS3SPILLMEMSIZE               32
-#define R_REG_FRIN1_SPILL_MEM_START_0                               0x219
-#define R_REG_FRIN1_SPILL_MEM_START_1                               0x21a
-#define R_REG_FRIN1_SPILL_MEM_SIZE                                  0x21b
-#define R_SPIHNGY0                                                  0x219
-#define   O_SPIHNGY0__EG_HNGY_THRESH_0                              24
-#define   W_SPIHNGY0__EG_HNGY_THRESH_0                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_1                              16
-#define   W_SPIHNGY0__EG_HNGY_THRESH_1                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_2                              8
-#define   W_SPIHNGY0__EG_HNGY_THRESH_2                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_3                              0
-#define   W_SPIHNGY0__EG_HNGY_THRESH_3                              7
-#define R_SPIHNGY1                                                  0x21A
-#define   O_SPIHNGY1__EG_HNGY_THRESH_4                              24
-#define   W_SPIHNGY1__EG_HNGY_THRESH_4                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_5                              16
-#define   W_SPIHNGY1__EG_HNGY_THRESH_5                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_6                              8
-#define   W_SPIHNGY1__EG_HNGY_THRESH_6                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_7                              0
-#define   W_SPIHNGY1__EG_HNGY_THRESH_7                              7
-#define R_SPIHNGY2                                                  0x21B
-#define   O_SPIHNGY2__EG_HNGY_THRESH_8                              24
-#define   W_SPIHNGY2__EG_HNGY_THRESH_8                              7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_9                              16
-#define   W_SPIHNGY2__EG_HNGY_THRESH_9                              7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_10                             8
-#define   W_SPIHNGY2__EG_HNGY_THRESH_10                             7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_11                             0
-#define   W_SPIHNGY2__EG_HNGY_THRESH_11                             7
-#define R_SPIHNGY3                                                  0x21C
-#define   O_SPIHNGY3__EG_HNGY_THRESH_12                             24
-#define   W_SPIHNGY3__EG_HNGY_THRESH_12                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_13                             16
-#define   W_SPIHNGY3__EG_HNGY_THRESH_13                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_14                             8
-#define   W_SPIHNGY3__EG_HNGY_THRESH_14                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_15                             0
-#define   W_SPIHNGY3__EG_HNGY_THRESH_15                             7
-#define R_SPISTRV0                                                  0x21D
-#define   O_SPISTRV0__EG_STRV_THRESH_0                              24
-#define   W_SPISTRV0__EG_STRV_THRESH_0                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_1                              16
-#define   W_SPISTRV0__EG_STRV_THRESH_1                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_2                              8
-#define   W_SPISTRV0__EG_STRV_THRESH_2                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_3                              0
-#define   W_SPISTRV0__EG_STRV_THRESH_3                              7
-#define R_SPISTRV1                                                  0x21E
-#define   O_SPISTRV1__EG_STRV_THRESH_4                              24
-#define   W_SPISTRV1__EG_STRV_THRESH_4                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_5                              16
-#define   W_SPISTRV1__EG_STRV_THRESH_5                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_6                              8
-#define   W_SPISTRV1__EG_STRV_THRESH_6                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_7                              0
-#define   W_SPISTRV1__EG_STRV_THRESH_7                              7
-#define R_SPISTRV2                                                  0x21F
-#define   O_SPISTRV2__EG_STRV_THRESH_8                              24
-#define   W_SPISTRV2__EG_STRV_THRESH_8                              7
-#define   O_SPISTRV2__EG_STRV_THRESH_9                              16
-#define   W_SPISTRV2__EG_STRV_THRESH_9                              7
-#define   O_SPISTRV2__EG_STRV_THRESH_10                             8
-#define   W_SPISTRV2__EG_STRV_THRESH_10                             7
-#define   O_SPISTRV2__EG_STRV_THRESH_11                             0
-#define   W_SPISTRV2__EG_STRV_THRESH_11                             7
-#define R_SPISTRV3                                                  0x220
-#define   O_SPISTRV3__EG_STRV_THRESH_12                             24
-#define   W_SPISTRV3__EG_STRV_THRESH_12                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_13                             16
-#define   W_SPISTRV3__EG_STRV_THRESH_13                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_14                             8
-#define   W_SPISTRV3__EG_STRV_THRESH_14                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_15                             0
-#define   W_SPISTRV3__EG_STRV_THRESH_15                             7
-#define R_TXDATAFIFO0                                               0x221
-#define   O_TXDATAFIFO0__TX0DATAFIFOSTART                           24
-#define   W_TXDATAFIFO0__TX0DATAFIFOSTART                           7
-#define   O_TXDATAFIFO0__TX0DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO0__TX0DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO0__TX1DATAFIFOSTART                           8
-#define   W_TXDATAFIFO0__TX1DATAFIFOSTART                           7
-#define   O_TXDATAFIFO0__TX1DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO0__TX1DATAFIFOSIZE                            7
-#define R_TXDATAFIFO1                                               0x222
-#define   O_TXDATAFIFO1__TX2DATAFIFOSTART                           24
-#define   W_TXDATAFIFO1__TX2DATAFIFOSTART                           7
-#define   O_TXDATAFIFO1__TX2DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO1__TX2DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO1__TX3DATAFIFOSTART                           8
-#define   W_TXDATAFIFO1__TX3DATAFIFOSTART                           7
-#define   O_TXDATAFIFO1__TX3DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO1__TX3DATAFIFOSIZE                            7
-#define R_TXDATAFIFO2                                               0x223
-#define   O_TXDATAFIFO2__TX4DATAFIFOSTART                           24
-#define   W_TXDATAFIFO2__TX4DATAFIFOSTART                           7
-#define   O_TXDATAFIFO2__TX4DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO2__TX4DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO2__TX5DATAFIFOSTART                           8
-#define   W_TXDATAFIFO2__TX5DATAFIFOSTART                           7
-#define   O_TXDATAFIFO2__TX5DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO2__TX5DATAFIFOSIZE                            7
-#define R_TXDATAFIFO3                                               0x224
-#define   O_TXDATAFIFO3__TX6DATAFIFOSTART                           24
-#define   W_TXDATAFIFO3__TX6DATAFIFOSTART                           7
-#define   O_TXDATAFIFO3__TX6DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO3__TX6DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO3__TX7DATAFIFOSTART                           8
-#define   W_TXDATAFIFO3__TX7DATAFIFOSTART                           7
-#define   O_TXDATAFIFO3__TX7DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO3__TX7DATAFIFOSIZE                            7
-#define R_TXDATAFIFO4                                               0x225
-#define   O_TXDATAFIFO4__TX8DATAFIFOSTART                           24
-#define   W_TXDATAFIFO4__TX8DATAFIFOSTART                           7
-#define   O_TXDATAFIFO4__TX8DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO4__TX8DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO4__TX9DATAFIFOSTART                           8
-#define   W_TXDATAFIFO4__TX9DATAFIFOSTART                           7
-#define   O_TXDATAFIFO4__TX9DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO4__TX9DATAFIFOSIZE                            7
-#define R_TXDATAFIFO5                                               0x226
-#define   O_TXDATAFIFO5__TX10DATAFIFOSTART                          24
-#define   W_TXDATAFIFO5__TX10DATAFIFOSTART                          7
-#define   O_TXDATAFIFO5__TX10DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO5__TX10DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO5__TX11DATAFIFOSTART                          8
-#define   W_TXDATAFIFO5__TX11DATAFIFOSTART                          7
-#define   O_TXDATAFIFO5__TX11DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO5__TX11DATAFIFOSIZE                           7
-#define R_TXDATAFIFO6                                               0x227
-#define   O_TXDATAFIFO6__TX12DATAFIFOSTART                          24
-#define   W_TXDATAFIFO6__TX12DATAFIFOSTART                          7
-#define   O_TXDATAFIFO6__TX12DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO6__TX12DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO6__TX13DATAFIFOSTART                          8
-#define   W_TXDATAFIFO6__TX13DATAFIFOSTART                          7
-#define   O_TXDATAFIFO6__TX13DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO6__TX13DATAFIFOSIZE                           7
-#define R_TXDATAFIFO7                                               0x228
-#define   O_TXDATAFIFO7__TX14DATAFIFOSTART                          24
-#define   W_TXDATAFIFO7__TX14DATAFIFOSTART                          7
-#define   O_TXDATAFIFO7__TX14DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO7__TX14DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO7__TX15DATAFIFOSTART                          8
-#define   W_TXDATAFIFO7__TX15DATAFIFOSTART                          7
-#define   O_TXDATAFIFO7__TX15DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO7__TX15DATAFIFOSIZE                           7
-#define R_RXDATAFIFO0                                               0x229
-#define   O_RXDATAFIFO0__RX0DATAFIFOSTART                           24
-#define   W_RXDATAFIFO0__RX0DATAFIFOSTART                           7
-#define   O_RXDATAFIFO0__RX0DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO0__RX0DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO0__RX1DATAFIFOSTART                           8
-#define   W_RXDATAFIFO0__RX1DATAFIFOSTART                           7
-#define   O_RXDATAFIFO0__RX1DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO0__RX1DATAFIFOSIZE                            7
-#define R_RXDATAFIFO1                                               0x22A
-#define   O_RXDATAFIFO1__RX2DATAFIFOSTART                           24
-#define   W_RXDATAFIFO1__RX2DATAFIFOSTART                           7
-#define   O_RXDATAFIFO1__RX2DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO1__RX2DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO1__RX3DATAFIFOSTART                           8
-#define   W_RXDATAFIFO1__RX3DATAFIFOSTART                           7
-#define   O_RXDATAFIFO1__RX3DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO1__RX3DATAFIFOSIZE                            7
-#define R_RXDATAFIFO2                                               0x22B
-#define   O_RXDATAFIFO2__RX4DATAFIFOSTART                           24
-#define   W_RXDATAFIFO2__RX4DATAFIFOSTART                           7
-#define   O_RXDATAFIFO2__RX4DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO2__RX4DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO2__RX5DATAFIFOSTART                           8
-#define   W_RXDATAFIFO2__RX5DATAFIFOSTART                           7
-#define   O_RXDATAFIFO2__RX5DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO2__RX5DATAFIFOSIZE                            7
-#define R_RXDATAFIFO3                                               0x22C
-#define   O_RXDATAFIFO3__RX6DATAFIFOSTART                           24
-#define   W_RXDATAFIFO3__RX6DATAFIFOSTART                           7
-#define   O_RXDATAFIFO3__RX6DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO3__RX6DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO3__RX7DATAFIFOSTART                           8
-#define   W_RXDATAFIFO3__RX7DATAFIFOSTART                           7
-#define   O_RXDATAFIFO3__RX7DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO3__RX7DATAFIFOSIZE                            7
-#define R_RXDATAFIFO4                                               0x22D
-#define   O_RXDATAFIFO4__RX8DATAFIFOSTART                           24
-#define   W_RXDATAFIFO4__RX8DATAFIFOSTART                           7
-#define   O_RXDATAFIFO4__RX8DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO4__RX8DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO4__RX9DATAFIFOSTART                           8
-#define   W_RXDATAFIFO4__RX9DATAFIFOSTART                           7
-#define   O_RXDATAFIFO4__RX9DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO4__RX9DATAFIFOSIZE                            7
-#define R_RXDATAFIFO5                                               0x22E
-#define   O_RXDATAFIFO5__RX10DATAFIFOSTART                          24
-#define   W_RXDATAFIFO5__RX10DATAFIFOSTART                          7
-#define   O_RXDATAFIFO5__RX10DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO5__RX10DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO5__RX11DATAFIFOSTART                          8
-#define   W_RXDATAFIFO5__RX11DATAFIFOSTART                          7
-#define   O_RXDATAFIFO5__RX11DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO5__RX11DATAFIFOSIZE                           7
-#define R_RXDATAFIFO6                                               0x22F
-#define   O_RXDATAFIFO6__RX12DATAFIFOSTART                          24
-#define   W_RXDATAFIFO6__RX12DATAFIFOSTART                          7
-#define   O_RXDATAFIFO6__RX12DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO6__RX12DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO6__RX13DATAFIFOSTART                          8
-#define   W_RXDATAFIFO6__RX13DATAFIFOSTART                          7
-#define   O_RXDATAFIFO6__RX13DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO6__RX13DATAFIFOSIZE                           7
-#define R_RXDATAFIFO7                                               0x230
-#define   O_RXDATAFIFO7__RX14DATAFIFOSTART                          24
-#define   W_RXDATAFIFO7__RX14DATAFIFOSTART                          7
-#define   O_RXDATAFIFO7__RX14DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO7__RX14DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO7__RX15DATAFIFOSTART                          8
-#define   W_RXDATAFIFO7__RX15DATAFIFOSTART                          7
-#define   O_RXDATAFIFO7__RX15DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO7__RX15DATAFIFOSIZE                           7
-#define R_XGMACPADCALIBRATION                                       0x231
-#define R_FREEQCARVE                                                0x233
-#define R_SPI4STATICDELAY0                                          0x240
-#define   O_SPI4STATICDELAY0__DATALINE7                             28
-#define   W_SPI4STATICDELAY0__DATALINE7                             4
-#define   O_SPI4STATICDELAY0__DATALINE6                             24
-#define   W_SPI4STATICDELAY0__DATALINE6                             4
-#define   O_SPI4STATICDELAY0__DATALINE5                             20
-#define   W_SPI4STATICDELAY0__DATALINE5                             4
-#define   O_SPI4STATICDELAY0__DATALINE4                             16
-#define   W_SPI4STATICDELAY0__DATALINE4                             4
-#define   O_SPI4STATICDELAY0__DATALINE3                             12
-#define   W_SPI4STATICDELAY0__DATALINE3                             4
-#define   O_SPI4STATICDELAY0__DATALINE2                             8
-#define   W_SPI4STATICDELAY0__DATALINE2                             4
-#define   O_SPI4STATICDELAY0__DATALINE1                             4
-#define   W_SPI4STATICDELAY0__DATALINE1                             4
-#define   O_SPI4STATICDELAY0__DATALINE0                             0
-#define   W_SPI4STATICDELAY0__DATALINE0                             4
-#define R_SPI4STATICDELAY1                                          0x241
-#define   O_SPI4STATICDELAY1__DATALINE15                            28
-#define   W_SPI4STATICDELAY1__DATALINE15                            4
-#define   O_SPI4STATICDELAY1__DATALINE14                            24
-#define   W_SPI4STATICDELAY1__DATALINE14                            4
-#define   O_SPI4STATICDELAY1__DATALINE13                            20
-#define   W_SPI4STATICDELAY1__DATALINE13                            4
-#define   O_SPI4STATICDELAY1__DATALINE12                            16
-#define   W_SPI4STATICDELAY1__DATALINE12                            4
-#define   O_SPI4STATICDELAY1__DATALINE11                            12
-#define   W_SPI4STATICDELAY1__DATALINE11                            4
-#define   O_SPI4STATICDELAY1__DATALINE10                            8
-#define   W_SPI4STATICDELAY1__DATALINE10                            4
-#define   O_SPI4STATICDELAY1__DATALINE9                             4
-#define   W_SPI4STATICDELAY1__DATALINE9                             4
-#define   O_SPI4STATICDELAY1__DATALINE8                             0
-#define   W_SPI4STATICDELAY1__DATALINE8                             4
-#define R_SPI4STATICDELAY2                                          0x242
-#define   O_SPI4STATICDELAY0__TXSTAT1                               8
-#define   W_SPI4STATICDELAY0__TXSTAT1                               4
-#define   O_SPI4STATICDELAY0__TXSTAT0                               4
-#define   W_SPI4STATICDELAY0__TXSTAT0                               4
-#define   O_SPI4STATICDELAY0__RXCONTROL                             0
-#define   W_SPI4STATICDELAY0__RXCONTROL                             4
-#define R_SPI4CONTROL                                               0x243
-#define   O_SPI4CONTROL__STATICDELAY                                2
-#define   O_SPI4CONTROL__LVDS_LVTTL                                 1
-#define   O_SPI4CONTROL__SPI4ENABLE                                 0
-#define R_CLASSWATERMARKS                                           0x244
-#define   O_CLASSWATERMARKS__CLASS0WATERMARK                        24
-#define   W_CLASSWATERMARKS__CLASS0WATERMARK                        5
-#define   O_CLASSWATERMARKS__CLASS1WATERMARK                        16
-#define   W_CLASSWATERMARKS__CLASS1WATERMARK                        5
-#define   O_CLASSWATERMARKS__CLASS3WATERMARK                        0
-#define   W_CLASSWATERMARKS__CLASS3WATERMARK                        5
-#define R_RXWATERMARKS1                                              0x245
-#define   O_RXWATERMARKS__RX0DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX0DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX1DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX1DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX3DATAWATERMARK                          0
-#define   W_RXWATERMARKS__RX3DATAWATERMARK                          7
-#define R_RXWATERMARKS2                                              0x246
-#define   O_RXWATERMARKS__RX4DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX4DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX5DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX5DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX6DATAWATERMARK                          8
-#define   W_RXWATERMARKS__RX6DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX7DATAWATERMARK                          0
-#define   W_RXWATERMARKS__RX7DATAWATERMARK                          7
-#define R_RXWATERMARKS3                                              0x247
-#define   O_RXWATERMARKS__RX8DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX8DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX9DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX9DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX10DATAWATERMARK                         8
-#define   W_RXWATERMARKS__RX10DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX11DATAWATERMARK                         0
-#define   W_RXWATERMARKS__RX11DATAWATERMARK                         7
-#define R_RXWATERMARKS4                                              0x248
-#define   O_RXWATERMARKS__RX12DATAWATERMARK                         24
-#define   W_RXWATERMARKS__RX12DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX13DATAWATERMARK                         16
-#define   W_RXWATERMARKS__RX13DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX14DATAWATERMARK                         8
-#define   W_RXWATERMARKS__RX14DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX15DATAWATERMARK                         0
-#define   W_RXWATERMARKS__RX15DATAWATERMARK                         7
-#define R_FREEWATERMARKS                                            0x249
-#define   O_FREEWATERMARKS__FREEOUTWATERMARK                        16
-#define   W_FREEWATERMARKS__FREEOUTWATERMARK                        16
-#define   O_FREEWATERMARKS__JUMFRWATERMARK                          8
-#define   W_FREEWATERMARKS__JUMFRWATERMARK                          7
-#define   O_FREEWATERMARKS__REGFRWATERMARK                          0
-#define   W_FREEWATERMARKS__REGFRWATERMARK                          7
-#define R_EGRESSFIFOCARVINGSLOTS                                    0x24a
-
-#define CTRL_RES0           0
-#define CTRL_RES1           1
-#define CTRL_REG_FREE       2
-#define CTRL_JUMBO_FREE     3
-#define CTRL_CONT           4
-#define CTRL_EOP            5
-#define CTRL_START          6
-#define CTRL_SNGL           7
-
-#define CTRL_B0_NOT_EOP     0
-#define CTRL_B0_EOP         1
-
-#define R_ROUND_ROBIN_TABLE                 0
-#define R_PDE_CLASS_0                       0x300
-#define R_PDE_CLASS_1                       0x302
-#define R_PDE_CLASS_2                       0x304
-#define R_PDE_CLASS_3                       0x306
-
-#define R_MSG_TX_THRESHOLD                  0x308
-
-#define R_GMAC_JFR0_BUCKET_SIZE              0x320
-#define R_GMAC_RFR0_BUCKET_SIZE              0x321
-#define R_GMAC_TX0_BUCKET_SIZE              0x322
-#define R_GMAC_TX1_BUCKET_SIZE              0x323
-#define R_GMAC_TX2_BUCKET_SIZE              0x324
-#define R_GMAC_TX3_BUCKET_SIZE              0x325
-#define R_GMAC_JFR1_BUCKET_SIZE              0x326
-#define R_GMAC_RFR1_BUCKET_SIZE              0x327
-
-#define R_XGS_TX0_BUCKET_SIZE               0x320
-#define R_XGS_TX1_BUCKET_SIZE               0x321
-#define R_XGS_TX2_BUCKET_SIZE               0x322
-#define R_XGS_TX3_BUCKET_SIZE               0x323
-#define R_XGS_TX4_BUCKET_SIZE               0x324
-#define R_XGS_TX5_BUCKET_SIZE               0x325
-#define R_XGS_TX6_BUCKET_SIZE               0x326
-#define R_XGS_TX7_BUCKET_SIZE               0x327
-#define R_XGS_TX8_BUCKET_SIZE               0x328
-#define R_XGS_TX9_BUCKET_SIZE               0x329
-#define R_XGS_TX10_BUCKET_SIZE              0x32A
-#define R_XGS_TX11_BUCKET_SIZE              0x32B
-#define R_XGS_TX12_BUCKET_SIZE              0x32C
-#define R_XGS_TX13_BUCKET_SIZE              0x32D
-#define R_XGS_TX14_BUCKET_SIZE              0x32E
-#define R_XGS_TX15_BUCKET_SIZE              0x32F
-#define R_XGS_JFR_BUCKET_SIZE               0x330
-#define R_XGS_RFR_BUCKET_SIZE               0x331
-
-#define R_CC_CPU0_0                         0x380
-#define R_CC_CPU1_0                         0x388
-#define R_CC_CPU2_0                         0x390
-#define R_CC_CPU3_0                         0x398
-#define R_CC_CPU4_0                         0x3a0
-#define R_CC_CPU5_0                         0x3a8
-#define R_CC_CPU6_0                         0x3b0
-#define R_CC_CPU7_0                         0x3b8
-
-#define XLR_GMAC_BLK_SZ		            (XLR_IO_GMAC_1_OFFSET - \
-		XLR_IO_GMAC_0_OFFSET)
-
-/* Constants used for configuring the devices */
-
-#define XLR_FB_STN			6 /* Bucket used for Tx freeback */
-
-#define MAC_B2B_IPG                     88
-
-#define	XLR_NET_PREPAD_LEN		32
-
-/* frame sizes need to be cacheline aligned */
-#define MAX_FRAME_SIZE                  (1536 + XLR_NET_PREPAD_LEN)
-#define MAX_FRAME_SIZE_JUMBO            9216
-
-#define MAC_SKB_BACK_PTR_SIZE           SMP_CACHE_BYTES
-#define MAC_PREPAD                      0
-#define BYTE_OFFSET                     2
-#define XLR_RX_BUF_SIZE                 (MAX_FRAME_SIZE + BYTE_OFFSET + \
-		MAC_PREPAD + MAC_SKB_BACK_PTR_SIZE + SMP_CACHE_BYTES)
-#define MAC_CRC_LEN                     4
-#define MAX_NUM_MSGRNG_STN_CC           128
-#define MAX_MSG_SND_ATTEMPTS		100	/* 13 stns x 4 entry msg/stn +
-						 * headroom
-						 */
-
-#define MAC_FRIN_TO_BE_SENT_THRESHOLD   16
-
-#define MAX_NUM_DESC_SPILL		1024
-#define MAX_FRIN_SPILL                  (MAX_NUM_DESC_SPILL << 2)
-#define MAX_FROUT_SPILL                 (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_0_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_1_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_2_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_3_SPILL               (MAX_NUM_DESC_SPILL << 2)
-
-enum {
-	SGMII_SPEED_10 = 0x00000000,
-	SGMII_SPEED_100 = 0x02000000,
-	SGMII_SPEED_1000 = 0x04000000,
-};
-
-enum tsv_rsv_reg {
-	TX_RX_64_BYTE_FRAME = 0x20,
-	TX_RX_64_127_BYTE_FRAME,
-	TX_RX_128_255_BYTE_FRAME,
-	TX_RX_256_511_BYTE_FRAME,
-	TX_RX_512_1023_BYTE_FRAME,
-	TX_RX_1024_1518_BYTE_FRAME,
-	TX_RX_1519_1522_VLAN_BYTE_FRAME,
-
-	RX_BYTE_COUNTER = 0x27,
-	RX_PACKET_COUNTER,
-	RX_FCS_ERROR_COUNTER,
-	RX_MULTICAST_PACKET_COUNTER,
-	RX_BROADCAST_PACKET_COUNTER,
-	RX_CONTROL_FRAME_PACKET_COUNTER,
-	RX_PAUSE_FRAME_PACKET_COUNTER,
-	RX_UNKNOWN_OP_CODE_COUNTER,
-	RX_ALIGNMENT_ERROR_COUNTER,
-	RX_FRAME_LENGTH_ERROR_COUNTER,
-	RX_CODE_ERROR_COUNTER,
-	RX_CARRIER_SENSE_ERROR_COUNTER,
-	RX_UNDERSIZE_PACKET_COUNTER,
-	RX_OVERSIZE_PACKET_COUNTER,
-	RX_FRAGMENTS_COUNTER,
-	RX_JABBER_COUNTER,
-	RX_DROP_PACKET_COUNTER,
-
-	TX_BYTE_COUNTER   = 0x38,
-	TX_PACKET_COUNTER,
-	TX_MULTICAST_PACKET_COUNTER,
-	TX_BROADCAST_PACKET_COUNTER,
-	TX_PAUSE_CONTROL_FRAME_COUNTER,
-	TX_DEFERRAL_PACKET_COUNTER,
-	TX_EXCESSIVE_DEFERRAL_PACKET_COUNTER,
-	TX_SINGLE_COLLISION_PACKET_COUNTER,
-	TX_MULTI_COLLISION_PACKET_COUNTER,
-	TX_LATE_COLLISION_PACKET_COUNTER,
-	TX_EXCESSIVE_COLLISION_PACKET_COUNTER,
-	TX_TOTAL_COLLISION_COUNTER,
-	TX_PAUSE_FRAME_HONERED_COUNTER,
-	TX_DROP_FRAME_COUNTER,
-	TX_JABBER_FRAME_COUNTER,
-	TX_FCS_ERROR_COUNTER,
-	TX_CONTROL_FRAME_COUNTER,
-	TX_OVERSIZE_FRAME_COUNTER,
-	TX_UNDERSIZE_FRAME_COUNTER,
-	TX_FRAGMENT_FRAME_COUNTER,
-
-	CARRY_REG_1 = 0x4c,
-	CARRY_REG_2 = 0x4d,
-};
-
-struct xlr_adapter {
-	struct net_device *netdev[4];
-};
-
-struct xlr_net_priv {
-	u32 __iomem *base_addr;
-	struct net_device *ndev;
-	struct xlr_adapter *adapter;
-	struct mii_bus *mii_bus;
-	int num_rx_desc;
-	int phy_addr;	/* PHY addr on MDIO bus */
-	int pcs_id;	/* PCS id on MDIO bus */
-	int port_id;	/* Port(gmac/xgmac) number, i.e 0-7 */
-	int tx_stnid;
-	u32 __iomem *mii_addr;
-	u32 __iomem *serdes_addr;
-	u32 __iomem *pcs_addr;
-	u32 __iomem *gpio_addr;
-	int phy_speed;
-	int port_type;
-	struct timer_list queue_timer;
-	int wakeup_q;
-	struct platform_device *pdev;
-	struct xlr_net_data *nd;
-
-	u64 *frin_spill;
-	u64 *frout_spill;
-	u64 *class_0_spill;
-	u64 *class_1_spill;
-	u64 *class_2_spill;
-	u64 *class_3_spill;
-};
-
-void xlr_set_gmac_speed(struct xlr_net_priv *priv);
diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 55c3d4a..b4820ad 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -107,6 +107,7 @@
 	{0x01},	/* 0x10, RT_CHANNEL_DOMAIN_JAPAN */
 	{0x02},	/* 0x11, RT_CHANNEL_DOMAIN_FCC_NO_DFS */
 	{0x01},	/* 0x12, RT_CHANNEL_DOMAIN_JAPAN_NO_DFS */
+	{0x00}, /* 0x13 */
 	{0x02},	/* 0x14, RT_CHANNEL_DOMAIN_TAIWAN_NO_DFS */
 	{0x00},	/* 0x15, RT_CHANNEL_DOMAIN_ETSI_NO_DFS */
 	{0x00},	/* 0x16, RT_CHANNEL_DOMAIN_KOREA_NO_DFS */
@@ -118,6 +119,7 @@
 	{0x00},	/* 0x1C, */
 	{0x00},	/* 0x1D, */
 	{0x00},	/* 0x1E, */
+	{0x00},	/* 0x1F, */
 	/*  0x20 ~ 0x7F , New Define ===== */
 	{0x00},	/* 0x20, RT_CHANNEL_DOMAIN_WORLD_NULL */
 	{0x01},	/* 0x21, RT_CHANNEL_DOMAIN_ETSI1_NULL */
@@ -6845,12 +6847,12 @@
 	struct mlme_ext_priv		*pmlmeext = &padapter->mlmeextpriv;
 	struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
 
-	pcmd_obj = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL);
+	pcmd_obj = kzalloc(sizeof(*pcmd_obj), GFP_ATOMIC);
 	if (!pcmd_obj)
 		return;
 
 	cmdsz = (sizeof(struct stadel_event) + sizeof(struct C2HEvent_Header));
-	pevtcmd = kzalloc(cmdsz, GFP_KERNEL);
+	pevtcmd = kzalloc(cmdsz, GFP_ATOMIC);
 	if (!pevtcmd) {
 		kfree(pcmd_obj);
 		return;
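
The kzalloc() switch above trades GFP_KERNEL for GFP_ATOMIC, presumably because this
event-report path can run with a spinlock held (or otherwise in a context that must not
sleep); GFP_KERNEL may block on memory reclaim, GFP_ATOMIC may not. A minimal sketch of
the rule — the struct and helper names here are invented for illustration, they are not
from the driver:

    #include <linux/slab.h>

    struct evt { int code; };	/* hypothetical event record */

    /* Called with a spinlock held or from softirq context: the
     * allocation must not sleep, so GFP_ATOMIC is mandatory. It draws
     * on emergency reserves and fails more readily than GFP_KERNEL,
     * so the caller's NULL check is not optional. */
    static struct evt *evt_alloc_atomic(void)
    {
    	return kzalloc(sizeof(struct evt), GFP_ATOMIC);
    }
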
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 52d42e5..9404355 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -1980,6 +1980,7 @@
 	u32 data32;
 	u32 bytes;
 	u8 *ptmp;
+	int ret;
 
 	padapter = (struct adapter *)rtw_netdev_priv(dev);
 	p = &wrqu->data;
@@ -2007,12 +2008,17 @@
 		break;
 	default:
 		DBG_88E(KERN_INFO "%s: usage> read [bytes],[address(hex)]\n", __func__);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_free_ptmp;
 	}
 	DBG_88E(KERN_INFO "%s: addr = 0x%08X data =%s\n", __func__, addr, extra);
 
 	kfree(ptmp);
 	return 0;
+
+err_free_ptmp:
+	kfree(ptmp);
+	return ret;
 }
 
 static int rtw_wx_write32(struct net_device *dev,
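
The hunk above fixes a leak: the -EINVAL exit in the ioctl handler returned before
freeing the ptmp buffer allocated earlier, so the fix funnels the failure through an
error label next to the success path's kfree(). The general shape of the idiom, as a
free-standing compilable sketch with invented names:

    #include <stdlib.h>

    /* One exit path per acquired resource: an early failure jumps to
     * the label that releases everything acquired so far, so no error
     * branch can leak. Illustrative only. */
    int parse_request(const char *arg)
    {
    	int ret = 0;
    	char *buf = malloc(64);

    	if (!buf)
    		return -1;		/* nothing to unwind yet */
    	if (!arg) {
    		ret = -1;
    		goto out_free;		/* was a leaking "return" before */
    	}
    	/* ... use buf ... */
    out_free:
    	free(buf);
    	return ret;
    }
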
diff --git a/drivers/staging/r8188eu/os_dep/mlme_linux.c b/drivers/staging/r8188eu/os_dep/mlme_linux.c
index a9b6ffd..f7ce724 100644
--- a/drivers/staging/r8188eu/os_dep/mlme_linux.c
+++ b/drivers/staging/r8188eu/os_dep/mlme_linux.c
@@ -112,7 +112,7 @@
 
 	buff = NULL;
 	if (authmode == _WPA_IE_ID_) {
-		buff = kzalloc(IW_CUSTOM_MAX, GFP_KERNEL);
+		buff = kzalloc(IW_CUSTOM_MAX, GFP_ATOMIC);
 		if (!buff)
 			return;
 		p = buff;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index d2e9df6..b9ce718 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -2549,13 +2549,14 @@
 			free_irq(dev->irq, dev);
 			priv->irq = 0;
 		}
-		free_rtllib(dev);
 
 		if (dev->mem_start != 0) {
 			iounmap((void __iomem *)dev->mem_start);
 			release_mem_region(pci_resource_start(pdev, 1),
 					pci_resource_len(pdev, 1));
 		}
+
+		free_rtllib(dev);
 	}
 
 	pci_disable_device(pdev);
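
The reordering above is a use-after-free fix: free_rtllib() ends up freeing the
net_device, and the old code then read dev->mem_start out of the freed structure to do
the iounmap. Teardown should release everything reachable through an object before
freeing the object itself — reverse order of setup. A hedged sketch of that discipline
(assuming, as here, that the device structure owns the mapping address):

    #include <linux/io.h>
    #include <linux/netdevice.h>

    /* Reverse-order teardown: consume the fields of 'dev' first, free
     * the structure that holds them last. Illustrative fragment. */
    static void example_teardown(struct net_device *dev)
    {
    	iounmap((void __iomem *)dev->mem_start);	/* reads dev */
    	free_netdev(dev);				/* now safe to free */
    }
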
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 0b65de9..95a88f6 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -520,7 +520,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_show_tg_pt_gp_info(lun, page);
@@ -531,7 +531,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_store_tg_pt_gp_info(lun, page, count);
@@ -542,7 +542,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_show_offline_bit(lun, page);
@@ -553,7 +553,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_store_offline_bit(lun, page, count);
@@ -564,7 +564,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_show_secondary_status(lun, page);
@@ -575,7 +575,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_store_secondary_status(lun, page, count);
@@ -586,7 +586,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_show_secondary_write_metadata(lun, page);
@@ -597,7 +597,7 @@
 {
 	struct se_lun *lun = item_to_lun(item);
 
-	if (!lun || !lun->lun_se_dev)
+	if (!lun->lun_se_dev)
 		return -ENODEV;
 
 	return core_alua_store_secondary_write_metadata(lun, page, count);
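
All eight hunks above drop the same dead test: item_to_lun() is a container_of()
wrapper, and container_of() is pure pointer arithmetic on a pointer that configfs
guarantees is valid, so lun can never be NULL there. A compilable miniature showing why
(types and names invented):

    #include <stddef.h>
    #include <stdio.h>

    struct inner { int x; };
    struct outer { int tag; struct inner in; };

    /* container_of in miniature: subtract the member's offset. For a
     * valid member pointer the result is by construction non-NULL,
     * which is why "if (!lun)" above could never fire. */
    #define container_of(ptr, type, member) \
    	((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
    	struct outer o = { .tag = 7 };
    	struct inner *ip = &o.in;

    	printf("%d\n", container_of(ip, struct outer, in)->tag);
    	return 0;
    }
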
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 22703a0..4c76498 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -40,11 +40,11 @@
 	 *
 	 * See spc4r17 section 6.4.2 Table 135
 	 */
-	spin_lock(&lun->lun_tg_pt_gp_lock);
-	tg_pt_gp = lun->lun_tg_pt_gp;
+	rcu_read_lock();
+	tg_pt_gp = rcu_dereference(lun->lun_tg_pt_gp);
 	if (tg_pt_gp)
 		buf[5] |= tg_pt_gp->tg_pt_gp_alua_access_type;
-	spin_unlock(&lun->lun_tg_pt_gp_lock);
+	rcu_read_unlock();
 }
 
 static u16
@@ -325,14 +325,14 @@
 		 * Get the PROTOCOL IDENTIFIER as defined by spc4r17
 		 * section 7.5.1 Table 362
 		 */
-		spin_lock(&lun->lun_tg_pt_gp_lock);
-		tg_pt_gp = lun->lun_tg_pt_gp;
+		rcu_read_lock();
+		tg_pt_gp = rcu_dereference(lun->lun_tg_pt_gp);
 		if (!tg_pt_gp) {
-			spin_unlock(&lun->lun_tg_pt_gp_lock);
+			rcu_read_unlock();
 			goto check_lu_gp;
 		}
 		tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id;
-		spin_unlock(&lun->lun_tg_pt_gp_lock);
+		rcu_read_unlock();
 
 		buf[off] = tpg->proto_id << 4;
 		buf[off++] |= 0x1; /* CODE SET == Binary */
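
The spinlock-to-RCU conversion above implies that the writer side (outside this excerpt)
now publishes lun->lun_tg_pt_gp with rcu_assign_pointer() and defers freeing past a
grace period; the readers here become lock-free. The read-side contract, restated as a
fragment using the same field names as the hunks (it presumes the target-core headers):

    #include <linux/rcupdate.h>

    /* The pointer obtained from rcu_dereference() is only guaranteed
     * live until rcu_read_unlock(), so copy out the fields you need
     * inside the critical section, as both hunks above do. */
    static u16 example_read_gp_id(struct se_lun *lun)
    {
    	struct t10_alua_tg_pt_gp *tg_pt_gp;
    	u16 id = 0;

    	rcu_read_lock();
    	tg_pt_gp = rcu_dereference(lun->lun_tg_pt_gp);
    	if (tg_pt_gp)
    		id = tg_pt_gp->tg_pt_gp_id;
    	rcu_read_unlock();
    	return id;
    }
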
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index e7fcbc0..bac1114 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -50,15 +50,6 @@
 
 void core_tmr_release_req(struct se_tmr_req *tmr)
 {
-	struct se_device *dev = tmr->tmr_dev;
-	unsigned long flags;
-
-	if (dev) {
-		spin_lock_irqsave(&dev->se_tmr_lock, flags);
-		list_del_init(&tmr->tmr_list);
-		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
-	}
-
 	kfree(tmr);
 }
 
@@ -156,13 +147,6 @@
 			se_cmd->state_active = false;
 			spin_unlock_irqrestore(&dev->queues[i].lock, flags);
 
-			/*
-			 * Ensure that this ABORT request is visible to the LU
-			 * RESET code.
-			 */
-			if (!tmr->tmr_dev)
-				WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0);
-
 			if (dev->transport->tmr_notify)
 				dev->transport->tmr_notify(dev, TMR_ABORT_TASK,
 							   &aborted_list);
@@ -234,6 +218,7 @@
 		}
 
 		list_move_tail(&tmr_p->tmr_list, &drain_tmr_list);
+		tmr_p->tmr_dev = NULL;
 	}
 	spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4a2e749..7838dc2 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -676,6 +676,21 @@
 	spin_unlock_irqrestore(&dev->queues[cmd->cpuid].lock, flags);
 }
 
+static void target_remove_from_tmr_list(struct se_cmd *cmd)
+{
+	struct se_device *dev = NULL;
+	unsigned long flags;
+
+	if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+		dev = cmd->se_tmr_req->tmr_dev;
+
+	if (dev) {
+		spin_lock_irqsave(&dev->se_tmr_lock, flags);
+		if (cmd->se_tmr_req->tmr_dev)
+			list_del_init(&cmd->se_tmr_req->tmr_list);
+		spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
+	}
+}
 /*
  * This function is called by the target core after the target core has
  * finished processing a SCSI command or SCSI TMF. Both the regular command
@@ -687,13 +702,6 @@
 {
 	unsigned long flags;
 
-	target_remove_from_state_list(cmd);
-
-	/*
-	 * Clear struct se_cmd->se_lun before the handoff to FE.
-	 */
-	cmd->se_lun = NULL;
-
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
 	/*
 	 * Determine if frontend context caller is requesting the stopping of
@@ -728,8 +736,16 @@
 	if (!lun)
 		return;
 
+	target_remove_from_state_list(cmd);
+	target_remove_from_tmr_list(cmd);
+
 	if (cmpxchg(&cmd->lun_ref_active, true, false))
 		percpu_ref_put(&lun->lun_ref);
+
+	/*
+	 * Clear struct se_cmd->se_lun before the handoff to FE.
+	 */
+	cmd->se_lun = NULL;
 }
 
 static void target_complete_failure_work(struct work_struct *work)
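
Taken together, the target_core_tmr.c and target_core_transport.c hunks move the
tmr_list removal from core_tmr_release_req() — which could run after the device was
gone — into the point where the command drops its LUN reference, while the device is
still guaranteed valid. The two sides now follow a small handshake: the LUN RESET
drainer sets tmr_dev = NULL under se_tmr_lock when it moves an entry to its drain list,
and target_remove_from_tmr_list() re-checks tmr_dev under the same lock before
unlinking, so exactly one side ever touches the node. The shape of that handshake,
mirroring the hunks:

    /* Both parties modify tmr_dev and the list only under
     * se_tmr_lock, so a concurrent drainer and releaser can never
     * both unlink the entry. Fragment restating the fix. */
    spin_lock_irqsave(&dev->se_tmr_lock, flags);
    if (cmd->se_tmr_req->tmr_dev)		/* not yet drained? */
    	list_del_init(&cmd->se_tmr_req->tmr_list);
    spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
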
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index 4542482..d8c8683 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -810,10 +810,9 @@
 		return -EINVAL;
 
 	optee = kzalloc(sizeof(*optee), GFP_KERNEL);
-	if (!optee) {
-		rc = -ENOMEM;
-		goto err;
-	}
+	if (!optee)
+		return -ENOMEM;
+
 	optee->pool = optee_ffa_config_dyn_shm();
 	if (IS_ERR(optee->pool)) {
 		rc = PTR_ERR(optee->pool);
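
This is the converse of the goto-unwind idiom: before the first resource has been
acquired, jumping to a shared err label (whose cleanup code has nothing valid to undo
yet) is both harder to read and easy to get wrong, so a direct return is preferred.
Fragment mirroring the hunk:

    /* Nothing acquired yet, so fail directly rather than running the
     * shared cleanup label against a half-initialized context. */
    optee = kzalloc(sizeof(*optee), GFP_KERNEL);
    if (!optee)
    	return -ENOMEM;
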
diff --git a/drivers/thermal/intel/int340x_thermal/Kconfig b/drivers/thermal/intel/int340x_thermal/Kconfig
index 45c31f3..5d046de 100644
--- a/drivers/thermal/intel/int340x_thermal/Kconfig
+++ b/drivers/thermal/intel/int340x_thermal/Kconfig
@@ -5,12 +5,12 @@
 
 config INT340X_THERMAL
 	tristate "ACPI INT340X thermal drivers"
-	depends on X86 && ACPI && PCI
+	depends on X86_64 && ACPI && PCI
 	select THERMAL_GOV_USER_SPACE
 	select ACPI_THERMAL_REL
 	select ACPI_FAN
 	select INTEL_SOC_DTS_IOSF_CORE
-	select PROC_THERMAL_MMIO_RAPL if X86_64 && POWERCAP
+	select PROC_THERMAL_MMIO_RAPL if POWERCAP
 	help
 	  Newer laptops and tablets that use ACPI may have thermal sensors and
 	  other devices with thermal control capabilities outside the core
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
index a865219..01008ae 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include "processor_thermal_device.h"
 
 #define MBOX_CMD_WORKLOAD_TYPE_READ	0x0E
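
The added include is a build fix: readq()/writeq() only exist natively on 64-bit
targets, and <linux/io-64-nonatomic-lo-hi.h> supplies 32-bit fallbacks that split the
access into two 32-bit operations, low half first (the sibling -hi-lo header does the
high half first). What a 64-bit MMIO read looks like through it, as a sketch with an
invented function name:

    #include <linux/io-64-nonatomic-lo-hi.h>

    /* Native 64-bit load on 64-bit kernels; on 32-bit this expands to
     * readl(base) | ((u64)readl(base + 4) << 32) -- two accesses, low
     * word first, which is only safe for registers that tolerate a
     * split, non-atomic read. */
    static u64 example_mbox_read(void __iomem *base)
    {
    	return readq(base);
    }
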
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 648829a..82654dc 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -421,6 +421,8 @@
 {
 	struct thermal_instance *pos;
 	tz->temperature = THERMAL_TEMP_INVALID;
+	tz->prev_low_trip = -INT_MAX;
+	tz->prev_high_trip = INT_MAX;
 	list_for_each_entry(pos, &tz->thermal_instances, tz_node)
 		pos->initialized = false;
 }
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index f0bf01e..71e0dd2 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -522,6 +522,7 @@
 	.remove = xencons_remove,
 	.resume = xencons_resume,
 	.otherend_changed = xencons_backend_changed,
+	.not_essential = true,
 };
 #endif /* CONFIG_HVC_XEN_FRONTEND */
 
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index f1d1006..097142f 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -420,15 +420,15 @@
 	data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0);
 	if (IS_ERR(data->phy)) {
 		ret = PTR_ERR(data->phy);
-		if (ret == -ENODEV) {
-			data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
-			if (IS_ERR(data->phy)) {
-				ret = PTR_ERR(data->phy);
-				if (ret == -ENODEV)
-					data->phy = NULL;
-				else
-					goto err_clk;
-			}
+		if (ret != -ENODEV)
+			goto err_clk;
+		data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
+		if (IS_ERR(data->phy)) {
+			ret = PTR_ERR(data->phy);
+			if (ret == -ENODEV)
+				data->phy = NULL;
+			else
+				goto err_clk;
 		}
 	}
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 86658a8..00070a8 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4700,8 +4700,6 @@
 	if (oldspeed == USB_SPEED_LOW)
 		delay = HUB_LONG_RESET_TIME;
 
-	mutex_lock(hcd->address0_mutex);
-
 	/* Reset the device; full speed may morph to high speed */
 	/* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
 	retval = hub_port_reset(hub, port1, udev, delay, false);
@@ -5016,7 +5014,6 @@
 		hub_port_disable(hub, port1, 0);
 		update_devnum(udev, devnum);	/* for disconnect processing */
 	}
-	mutex_unlock(hcd->address0_mutex);
 	return retval;
 }
 
@@ -5191,6 +5188,7 @@
 	struct usb_port *port_dev = hub->ports[port1 - 1];
 	struct usb_device *udev = port_dev->child;
 	static int unreliable_port = -1;
+	bool retry_locked;
 
 	/* Disconnect any existing devices under this port */
 	if (udev) {
@@ -5246,8 +5244,11 @@
 		unit_load = 100;
 
 	status = 0;
-	for (i = 0; i < PORT_INIT_TRIES; i++) {
 
+	for (i = 0; i < PORT_INIT_TRIES; i++) {
+		usb_lock_port(port_dev);
+		mutex_lock(hcd->address0_mutex);
+		retry_locked = true;
 		/* reallocate for each attempt, since references
 		 * to the previous one can escape in various ways
 		 */
@@ -5255,6 +5256,8 @@
 		if (!udev) {
 			dev_err(&port_dev->dev,
 					"couldn't allocate usb_device\n");
+			mutex_unlock(hcd->address0_mutex);
+			usb_unlock_port(port_dev);
 			goto done;
 		}
 
@@ -5276,12 +5279,14 @@
 		}
 
 		/* reset (non-USB 3.0 devices) and get descriptor */
-		usb_lock_port(port_dev);
 		status = hub_port_init(hub, udev, port1, i);
-		usb_unlock_port(port_dev);
 		if (status < 0)
 			goto loop;
 
+		mutex_unlock(hcd->address0_mutex);
+		usb_unlock_port(port_dev);
+		retry_locked = false;
+
 		if (udev->quirks & USB_QUIRK_DELAY_INIT)
 			msleep(2000);
 
@@ -5374,6 +5379,10 @@
 		usb_ep0_reinit(udev);
 		release_devnum(udev);
 		hub_free_dev(udev);
+		if (retry_locked) {
+			mutex_unlock(hcd->address0_mutex);
+			usb_unlock_port(port_dev);
+		}
 		usb_put_dev(udev);
 		if ((status == -ENOTCONN) || (status == -ENOTSUPP))
 			break;
@@ -5915,6 +5924,8 @@
 	bos = udev->bos;
 	udev->bos = NULL;
 
+	mutex_lock(hcd->address0_mutex);
+
 	for (i = 0; i < PORT_INIT_TRIES; ++i) {
 
 		/* ep0 maxpacket size may change; let the HCD know about it.
@@ -5924,6 +5935,7 @@
 		if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV)
 			break;
 	}
+	mutex_unlock(hcd->address0_mutex);
 
 	if (ret < 0)
 		goto re_enumerate;
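
The hub.c rework above pulls hcd->address0_mutex out of hub_port_init() and into its
callers, so each enumeration attempt holds the port lock and address0_mutex together
for the whole attempt — always taken in that order and released in reverse — with the
retry_locked flag telling the error path whether it still owns them. The underlying
discipline, as a compilable miniature with POSIX mutexes standing in for the two kernel
locks (names invented):

    #include <pthread.h>

    static pthread_mutex_t port_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t address0_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Deadlock avoidance by fixed order: every path takes port_lock
     * before address0_mutex and releases in reverse, so two threads
     * can never each hold one lock while waiting for the other. */
    void enumerate_attempt(void (*init)(void))
    {
    	pthread_mutex_lock(&port_lock);
    	pthread_mutex_lock(&address0_mutex);
    	init();			/* the hub_port_init() stand-in */
    	pthread_mutex_unlock(&address0_mutex);
    	pthread_mutex_unlock(&port_lock);
    }
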
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 4ab4a1d..ab8d7da 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1198,6 +1198,8 @@
 			}
 			ctrl |= DXEPCTL_CNAK;
 		} else {
+			hs_req->req.frame_number = hs_ep->target_frame;
+			hs_req->req.actual = 0;
 			dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
 			return;
 		}
@@ -2857,9 +2859,12 @@
 
 	do {
 		hs_req = get_ep_head(hs_ep);
-		if (hs_req)
+		if (hs_req) {
+			hs_req->req.frame_number = hs_ep->target_frame;
+			hs_req->req.actual = 0;
 			dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req,
 						    -ENODATA);
+		}
 		dwc2_gadget_incr_frame_num(hs_ep);
 		/* Update current frame number value. */
 		hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg);
@@ -2912,8 +2917,11 @@
 
 	while (dwc2_gadget_target_frame_elapsed(ep)) {
 		hs_req = get_ep_head(ep);
-		if (hs_req)
+		if (hs_req) {
+			hs_req->req.frame_number = ep->target_frame;
+			hs_req->req.actual = 0;
 			dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA);
+		}
 
 		dwc2_gadget_incr_frame_num(ep);
 		/* Update current frame number value. */
@@ -3002,8 +3010,11 @@
 
 	while (dwc2_gadget_target_frame_elapsed(hs_ep)) {
 		hs_req = get_ep_head(hs_ep);
-		if (hs_req)
+		if (hs_req) {
+			hs_req->req.frame_number = hs_ep->target_frame;
+			hs_req->req.actual = 0;
 			dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
+		}
 
 		dwc2_gadget_incr_frame_num(hs_ep);
 		/* Update current frame number value. */
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index 89a7883..24beff6 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -59,7 +59,7 @@
 #define DWC2_UNRESERVE_DELAY (msecs_to_jiffies(5))
 
 /* If we get a NAK, wait this long before retrying */
-#define DWC2_RETRY_WAIT_DELAY (1 * 1E6L)
+#define DWC2_RETRY_WAIT_DELAY (1 * NSEC_PER_MSEC)
 
 /**
  * dwc2_periodic_channel_available() - Checks that a channel is available for a
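
A small correctness-of-type point behind the one-liner above: 1E6L is a floating-point
constant (the exponent form makes it floating, the L makes it a long double), so the
old delay silently went through float arithmetic before being converted back to an
integer nanosecond count; NSEC_PER_MSEC is the integral kernel constant with the unit
in its name. A trivial compilable illustration:

    #include <stdio.h>

    #define NSEC_PER_MSEC 1000000L	/* same value the kernel header defines */

    int main(void)
    {
    	/* "retry after 1 ms", in the nanoseconds that hrtimers expect */
    	long delay_ns = 1 * NSEC_PER_MSEC;

    	printf("%ld ns\n", delay_ns);
    	return 0;
    }
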
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 643239d..f4c0995 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1594,9 +1594,11 @@
 
 	dwc3_get_properties(dwc);
 
-	ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
-	if (ret)
-		return ret;
+	if (!dwc->sysdev_is_parent) {
+		ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
+		if (ret)
+			return ret;
+	}
 
 	dwc->reset = devm_reset_control_array_get_optional_shared(dev);
 	if (IS_ERR(dwc->reset))
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 620c8d3..5c491d0 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -143,7 +143,7 @@
 #define DWC3_GHWPARAMS8		0xc600
 #define DWC3_GUCTL3		0xc60c
 #define DWC3_GFLADJ		0xc630
-#define DWC3_GHWPARAMS9		0xc680
+#define DWC3_GHWPARAMS9		0xc6e0
 
 /* Device Registers */
 #define DWC3_DCFG		0xc700
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 23de2a5..7e3db00 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -310,13 +310,24 @@
 	if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) {
 		int link_state;
 
+		/*
+		 * Initiate remote wakeup if the link state is in U3 when
+		 * operating in SS/SSP or L1/L2 when operating in HS/FS. If the
+		 * link state is in U1/U2, no remote wakeup is needed. The Start
+		 * Transfer command will initiate the link recovery.
+		 */
 		link_state = dwc3_gadget_get_link_state(dwc);
-		if (link_state == DWC3_LINK_STATE_U1 ||
-		    link_state == DWC3_LINK_STATE_U2 ||
-		    link_state == DWC3_LINK_STATE_U3) {
+		switch (link_state) {
+		case DWC3_LINK_STATE_U2:
+			if (dwc->gadget->speed >= USB_SPEED_SUPER)
+				break;
+
+			fallthrough;
+		case DWC3_LINK_STATE_U3:
 			ret = __dwc3_gadget_wakeup(dwc);
 			dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n",
 					ret);
+			break;
 		}
 	}
 
@@ -3252,6 +3263,9 @@
 	struct dwc3		*dwc = dep->dwc;
 	bool			no_started_trb = true;
 
+	if (!dep->endpoint.desc)
+		return no_started_trb;
+
 	dwc3_gadget_ep_cleanup_completed_requests(dep, event, status);
 
 	if (dep->flags & DWC3_EP_END_TRANSFER_PENDING)
@@ -3299,6 +3313,9 @@
 {
 	int status = 0;
 
+	if (!dep->endpoint.desc)
+		return;
+
 	if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
 		dwc3_gadget_endpoint_frame_from_event(dep, event);
 
@@ -3352,6 +3369,14 @@
 	if (cmd != DWC3_DEPCMD_ENDTRANSFER)
 		return;
 
+	/*
+	 * The END_TRANSFER command will cause the controller to generate a
+	 * NoStream Event, and it's not due to the host DP NoStream rejection.
+	 * Ignore the next NoStream event.
+	 */
+	if (dep->stream_capable)
+		dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
+
 	dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
 	dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
 	dwc3_gadget_ep_cleanup_cancelled_requests(dep);
@@ -3574,14 +3599,6 @@
 	WARN_ON_ONCE(ret);
 	dep->resource_index = 0;
 
-	/*
-	 * The END_TRANSFER command will cause the controller to generate a
-	 * NoStream Event, and it's not due to the host DP NoStream rejection.
-	 * Ignore the next NoStream event.
-	 */
-	if (dep->stream_capable)
-		dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
-
 	if (!interrupt)
 		dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
 	else
diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
index f5ca670..857159d 100644
--- a/drivers/usb/gadget/udc/udc-xilinx.c
+++ b/drivers/usb/gadget/udc/udc-xilinx.c
@@ -2136,7 +2136,7 @@
 
 	ret = usb_add_gadget_udc(&pdev->dev, &udc->gadget);
 	if (ret)
-		goto fail;
+		goto err_disable_unprepare_clk;
 
 	udc->dev = &udc->gadget.dev;
 
@@ -2155,6 +2155,9 @@
 		 udc->dma_enabled ? "with DMA" : "without DMA");
 
 	return 0;
+
+err_disable_unprepare_clk:
+	clk_disable_unprepare(udc->clk);
 fail:
 	dev_err(&pdev->dev, "probe failed, %d\n", ret);
 	return ret;
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 1bf494b..c8af2cd 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1400,6 +1400,7 @@
 
 static int tegra_xusb_probe(struct platform_device *pdev)
 {
+	struct of_phandle_args args;
 	struct tegra_xusb *tegra;
 	struct device_node *np;
 	struct resource *regs;
@@ -1454,10 +1455,17 @@
 		goto put_padctl;
 	}
 
-	tegra->padctl_irq = of_irq_get(np, 0);
-	if (tegra->padctl_irq <= 0) {
-		err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
-		goto put_padctl;
+	/* Older device-trees don't have padctrl interrupt */
+	err = of_irq_parse_one(np, 0, &args);
+	if (!err) {
+		tegra->padctl_irq = of_irq_get(np, 0);
+		if (tegra->padctl_irq <= 0) {
+			err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
+			goto put_padctl;
+		}
+	} else {
+		dev_dbg(&pdev->dev,
+			"%pOF is missing an interrupt, disabling PM support\n", np);
 	}
 
 	tegra->host_clk = devm_clk_get(&pdev->dev, "xusb_host");
@@ -1696,11 +1704,15 @@
 		goto remove_usb3;
 	}
 
-	err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq, NULL, tegra_xusb_padctl_irq,
-					IRQF_ONESHOT, dev_name(&pdev->dev), tegra);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
-		goto remove_usb3;
+	if (tegra->padctl_irq) {
+		err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq,
+						NULL, tegra_xusb_padctl_irq,
+						IRQF_ONESHOT, dev_name(&pdev->dev),
+						tegra);
+		if (err < 0) {
+			dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
+			goto remove_usb3;
+		}
 	}
 
 	err = tegra_xusb_enable_firmware_messages(tegra);
@@ -1718,13 +1730,16 @@
 	/* Enable wake for both USB 2.0 and USB 3.0 roothubs */
 	device_init_wakeup(&tegra->hcd->self.root_hub->dev, true);
 	device_init_wakeup(&xhci->shared_hcd->self.root_hub->dev, true);
-	device_init_wakeup(tegra->dev, true);
 
 	pm_runtime_use_autosuspend(tegra->dev);
 	pm_runtime_set_autosuspend_delay(tegra->dev, 2000);
 	pm_runtime_mark_last_busy(tegra->dev);
 	pm_runtime_set_active(tegra->dev);
-	pm_runtime_enable(tegra->dev);
+
+	if (tegra->padctl_irq) {
+		device_init_wakeup(tegra->dev, true);
+		pm_runtime_enable(tegra->dev);
+	}
 
 	return 0;
 
@@ -1772,7 +1787,9 @@
 	dma_free_coherent(&pdev->dev, tegra->fw.size, tegra->fw.virt,
 			  tegra->fw.phys);
 
-	pm_runtime_disable(&pdev->dev);
+	if (tegra->padctl_irq)
+		pm_runtime_disable(&pdev->dev);
+
 	pm_runtime_put(&pdev->dev);
 
 	tegra_xusb_powergate_partitions(tegra);
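
The xhci-tegra changes above make the padctl interrupt optional: older device trees
ship without it, so the probe treats "interrupt absent" as a soft condition — wakeup and
runtime PM are simply left disabled, and every later use of padctl_irq is guarded —
rather than failing. of_irq_parse_one() serves purely as an existence check so that
of_irq_get()'s genuine errors (such as -EPROBE_DEFER) still propagate. For platform
devices there is also a ready-made idiom for the same thing, sketched below;
platform_get_irq_optional() is a real API, but it is not what this driver uses:

    /* Alternative optional-IRQ idiom for platform devices: at the
     * time of this series, -ENXIO means "not present" and becomes a
     * feature toggle; anything else negative is a genuine error
     * (including -EPROBE_DEFER) and must be returned. */
    irq = platform_get_irq_optional(pdev, 0);
    if (irq == -ENXIO)
    	irq = 0;		/* absent: run without wakeup support */
    else if (irq < 0)
    	return irq;
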
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index a484ff5..546fce4 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1267,6 +1267,8 @@
 	  .driver_info = NCTRL(2) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, 0x9010),				/* Telit SBL FN980 flashing device */
 	  .driver_info = NCTRL(0) | ZLP },
+	{ USB_DEVICE(TELIT_VENDOR_ID, 0x9200),				/* Telit LE910S1 flashing device */
+	  .driver_info = NCTRL(0) | ZLP },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
 	  .driver_info = RSVD(1) },
@@ -2094,6 +2096,9 @@
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) },	/* Fibocom FG150 Diag */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) },		/* Fibocom FG150 AT */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },			/* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
+	{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) },			/* Fibocom FM101-GL (laptop MBIM) */
+	{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff),			/* Fibocom FM101-GL (laptop MBIM) */
+	  .driver_info = RSVD(4) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) },			/* LongSung M5710 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) },			/* GosunCn GM500 RNDIS */
 	{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) },			/* GosunCn GM500 MBIM */
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index f45ca7d..a70fd86 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -432,6 +432,7 @@
 	case 0x200:
 		switch (bcdDevice) {
 		case 0x100:
+		case 0x105:
 		case 0x305:
 		case 0x405:
 			/*
diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index 7a2a178..72f9001 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -669,25 +669,27 @@
 		ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
 					     FUSB_REG_MASK_BC_LVL |
 					     FUSB_REG_MASK_COMP_CHNG,
-					     FUSB_REG_MASK_COMP_CHNG);
-		if (ret < 0) {
-			fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
-				    ret);
-			goto done;
-		}
-		chip->intr_comp_chng = true;
-		break;
-	case TYPEC_CC_RD:
-		ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
-					     FUSB_REG_MASK_BC_LVL |
-					     FUSB_REG_MASK_COMP_CHNG,
 					     FUSB_REG_MASK_BC_LVL);
 		if (ret < 0) {
 			fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
 				    ret);
 			goto done;
 		}
+		chip->intr_comp_chng = true;
+		chip->intr_bc_lvl = false;
+		break;
+	case TYPEC_CC_RD:
+		ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
+					     FUSB_REG_MASK_BC_LVL |
+					     FUSB_REG_MASK_COMP_CHNG,
+					     FUSB_REG_MASK_COMP_CHNG);
+		if (ret < 0) {
+			fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
+				    ret);
+			goto done;
+		}
 		chip->intr_bc_lvl = true;
+		chip->intr_comp_chng = false;
 		break;
 	default:
 		break;
diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index fb8ef12..6d27a5b 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -653,7 +653,7 @@
 	if (state == target_state)
 		return 0;
 
-	ret = tps6598x_exec_cmd(tps, "SPSS", sizeof(u8), &target_state, 0, NULL);
+	ret = tps6598x_exec_cmd(tps, "SSPS", sizeof(u8), &target_state, 0, NULL);
 	if (ret)
 		return ret;
 
@@ -707,6 +707,7 @@
 	u32 conf;
 	u32 vid;
 	int ret;
+	u64 mask1;
 
 	tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL);
 	if (!tps)
@@ -730,11 +731,6 @@
 	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
 		tps->i2c_protocol = true;
 
-	/* Make sure the controller has application firmware running */
-	ret = tps6598x_check_mode(tps);
-	if (ret)
-		return ret;
-
 	if (np && of_device_is_compatible(np, "apple,cd321x")) {
 		/* Switch CD321X chips to the correct system power state */
 		ret = cd321x_switch_power_state(tps, TPS_SYSTEM_POWER_STATE_S0);
@@ -742,24 +738,27 @@
 			return ret;
 
 		/* CD321X chips have all interrupts masked initially */
-		ret = tps6598x_write64(tps, TPS_REG_INT_MASK1,
-					APPLE_CD_REG_INT_POWER_STATUS_UPDATE |
-					APPLE_CD_REG_INT_DATA_STATUS_UPDATE |
-					APPLE_CD_REG_INT_PLUG_EVENT);
-		if (ret)
-			return ret;
+		mask1 = APPLE_CD_REG_INT_POWER_STATUS_UPDATE |
+			APPLE_CD_REG_INT_DATA_STATUS_UPDATE |
+			APPLE_CD_REG_INT_PLUG_EVENT;
 
 		irq_handler = cd321x_interrupt;
 	} else {
 		/* Enable power status, data status and plug event interrupts */
-		ret = tps6598x_write64(tps, TPS_REG_INT_MASK1,
-				       TPS_REG_INT_POWER_STATUS_UPDATE |
-				       TPS_REG_INT_DATA_STATUS_UPDATE |
-				       TPS_REG_INT_PLUG_EVENT);
-		if (ret)
-			return ret;
+		mask1 = TPS_REG_INT_POWER_STATUS_UPDATE |
+			TPS_REG_INT_DATA_STATUS_UPDATE |
+			TPS_REG_INT_PLUG_EVENT;
 	}
 
+	/* Make sure the controller has application firmware running */
+	ret = tps6598x_check_mode(tps);
+	if (ret)
+		return ret;
+
+	ret = tps6598x_write64(tps, TPS_REG_INT_MASK1, mask1);
+	if (ret)
+		return ret;
+
 	ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 5f484fff..41b0cd1 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -591,8 +591,11 @@
 		vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
 	}
 
-	put_iova_domain(&vdpasim->iova);
-	iova_cache_put();
+	if (vdpa_get_dma_dev(vdpa)) {
+		put_iova_domain(&vdpasim->iova);
+		iova_cache_put();
+	}
+
 	kvfree(vdpasim->buffer);
 	if (vdpasim->iommu)
 		vhost_iotlb_free(vdpasim->iommu);
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 01c59ce..29cced1 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -1014,12 +1014,12 @@
 
 	mutex_lock(&d->mutex);
 	filep->private_data = NULL;
+	vhost_vdpa_clean_irq(v);
 	vhost_vdpa_reset(v);
 	vhost_dev_stop(&v->vdev);
 	vhost_vdpa_iotlb_free(v);
 	vhost_vdpa_free_domain(v);
 	vhost_vdpa_config_put(v);
-	vhost_vdpa_clean_irq(v);
 	vhost_dev_cleanup(&v->vdev);
 	kfree(v->vdev.vqs);
 	mutex_unlock(&d->mutex);
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 938aefb..d6ca1c7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -511,8 +511,6 @@
 
 	vhost_disable_notify(&vsock->dev, vq);
 	do {
-		u32 len;
-
 		if (!vhost_vsock_more_replies(vsock)) {
 			/* Stop tx until the device processes already
 			 * pending replies.  Leave tx virtqueue
@@ -540,7 +538,7 @@
 			continue;
 		}
 
-		len = pkt->len;
+		total_len += sizeof(pkt->hdr) + pkt->len;
 
 		/* Deliver to monitoring devices all received packets */
 		virtio_transport_deliver_tap_pkt(pkt);
@@ -553,9 +551,7 @@
 		else
 			virtio_transport_free_pkt(pkt);
 
-		len += sizeof(pkt->hdr);
-		vhost_add_used(vq, head, len);
-		total_len += len;
+		vhost_add_used(vq, head, 0);
 		added = true;
 	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
 
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index 1b45116..40496e9 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -332,13 +332,13 @@
 			    bool blink, bool underline, bool reverse,
 			    bool italic)
 {
-    u8 attr = ((color & 0x70) >> 1) | ((color & 7));
+	u8 fg = color & 7;
+	u8 bg = (color & 0x70) >> 4;
 
-    if (reverse) {
-	color = ((color >> 3) & 0x7) | ((color & 0x7) << 3);
-    }
-
-    return attr;
+	if (reverse)
+		return (fg << 3) | bg;
+	else
+		return (bg << 3) | fg;
 }
 
 static void sticon_invert_region(struct vc_data *conp, u16 *p, int count)
diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c
index f98e8f2..01fae2c 100644
--- a/drivers/video/fbdev/core/bitblit.c
+++ b/drivers/video/fbdev/core/bitblit.c
@@ -43,21 +43,6 @@
 	}
 }
 
-static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy,
-		      int sx, int dy, int dx, int height, int width)
-{
-	struct fb_copyarea area;
-
-	area.sx = sx * vc->vc_font.width;
-	area.sy = sy * vc->vc_font.height;
-	area.dx = dx * vc->vc_font.width;
-	area.dy = dy * vc->vc_font.height;
-	area.height = height * vc->vc_font.height;
-	area.width = width * vc->vc_font.width;
-
-	info->fbops->fb_copyarea(info, &area);
-}
-
 static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy,
 		      int sx, int height, int width)
 {
@@ -393,7 +378,6 @@
 
 void fbcon_set_bitops(struct fbcon_ops *ops)
 {
-	ops->bmove = bit_bmove;
 	ops->clear = bit_clear;
 	ops->putcs = bit_putcs;
 	ops->clear_margins = bit_clear_margins;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 22bb389..99ecd9a 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -173,8 +173,6 @@
 			int count, int ypos, int xpos);
 static void fbcon_clear_margins(struct vc_data *vc, int bottom_only);
 static void fbcon_cursor(struct vc_data *vc, int mode);
-static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
-			int height, int width);
 static int fbcon_switch(struct vc_data *vc);
 static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch);
 static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table);
@@ -182,16 +180,8 @@
 /*
  *  Internal routines
  */
-static __inline__ void ywrap_up(struct vc_data *vc, int count);
-static __inline__ void ywrap_down(struct vc_data *vc, int count);
-static __inline__ void ypan_up(struct vc_data *vc, int count);
-static __inline__ void ypan_down(struct vc_data *vc, int count);
-static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
-			    int dy, int dx, int height, int width, u_int y_break);
 static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var,
 			   int unit);
-static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
-			      int line, int count, int dy);
 static void fbcon_modechanged(struct fb_info *info);
 static void fbcon_set_all_vcs(struct fb_info *info);
 static void fbcon_start(void);
@@ -1136,14 +1126,6 @@
 	ops->graphics = 0;
 
 	/*
-	 * No more hw acceleration for fbcon.
-	 *
-	 * FIXME: Garbage collect all the now dead code after sufficient time
-	 * has passed.
-	 */
-	p->scrollmode = SCROLL_REDRAW;
-
-	/*
 	 *  ++guenther: console.c:vc_allocate() relies on initializing
 	 *  vc_{cols,rows}, but we must not set those if we are only
 	 *  resizing the console.
@@ -1229,14 +1211,13 @@
  *  This system is now divided into two levels because of complications
  *  caused by hardware scrolling. Top level functions:
  *
- *	fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
+ *	fbcon_clear(), fbcon_putc(), fbcon_clear_margins()
  *
  *  handles y values in range [0, scr_height-1] that correspond to real
  *  screen positions. y_wrap shift means that first line of bitmap may be
  *  anywhere on this display. These functions convert lineoffsets to
  *  bitmap offsets and deal with the wrap-around case by splitting blits.
  *
- *	fbcon_bmove_physical_8()    -- These functions fast implementations
  *	fbcon_clear_physical_8()    -- of original fbcon_XXX fns.
  *	fbcon_putc_physical_8()	    -- (font width != 8) may be added later
  *
@@ -1409,224 +1390,6 @@
 	}
 }
 
-static __inline__ void ywrap_up(struct vc_data *vc, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	
-	p->yscroll += count;
-	if (p->yscroll >= p->vrows)	/* Deal with wrap */
-		p->yscroll -= p->vrows;
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode |= FB_VMODE_YWRAP;
-	ops->update_start(info);
-	scrollback_max += count;
-	if (scrollback_max > scrollback_phys_max)
-		scrollback_max = scrollback_phys_max;
-	scrollback_current = 0;
-}
-
-static __inline__ void ywrap_down(struct vc_data *vc, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	
-	p->yscroll -= count;
-	if (p->yscroll < 0)	/* Deal with wrap */
-		p->yscroll += p->vrows;
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode |= FB_VMODE_YWRAP;
-	ops->update_start(info);
-	scrollback_max -= count;
-	if (scrollback_max < 0)
-		scrollback_max = 0;
-	scrollback_current = 0;
-}
-
-static __inline__ void ypan_up(struct vc_data *vc, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	struct fbcon_ops *ops = info->fbcon_par;
-
-	p->yscroll += count;
-	if (p->yscroll > p->vrows - vc->vc_rows) {
-		ops->bmove(vc, info, p->vrows - vc->vc_rows,
-			    0, 0, 0, vc->vc_rows, vc->vc_cols);
-		p->yscroll -= p->vrows - vc->vc_rows;
-	}
-
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode &= ~FB_VMODE_YWRAP;
-	ops->update_start(info);
-	fbcon_clear_margins(vc, 1);
-	scrollback_max += count;
-	if (scrollback_max > scrollback_phys_max)
-		scrollback_max = scrollback_phys_max;
-	scrollback_current = 0;
-}
-
-static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-
-	p->yscroll += count;
-
-	if (p->yscroll > p->vrows - vc->vc_rows) {
-		p->yscroll -= p->vrows - vc->vc_rows;
-		fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t);
-	}
-
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode &= ~FB_VMODE_YWRAP;
-	ops->update_start(info);
-	fbcon_clear_margins(vc, 1);
-	scrollback_max += count;
-	if (scrollback_max > scrollback_phys_max)
-		scrollback_max = scrollback_phys_max;
-	scrollback_current = 0;
-}
-
-static __inline__ void ypan_down(struct vc_data *vc, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	struct fbcon_ops *ops = info->fbcon_par;
-	
-	p->yscroll -= count;
-	if (p->yscroll < 0) {
-		ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows,
-			    0, vc->vc_rows, vc->vc_cols);
-		p->yscroll += p->vrows - vc->vc_rows;
-	}
-
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode &= ~FB_VMODE_YWRAP;
-	ops->update_start(info);
-	fbcon_clear_margins(vc, 1);
-	scrollback_max -= count;
-	if (scrollback_max < 0)
-		scrollback_max = 0;
-	scrollback_current = 0;
-}
-
-static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-
-	p->yscroll -= count;
-
-	if (p->yscroll < 0) {
-		p->yscroll += p->vrows - vc->vc_rows;
-		fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count);
-	}
-
-	ops->var.xoffset = 0;
-	ops->var.yoffset = p->yscroll * vc->vc_font.height;
-	ops->var.vmode &= ~FB_VMODE_YWRAP;
-	ops->update_start(info);
-	fbcon_clear_margins(vc, 1);
-	scrollback_max -= count;
-	if (scrollback_max < 0)
-		scrollback_max = 0;
-	scrollback_current = 0;
-}
-
-static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
-			      int line, int count, int dy)
-{
-	unsigned short *s = (unsigned short *)
-		(vc->vc_origin + vc->vc_size_row * line);
-
-	while (count--) {
-		unsigned short *start = s;
-		unsigned short *le = advance_row(s, 1);
-		unsigned short c;
-		int x = 0;
-		unsigned short attr = 1;
-
-		do {
-			c = scr_readw(s);
-			if (attr != (c & 0xff00)) {
-				attr = c & 0xff00;
-				if (s > start) {
-					fbcon_putcs(vc, start, s - start,
-						    dy, x);
-					x += s - start;
-					start = s;
-				}
-			}
-			console_conditional_schedule();
-			s++;
-		} while (s < le);
-		if (s > start)
-			fbcon_putcs(vc, start, s - start, dy, x);
-		console_conditional_schedule();
-		dy++;
-	}
-}
-
-static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
-			struct fbcon_display *p, int line, int count, int ycount)
-{
-	int offset = ycount * vc->vc_cols;
-	unsigned short *d = (unsigned short *)
-	    (vc->vc_origin + vc->vc_size_row * line);
-	unsigned short *s = d + offset;
-	struct fbcon_ops *ops = info->fbcon_par;
-
-	while (count--) {
-		unsigned short *start = s;
-		unsigned short *le = advance_row(s, 1);
-		unsigned short c;
-		int x = 0;
-
-		do {
-			c = scr_readw(s);
-
-			if (c == scr_readw(d)) {
-				if (s > start) {
-					ops->bmove(vc, info, line + ycount, x,
-						   line, x, 1, s-start);
-					x += s - start + 1;
-					start = s + 1;
-				} else {
-					x++;
-					start++;
-				}
-			}
-
-			scr_writew(c, d);
-			console_conditional_schedule();
-			s++;
-			d++;
-		} while (s < le);
-		if (s > start)
-			ops->bmove(vc, info, line + ycount, x, line, x, 1,
-				   s-start);
-		console_conditional_schedule();
-		if (ycount > 0)
-			line++;
-		else {
-			line--;
-			/* NOTE: We subtract two lines from these pointers */
-			s -= vc->vc_size_row;
-			d -= vc->vc_size_row;
-		}
-	}
-}
-
 static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
 			 int line, int count, int offset)
 {
@@ -1687,7 +1450,6 @@
 {
 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
 	struct fbcon_display *p = &fb_display[vc->vc_num];
-	int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK;
 
 	if (fbcon_is_inactive(vc, info))
 		return true;
@@ -1704,249 +1466,32 @@
 	case SM_UP:
 		if (count > vc->vc_rows)	/* Maximum realistic size */
 			count = vc->vc_rows;
-		if (logo_shown >= 0)
-			goto redraw_up;
-		switch (p->scrollmode) {
-		case SCROLL_MOVE:
-			fbcon_redraw_blit(vc, info, p, t, b - t - count,
-				     count);
-			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-			scr_memsetw((unsigned short *) (vc->vc_origin +
-							vc->vc_size_row *
-							(b - count)),
-				    vc->vc_video_erase_char,
-				    vc->vc_size_row * count);
-			return true;
-
-		case SCROLL_WRAP_MOVE:
-			if (b - t - count > 3 * vc->vc_rows >> 2) {
-				if (t > 0)
-					fbcon_bmove(vc, 0, 0, count, 0, t,
-						    vc->vc_cols);
-				ywrap_up(vc, count);
-				if (vc->vc_rows - b > 0)
-					fbcon_bmove(vc, b - count, 0, b, 0,
-						    vc->vc_rows - b,
-						    vc->vc_cols);
-			} else if (info->flags & FBINFO_READS_FAST)
-				fbcon_bmove(vc, t + count, 0, t, 0,
-					    b - t - count, vc->vc_cols);
-			else
-				goto redraw_up;
-			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_PAN_REDRAW:
-			if ((p->yscroll + count <=
-			     2 * (p->vrows - vc->vc_rows))
-			    && ((!scroll_partial && (b - t == vc->vc_rows))
-				|| (scroll_partial
-				    && (b - t - count >
-					3 * vc->vc_rows >> 2)))) {
-				if (t > 0)
-					fbcon_redraw_move(vc, p, 0, t, count);
-				ypan_up_redraw(vc, t, count);
-				if (vc->vc_rows - b > 0)
-					fbcon_redraw_move(vc, p, b,
-							  vc->vc_rows - b, b);
-			} else
-				fbcon_redraw_move(vc, p, t + count, b - t - count, t);
-			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_PAN_MOVE:
-			if ((p->yscroll + count <=
-			     2 * (p->vrows - vc->vc_rows))
-			    && ((!scroll_partial && (b - t == vc->vc_rows))
-				|| (scroll_partial
-				    && (b - t - count >
-					3 * vc->vc_rows >> 2)))) {
-				if (t > 0)
-					fbcon_bmove(vc, 0, 0, count, 0, t,
-						    vc->vc_cols);
-				ypan_up(vc, count);
-				if (vc->vc_rows - b > 0)
-					fbcon_bmove(vc, b - count, 0, b, 0,
-						    vc->vc_rows - b,
-						    vc->vc_cols);
-			} else if (info->flags & FBINFO_READS_FAST)
-				fbcon_bmove(vc, t + count, 0, t, 0,
-					    b - t - count, vc->vc_cols);
-			else
-				goto redraw_up;
-			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_REDRAW:
-		      redraw_up:
-			fbcon_redraw(vc, p, t, b - t - count,
-				     count * vc->vc_cols);
-			fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
-			scr_memsetw((unsigned short *) (vc->vc_origin +
-							vc->vc_size_row *
-							(b - count)),
-				    vc->vc_video_erase_char,
-				    vc->vc_size_row * count);
-			return true;
-		}
-		break;
+		fbcon_redraw(vc, p, t, b - t - count,
+			     count * vc->vc_cols);
+		fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
+		scr_memsetw((unsigned short *) (vc->vc_origin +
+						vc->vc_size_row *
+						(b - count)),
+			    vc->vc_video_erase_char,
+			    vc->vc_size_row * count);
+		return true;
 
 	case SM_DOWN:
 		if (count > vc->vc_rows)	/* Maximum realistic size */
 			count = vc->vc_rows;
-		if (logo_shown >= 0)
-			goto redraw_down;
-		switch (p->scrollmode) {
-		case SCROLL_MOVE:
-			fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
-				     -count);
-			fbcon_clear(vc, t, 0, count, vc->vc_cols);
-			scr_memsetw((unsigned short *) (vc->vc_origin +
-							vc->vc_size_row *
-							t),
-				    vc->vc_video_erase_char,
-				    vc->vc_size_row * count);
-			return true;
-
-		case SCROLL_WRAP_MOVE:
-			if (b - t - count > 3 * vc->vc_rows >> 2) {
-				if (vc->vc_rows - b > 0)
-					fbcon_bmove(vc, b, 0, b - count, 0,
-						    vc->vc_rows - b,
-						    vc->vc_cols);
-				ywrap_down(vc, count);
-				if (t > 0)
-					fbcon_bmove(vc, count, 0, 0, 0, t,
-						    vc->vc_cols);
-			} else if (info->flags & FBINFO_READS_FAST)
-				fbcon_bmove(vc, t, 0, t + count, 0,
-					    b - t - count, vc->vc_cols);
-			else
-				goto redraw_down;
-			fbcon_clear(vc, t, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_PAN_MOVE:
-			if ((count - p->yscroll <= p->vrows - vc->vc_rows)
-			    && ((!scroll_partial && (b - t == vc->vc_rows))
-				|| (scroll_partial
-				    && (b - t - count >
-					3 * vc->vc_rows >> 2)))) {
-				if (vc->vc_rows - b > 0)
-					fbcon_bmove(vc, b, 0, b - count, 0,
-						    vc->vc_rows - b,
-						    vc->vc_cols);
-				ypan_down(vc, count);
-				if (t > 0)
-					fbcon_bmove(vc, count, 0, 0, 0, t,
-						    vc->vc_cols);
-			} else if (info->flags & FBINFO_READS_FAST)
-				fbcon_bmove(vc, t, 0, t + count, 0,
-					    b - t - count, vc->vc_cols);
-			else
-				goto redraw_down;
-			fbcon_clear(vc, t, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_PAN_REDRAW:
-			if ((count - p->yscroll <= p->vrows - vc->vc_rows)
-			    && ((!scroll_partial && (b - t == vc->vc_rows))
-				|| (scroll_partial
-				    && (b - t - count >
-					3 * vc->vc_rows >> 2)))) {
-				if (vc->vc_rows - b > 0)
-					fbcon_redraw_move(vc, p, b, vc->vc_rows - b,
-							  b - count);
-				ypan_down_redraw(vc, t, count);
-				if (t > 0)
-					fbcon_redraw_move(vc, p, count, t, 0);
-			} else
-				fbcon_redraw_move(vc, p, t, b - t - count, t + count);
-			fbcon_clear(vc, t, 0, count, vc->vc_cols);
-			break;
-
-		case SCROLL_REDRAW:
-		      redraw_down:
-			fbcon_redraw(vc, p, b - 1, b - t - count,
-				     -count * vc->vc_cols);
-			fbcon_clear(vc, t, 0, count, vc->vc_cols);
-			scr_memsetw((unsigned short *) (vc->vc_origin +
-							vc->vc_size_row *
-							t),
-				    vc->vc_video_erase_char,
-				    vc->vc_size_row * count);
-			return true;
-		}
+		fbcon_redraw(vc, p, b - 1, b - t - count,
+			     -count * vc->vc_cols);
+		fbcon_clear(vc, t, 0, count, vc->vc_cols);
+		scr_memsetw((unsigned short *) (vc->vc_origin +
+						vc->vc_size_row *
+						t),
+			    vc->vc_video_erase_char,
+			    vc->vc_size_row * count);
+		return true;
 	}
 	return false;
 }
 
-
-static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx,
-			int height, int width)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_display *p = &fb_display[vc->vc_num];
-	
-	if (fbcon_is_inactive(vc, info))
-		return;
-
-	if (!width || !height)
-		return;
-
-	/*  Split blits that cross physical y_wrap case.
-	 *  Pathological case involves 4 blits, better to use recursive
-	 *  code rather than unrolled case
-	 *
-	 *  Recursive invocations don't need to erase the cursor over and
-	 *  over again, so we use fbcon_bmove_rec()
-	 */
-	fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width,
-			p->vrows - p->yscroll);
-}
-
-static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx,
-			    int dy, int dx, int height, int width, u_int y_break)
-{
-	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
-	struct fbcon_ops *ops = info->fbcon_par;
-	u_int b;
-
-	if (sy < y_break && sy + height > y_break) {
-		b = y_break - sy;
-		if (dy < sy) {	/* Avoid trashing self */
-			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
-					y_break);
-			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
-					height - b, width, y_break);
-		} else {
-			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
-					height - b, width, y_break);
-			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
-					y_break);
-		}
-		return;
-	}
-
-	if (dy < y_break && dy + height > y_break) {
-		b = y_break - dy;
-		if (dy < sy) {	/* Avoid trashing self */
-			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
-					y_break);
-			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
-					height - b, width, y_break);
-		} else {
-			fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx,
-					height - b, width, y_break);
-			fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width,
-					y_break);
-		}
-		return;
-	}
-	ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx,
-		   height, width);
-}
-
 static void updatescrollmode(struct fbcon_display *p,
 					struct fb_info *info,
 					struct vc_data *vc)
@@ -2119,21 +1664,7 @@
 
 	updatescrollmode(p, info, vc);
 
-	switch (p->scrollmode) {
-	case SCROLL_WRAP_MOVE:
-		scrollback_phys_max = p->vrows - vc->vc_rows;
-		break;
-	case SCROLL_PAN_MOVE:
-	case SCROLL_PAN_REDRAW:
-		scrollback_phys_max = p->vrows - 2 * vc->vc_rows;
-		if (scrollback_phys_max < 0)
-			scrollback_phys_max = 0;
-		break;
-	default:
-		scrollback_phys_max = 0;
-		break;
-	}
-
+	scrollback_phys_max = 0;
 	scrollback_max = 0;
 	scrollback_current = 0;
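
With every accelerated path gone, both scroll directions above reduce to
the same three steps. A minimal sketch of that shape, reusing the fbcon
helpers visible in this hunk (illustrative only, not a function the
patch adds):

	static bool example_scroll_up(struct vc_data *vc, struct fbcon_display *p,
				      int t, int b, int count)
	{
		/* 1. repaint the surviving lines, shifted up by 'count' rows */
		fbcon_redraw(vc, p, t, b - t - count, count * vc->vc_cols);
		/* 2. blank the rows that scrolled into view on the screen */
		fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
		/* 3. erase the same rows in the software screen buffer */
		scr_memsetw((unsigned short *)(vc->vc_origin +
					       vc->vc_size_row * (b - count)),
			    vc->vc_video_erase_char, vc->vc_size_row * count);
		return true;
	}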
 
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index 9315b36..a00603b 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -29,7 +29,6 @@
     /* Filled in by the low-level console driver */
     const u_char *fontdata;
     int userfont;                   /* != 0 if fontdata kmalloc()ed */
-    u_short scrollmode;             /* Scroll Method */
     u_short inverse;                /* != 0 text black on white as default */
     short yscroll;                  /* Hardware scrolling */
     int vrows;                      /* number of virtual rows */
@@ -52,8 +51,6 @@
 };
 
 struct fbcon_ops {
-	void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy,
-		      int sx, int dy, int dx, int height, int width);
 	void (*clear)(struct vc_data *vc, struct fb_info *info, int sy,
 		      int sx, int height, int width);
 	void (*putcs)(struct vc_data *vc, struct fb_info *info,
@@ -152,62 +149,6 @@
 #define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0)
 #define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1)
 
-    /*
-     *  Scroll Method
-     */
-     
-/* There are several methods fbcon can use to move text around the screen:
- *
- *                     Operation   Pan    Wrap
- *---------------------------------------------
- * SCROLL_MOVE         copyarea    No     No
- * SCROLL_PAN_MOVE     copyarea    Yes    No
- * SCROLL_WRAP_MOVE    copyarea    No     Yes
- * SCROLL_REDRAW       imageblit   No     No
- * SCROLL_PAN_REDRAW   imageblit   Yes    No
- * SCROLL_WRAP_REDRAW  imageblit   No     Yes
- *
- * (SCROLL_WRAP_REDRAW is not implemented yet)
- *
- * In general, fbcon will choose the best scrolling
- * method based on the rule below:
- *
- * Pan/Wrap > accel imageblit > accel copyarea >
- * soft imageblit > (soft copyarea)
- *
- * Exception to the rule: Pan + accel copyarea is
- * preferred over Pan + accel imageblit.
- *
- * The above is typical for PCI/AGP cards. Unless
- * overridden, fbcon will never use soft copyarea.
- *
- * If you need to override the above rule, set the
- * appropriate flags in fb_info->flags.  For example,
- * to prefer copyarea over imageblit, set
- * FBINFO_READS_FAST.
- *
- * Other notes:
- * + use the hardware engine to move the text
- *    (hw-accelerated copyarea() and fillrect())
- * + use hardware-supported panning on a large virtual screen
- * + amifb can not only pan, but also wrap the display by N lines
- *    (i.e. visible line i = physical line (i+N) % yres).
- * + read what's already rendered on the screen and
- *     write it in a different place (this is cfb_copyarea())
- * + re-render the text to the screen
- *
- * Whether to use wrapping or panning can only be figured out at
- * runtime (when we know whether our font height is a multiple
- * of the pan/wrap step)
- *
- */
-
-#define SCROLL_MOVE	   0x001
-#define SCROLL_PAN_MOVE	   0x002
-#define SCROLL_WRAP_MOVE   0x003
-#define SCROLL_REDRAW	   0x004
-#define SCROLL_PAN_REDRAW  0x005
-
 #ifdef CONFIG_FB_TILEBLITTING
 extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
 #endif
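
With ->bmove and the scrollmode field gone from this header, a blitting
backend wires up one hook fewer. A hedged sketch of an init function
under the new struct layout (the example_* callbacks are hypothetical;
compare the real fbcon_rotate_* initializers below):

	static void example_blit_init(struct fbcon_ops *ops)
	{
		ops->clear         = example_clear;	/* fill character cells */
		ops->putcs         = example_putcs;	/* render a run of chars */
		ops->clear_margins = example_clear_margins;
		/* no ->bmove: fbcon never copies text around on screen now */
	}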
diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c
index 9cd2c4b..ffa7893 100644
--- a/drivers/video/fbdev/core/fbcon_ccw.c
+++ b/drivers/video/fbdev/core/fbcon_ccw.c
@@ -59,31 +59,12 @@
 	}
 }
 
-
-static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
-		     int sx, int dy, int dx, int height, int width)
-{
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fb_copyarea area;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-
-	area.sx = sy * vc->vc_font.height;
-	area.sy = vyres - ((sx + width) * vc->vc_font.width);
-	area.dx = dy * vc->vc_font.height;
-	area.dy = vyres - ((dx + width) * vc->vc_font.width);
-	area.width = height * vc->vc_font.height;
-	area.height  = width * vc->vc_font.width;
-
-	info->fbops->fb_copyarea(info, &area);
-}
-
 static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy,
 		     int sx, int height, int width)
 {
-	struct fbcon_ops *ops = info->fbcon_par;
 	struct fb_fillrect region;
 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
 
 	region.color = attr_bgcol_ec(bgshift,vc,info);
 	region.dx = sy * vc->vc_font.height;
@@ -140,7 +121,7 @@
 	u32 cnt, pitch, size;
 	u32 attribute = get_attribute(info, scr_readw(s));
 	u8 *dst, *buf = NULL;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -229,7 +210,7 @@
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
 	char *src;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -387,7 +368,7 @@
 {
 	struct fbcon_ops *ops = info->fbcon_par;
 	u32 yoffset;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
 	int err;
 
 	yoffset = (vyres - info->var.yres) - ops->var.xoffset;
@@ -402,7 +383,6 @@
 
 void fbcon_rotate_ccw(struct fbcon_ops *ops)
 {
-	ops->bmove = ccw_bmove;
 	ops->clear = ccw_clear;
 	ops->putcs = ccw_putcs;
 	ops->clear_margins = ccw_clear_margins;
diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c
index 88d89fa..92e5b7f 100644
--- a/drivers/video/fbdev/core/fbcon_cw.c
+++ b/drivers/video/fbdev/core/fbcon_cw.c
@@ -44,31 +44,12 @@
 	}
 }
 
-
-static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy,
-		     int sx, int dy, int dx, int height, int width)
-{
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fb_copyarea area;
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
-
-	area.sx = vxres - ((sy + height) * vc->vc_font.height);
-	area.sy = sx * vc->vc_font.width;
-	area.dx = vxres - ((dy + height) * vc->vc_font.height);
-	area.dy = dx * vc->vc_font.width;
-	area.width = height * vc->vc_font.height;
-	area.height  = width * vc->vc_font.width;
-
-	info->fbops->fb_copyarea(info, &area);
-}
-
 static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy,
 		     int sx, int height, int width)
 {
-	struct fbcon_ops *ops = info->fbcon_par;
 	struct fb_fillrect region;
 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vxres = info->var.xres;
 
 	region.color = attr_bgcol_ec(bgshift,vc,info);
 	region.dx = vxres - ((sy + height) * vc->vc_font.height);
@@ -125,7 +106,7 @@
 	u32 cnt, pitch, size;
 	u32 attribute = get_attribute(info, scr_readw(s));
 	u8 *dst, *buf = NULL;
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vxres = info->var.xres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -212,7 +193,7 @@
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
 	char *src;
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vxres = info->var.xres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -369,7 +350,7 @@
 static int cw_update_start(struct fb_info *info)
 {
 	struct fbcon_ops *ops = info->fbcon_par;
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vxres = info->var.xres;
 	u32 xoffset;
 	int err;
 
@@ -385,7 +366,6 @@
 
 void fbcon_rotate_cw(struct fbcon_ops *ops)
 {
-	ops->bmove = cw_bmove;
 	ops->clear = cw_clear;
 	ops->putcs = cw_putcs;
 	ops->clear_margins = cw_clear_margins;
diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h
index e233444..b528b2e 100644
--- a/drivers/video/fbdev/core/fbcon_rotate.h
+++ b/drivers/video/fbdev/core/fbcon_rotate.h
@@ -11,15 +11,6 @@
 #ifndef _FBCON_ROTATE_H
 #define _FBCON_ROTATE_H
 
-#define GETVYRES(s,i) ({                           \
-        (s == SCROLL_REDRAW || s == SCROLL_MOVE) ? \
-        (i)->var.yres : (i)->var.yres_virtual; })
-
-#define GETVXRES(s,i) ({                           \
-        (s == SCROLL_REDRAW || s == SCROLL_MOVE || !(i)->fix.xpanstep) ? \
-        (i)->var.xres : (i)->var.xres_virtual; })
-
-
 static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat)
 {
 	u32 tmp = (y * pitch) + x, index = tmp / 8,  bit = tmp % 8;
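
The two macros deleted above only differed from the visible resolution
when a panning or wrapping scroll mode was active; with those modes gone
the answer is constant. The equivalence as an illustrative helper (not
something the patch itself adds):

	static inline u32 visible_yres(const struct fb_info *info)
	{
		/*
		 * Formerly var.yres for SCROLL_REDRAW/SCROLL_MOVE and
		 * var.yres_virtual for the pan/wrap modes; only the
		 * first case survives.
		 */
		return info->var.yres;
	}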
diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c
index 8d5e66b..09619bd 100644
--- a/drivers/video/fbdev/core/fbcon_ud.c
+++ b/drivers/video/fbdev/core/fbcon_ud.c
@@ -44,33 +44,13 @@
 	}
 }
 
-
-static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy,
-		     int sx, int dy, int dx, int height, int width)
-{
-	struct fbcon_ops *ops = info->fbcon_par;
-	struct fb_copyarea area;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
-
-	area.sy = vyres - ((sy + height) * vc->vc_font.height);
-	area.sx = vxres - ((sx + width) * vc->vc_font.width);
-	area.dy = vyres - ((dy + height) * vc->vc_font.height);
-	area.dx = vxres - ((dx + width) * vc->vc_font.width);
-	area.height = height * vc->vc_font.height;
-	area.width  = width * vc->vc_font.width;
-
-	info->fbops->fb_copyarea(info, &area);
-}
-
 static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy,
 		     int sx, int height, int width)
 {
-	struct fbcon_ops *ops = info->fbcon_par;
 	struct fb_fillrect region;
 	int bgshift = (vc->vc_hi_font_mask) ? 13 : 12;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
+	u32 vxres = info->var.xres;
 
 	region.color = attr_bgcol_ec(bgshift,vc,info);
 	region.dy = vyres - ((sy + height) * vc->vc_font.height);
@@ -162,8 +142,8 @@
 	u32 mod = vc->vc_font.width % 8, cnt, pitch, size;
 	u32 attribute = get_attribute(info, scr_readw(s));
 	u8 *dst, *buf = NULL;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
+	u32 vxres = info->var.xres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -259,8 +239,8 @@
 	int attribute, use_sw = vc->vc_cursor_type & CUR_SW;
 	int err = 1, dx, dy;
 	char *src;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
+	u32 vxres = info->var.xres;
 
 	if (!ops->fontbuffer)
 		return;
@@ -410,8 +390,8 @@
 {
 	struct fbcon_ops *ops = info->fbcon_par;
 	int xoffset, yoffset;
-	u32 vyres = GETVYRES(ops->p->scrollmode, info);
-	u32 vxres = GETVXRES(ops->p->scrollmode, info);
+	u32 vyres = info->var.yres;
+	u32 vxres = info->var.xres;
 	int err;
 
 	xoffset = vxres - info->var.xres - ops->var.xoffset;
@@ -429,7 +409,6 @@
 
 void fbcon_rotate_ud(struct fbcon_ops *ops)
 {
-	ops->bmove = ud_bmove;
 	ops->clear = ud_clear;
 	ops->putcs = ud_putcs;
 	ops->clear_margins = ud_clear_margins;
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 7420d2c..826175ad 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1702,8 +1702,11 @@
 {
 	unlink_framebuffer(fb_info);
 	if (fb_info->pixmap.addr &&
-	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
+	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) {
 		kfree(fb_info->pixmap.addr);
+		fb_info->pixmap.addr = NULL;
+	}
+
 	fb_destroy_modelist(&fb_info->modelist);
 	registered_fb[fb_info->node] = NULL;
 	num_registered_fb--;
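
The fix above is the standard dangling-pointer guard: clearing the
pointer after kfree() makes a repeated unregister path harmless. The
pattern in miniature (hypothetical helper):

	static void example_free_pixmap(struct fb_info *fb_info)
	{
		kfree(fb_info->pixmap.addr);
		fb_info->pixmap.addr = NULL;	/* repeat free is now a no-op */
	}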
diff --git a/drivers/video/fbdev/core/tileblit.c b/drivers/video/fbdev/core/tileblit.c
index 2768eff..72af950 100644
--- a/drivers/video/fbdev/core/tileblit.c
+++ b/drivers/video/fbdev/core/tileblit.c
@@ -16,21 +16,6 @@
 #include <asm/types.h>
 #include "fbcon.h"
 
-static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy,
-		       int sx, int dy, int dx, int height, int width)
-{
-	struct fb_tilearea area;
-
-	area.sx = sx;
-	area.sy = sy;
-	area.dx = dx;
-	area.dy = dy;
-	area.height = height;
-	area.width = width;
-
-	info->tileops->fb_tilecopy(info, &area);
-}
-
 static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy,
 		       int sx, int height, int width)
 {
@@ -133,7 +118,6 @@
 	struct fb_tilemap map;
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	ops->bmove = tile_bmove;
 	ops->clear = tile_clear;
 	ops->putcs = tile_putcs;
 	ops->clear_margins = tile_clear_margins;
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index edca370..ea42ba6 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -351,6 +351,17 @@
 	char *option = NULL;
 	efi_memory_desc_t md;
 
+	/*
+	 * Generic drivers must not be registered if a framebuffer exists.
+	 * If a native driver was probed, the display hardware was already
+	 * taken and attempting to use the system framebuffer is dangerous.
+	 */
+	if (num_registered_fb > 0) {
+		dev_err(&dev->dev,
+			"efifb: a framebuffer is already registered\n");
+		return -EINVAL;
+	}
+
 	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
 		return -ENODEV;
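
The same probe guard is added to efifb here and to simplefb below. Its
shape, factored into a hypothetical helper:

	static int example_generic_fb_guard(struct device *dev)
	{
		/* a native driver already owns the display hardware */
		if (num_registered_fb > 0) {
			dev_err(dev, "a framebuffer is already registered\n");
			return -EINVAL;
		}
		return 0;
	}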
 
diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 62f0ded..b63074f 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -407,6 +407,17 @@
 	struct simplefb_par *par;
 	struct resource *mem;
 
+	/*
+	 * Generic drivers must not be registered if a framebuffer exists.
+	 * If a native driver was probed, the display hardware was already
+	 * taken and attempting to use the system framebuffer is dangerous.
+	 */
+	if (num_registered_fb > 0) {
+		dev_err(&pdev->dev,
+			"simplefb: a framebuffer is already registered\n");
+		return -EINVAL;
+	}
+
 	if (fb_get_options("simplefb", NULL))
 		return -ENODEV;
 
diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c
index bcacfb6..0fe922f 100644
--- a/drivers/video/fbdev/skeletonfb.c
+++ b/drivers/video/fbdev/skeletonfb.c
@@ -505,15 +505,15 @@
 }
 
 /**
- *      xxxfb_copyarea - REQUIRED function. Can use generic routines if
- *                       non acclerated hardware and packed pixel based.
+ *      xxxfb_copyarea - OBSOLETE function.
  *                       Copies one area of the screen to another area.
+ *                       Will be deleted in a future version.
  *
  *      @info: frame buffer structure that represents a single frame buffer
  *      @area: Structure providing the data to copy the framebuffer contents
  *	       from one region to another.
  *
- *      This drawing operation copies a rectangular area from one area of the
+ *      This drawing operation copied a rectangular area from one area of the
  *	screen to another area.
  */
 void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) 
@@ -645,9 +645,9 @@
 	.fb_setcolreg	= xxxfb_setcolreg,
 	.fb_blank	= xxxfb_blank,
 	.fb_pan_display	= xxxfb_pan_display,
-	.fb_fillrect	= xxxfb_fillrect, 	/* Needed !!! */
-	.fb_copyarea	= xxxfb_copyarea,	/* Needed !!! */
-	.fb_imageblit	= xxxfb_imageblit,	/* Needed !!! */
+	.fb_fillrect	= xxxfb_fillrect,	/* Needed !!!   */
+	.fb_copyarea	= xxxfb_copyarea,	/* Obsolete     */
+	.fb_imageblit	= xxxfb_imageblit,	/* Needed !!!   */
 	.fb_cursor	= xxxfb_cursor,		/* Optional !!! */
 	.fb_sync	= xxxfb_sync,
 	.fb_ioctl	= xxxfb_ioctl,
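
Unaccelerated drivers typically point the two remaining mandatory hooks
at the generic cfb_* helpers; a sketch of such a table under that
assumption, mirroring the annotations above:

	static const struct fb_ops example_fb_ops = {
		.owner		= THIS_MODULE,
		.fb_fillrect	= cfb_fillrect,		/* Needed !!!   */
		.fb_copyarea	= cfb_copyarea,		/* Obsolete     */
		.fb_imageblit	= cfb_imageblit,	/* Needed !!!   */
	};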
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 5ec5144..6826f98 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -695,6 +695,7 @@
 	.remove = xenfb_remove,
 	.resume = xenfb_resume,
 	.otherend_changed = xenfb_backend_changed,
+	.not_essential = true,
 };
 
 static int __init xenfb_init(void)
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 0da0af2..96e5a87 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -2889,6 +2889,7 @@
 #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA)
 	VIRTIO_MEM_F_ACPI_PXM,
 #endif
+	VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE,
 };
 
 static const struct virtio_device_id virtio_mem_id_table[] = {
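
Listing the bit above lets the device negotiate it; at runtime the
driver then branches on the result. A minimal sketch of that check (the
helper is illustrative):

	static bool example_unplugged_inaccessible(struct virtio_device *vdev)
	{
		/* negotiated => unplugged ranges must not be touched */
		return virtio_has_feature(vdev,
					  VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
	}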
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 00f64f2..6d2614e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -14,9 +14,6 @@
 #include <linux/spinlock.h>
 #include <xen/xen.h>
 
-static bool force_used_validation = false;
-module_param(force_used_validation, bool, 0444);
-
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)				\
@@ -185,9 +182,6 @@
 		} packed;
 	};
 
-	/* Per-descriptor in buffer length */
-	u32 *buflen;
-
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -496,7 +490,6 @@
 	unsigned int i, n, avail, descs_used, prev, err_idx;
 	int head;
 	bool indirect;
-	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -578,7 +571,6 @@
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
 						     indirect);
-			buflen += sg->length;
 		}
 	}
 	/* Last one doesn't continue. */
@@ -618,10 +610,6 @@
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[head] = buflen;
-
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -796,11 +784,6 @@
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
-	if (vq->buflen && unlikely(*len > vq->buflen[i])) {
-		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-			*len, vq->buflen[i]);
-		return NULL;
-	}
 
 	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->split.desc_state[i].data;
@@ -1079,7 +1062,6 @@
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
-	u32 buflen = 0;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1109,8 +1091,6 @@
 			desc[i].addr = cpu_to_le64(addr);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
-			if (n >= out_sgs)
-				buflen += sg->length;
 		}
 	}
 
@@ -1164,10 +1144,6 @@
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[id] = buflen;
-
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -1203,7 +1179,6 @@
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr, avail_used_flags;
 	int err;
-	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -1283,8 +1258,6 @@
 					1 << VRING_PACKED_DESC_F_AVAIL |
 					1 << VRING_PACKED_DESC_F_USED;
 			}
-			if (n >= out_sgs)
-				buflen += sg->length;
 		}
 	}
 
@@ -1304,10 +1277,6 @@
 	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[id] = buflen;
-
 	/*
 	 * A driver MUST NOT make the first descriptor in the list
 	 * available before all subsequent descriptors comprising
@@ -1494,11 +1463,6 @@
 		BAD_RING(vq, "id %u is not a head!\n", id);
 		return NULL;
 	}
-	if (vq->buflen && unlikely(*len > vq->buflen[id])) {
-		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-			*len, vq->buflen[id]);
-		return NULL;
-	}
 
 	/* detach_buf_packed clears data, so grab it now. */
 	ret = vq->packed.desc_state[id].data;
@@ -1704,7 +1668,6 @@
 	struct vring_virtqueue *vq;
 	struct vring_packed_desc *ring;
 	struct vring_packed_desc_event *driver, *device;
-	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
 	size_t ring_size_in_bytes, event_size_in_bytes;
 
@@ -1794,15 +1757,6 @@
 	if (!vq->packed.desc_extra)
 		goto err_desc_extra;
 
-	if (!drv->suppress_used_validation || force_used_validation) {
-		vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
-					   GFP_KERNEL);
-		if (!vq->buflen)
-			goto err_buflen;
-	} else {
-		vq->buflen = NULL;
-	}
-
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1815,8 +1769,6 @@
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
-err_buflen:
-	kfree(vq->packed.desc_extra);
 err_desc_extra:
 	kfree(vq->packed.desc_state);
 err_desc_state:
@@ -2224,7 +2176,6 @@
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
-	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	struct vring_virtqueue *vq;
 
 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2284,15 +2235,6 @@
 	if (!vq->split.desc_extra)
 		goto err_extra;
 
-	if (!drv->suppress_used_validation || force_used_validation) {
-		vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
-					   GFP_KERNEL);
-		if (!vq->buflen)
-			goto err_buflen;
-	} else {
-		vq->buflen = NULL;
-	}
-
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	memset(vq->split.desc_state, 0, vring.num *
@@ -2303,8 +2245,6 @@
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
-err_buflen:
-	kfree(vq->split.desc_extra);
 err_extra:
 	kfree(vq->split.desc_state);
 err_state:
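
The deleted err_buflen label was one rung on the usual kernel unwind
ladder: each allocation failure jumps to the label that releases
everything allocated before it, in reverse order. The idiom in
miniature (hypothetical example, not virtio code):

	static int example_alloc_two(void **a, void **b)
	{
		*a = kmalloc(64, GFP_KERNEL);
		if (!*a)
			goto err_a;
		*b = kmalloc(64, GFP_KERNEL);
		if (!*b)
			goto err_b;
		return 0;

	err_b:
		kfree(*a);	/* undo the first allocation */
	err_a:
		return -ENOMEM;
	}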
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a1b11c62..33e941e 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -259,9 +259,15 @@
 	  if guests need generic access to SCSI devices.
 
 config XEN_PRIVCMD
-	tristate
+	tristate "Xen hypercall passthrough driver"
 	depends on XEN
 	default m
+	help
+	  The hypercall passthrough driver allows privileged user programs to
+	  perform Xen hypercalls. This driver is normally required for systems
+	  running as Dom0 to perform privileged operations, but in some
+	  disaggregated Xen setups this driver might be needed for other
+	  domains, too.
 
 config XEN_ACPI_PROCESSOR
 	tristate "Xen ACPI processor"
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 7984645..3c9ae15 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -1275,6 +1275,7 @@
 	.probe = pvcalls_front_probe,
 	.remove = pvcalls_front_remove,
 	.otherend_changed = pvcalls_front_changed,
+	.not_essential = true,
 };
 
 static int __init pvcalls_frontend_init(void)
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd003ca..fe360c3 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -909,7 +909,7 @@
 
 static int __init xenbus_init(void)
 {
-	int err = 0;
+	int err;
 	uint64_t v = 0;
 	xen_store_domain_type = XS_UNKNOWN;
 
@@ -949,6 +949,29 @@
 		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
 		if (err)
 			goto out_error;
+		/*
+		 * Uninitialized hvm_params are zero and return no error.
+		 * Although it is theoretically possible to have
+		 * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
+		 * not zero when valid. If zero, it means that Xenstore hasn't
+		 * been properly initialized. Instead of attempting to map a
+		 * wrong guest physical address, return an error.
+		 *
+		 * Also recognize all bits set as an invalid value.
+		 */
+		if (!v || !~v) {
+			err = -ENOENT;
+			goto out_error;
+		}
+		/* Avoid truncation on 32-bit. */
+#if BITS_PER_LONG == 32
+		if (v > ULONG_MAX) {
+			pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+			       __func__, v);
+			err = -EINVAL;
+			goto out_error;
+		}
+#endif
 		xen_store_gfn = (unsigned long)v;
 		xen_store_interface =
 			xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
@@ -983,8 +1006,10 @@
 	 */
 	proc_create_mount_point("xen");
 #endif
+	return 0;
 
 out_error:
+	xen_store_domain_type = XS_UNKNOWN;
 	return err;
 }
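
The validity rules from the comment above, collected into a single
predicate for clarity (illustrative; xenbus_init() open-codes these
checks inline):

	static bool example_store_pfn_valid(uint64_t v)
	{
		if (!v || !~v)		/* never initialized, or all-ones junk */
			return false;
	#if BITS_PER_LONG == 32
		if (v > ULONG_MAX)	/* would truncate on 32-bit */
			return false;
	#endif
		return true;
	}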
 
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 4809446..07b010a 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -211,19 +211,11 @@
 	if (drv && (dev->driver != drv))
 		return 0;
 
-	if (ignore_nonessential) {
-		/* With older QEMU, for PVonHVM guests the guest config files
-		 * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0']
-		 * which is nonsensical as there is no PV FB (there can be
-		 * a PVKB) running as HVM guest. */
-
-		if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0))
-			return 0;
-
-		if ((strncmp(xendev->nodename, "device/vfb", 10) == 0))
-			return 0;
-	}
 	xendrv = to_xenbus_driver(dev->driver);
+
+	if (ignore_nonessential && xendrv->not_essential)
+		return 0;
+
 	return (xendev->state < XenbusStateConnected ||
 		(xendev->state == XenbusStateConnected &&
 		 xendrv->is_ready && !xendrv->is_ready(xendev)));
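
With the hard-coded vkbd/vfb name checks gone, opting out of the
boot-time wait is declarative: a frontend sets the new flag in its
xenbus_driver, as xen-fbfront and pvcalls-front do earlier in this
series. A hypothetical frontend would look like:

	static struct xenbus_driver example_front_driver = {
		.ids			= example_ids,	/* hypothetical id table */
		.probe			= example_probe,
		.remove			= example_remove,
		.otherend_changed	= example_changed,
		.not_essential		= true,	/* don't stall boot for us */
	};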
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index adafdf8..fac918c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -108,7 +108,9 @@
  */
 static int v9fs_vfs_readpage(struct file *file, struct page *page)
 {
-	return netfs_readpage(file, page, &v9fs_req_ops, NULL);
+	struct folio *folio = page_folio(page);
+
+	return netfs_readpage(file, folio, &v9fs_req_ops, NULL);
 }
 
 /**
@@ -130,13 +132,15 @@
 
 static int v9fs_release_page(struct page *page, gfp_t gfp)
 {
-	if (PagePrivate(page))
+	struct folio *folio = page_folio(page);
+
+	if (folio_test_private(folio))
 		return 0;
 #ifdef CONFIG_9P_FSCACHE
-	if (PageFsCache(page)) {
+	if (folio_test_fscache(folio)) {
 		if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
 			return 0;
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	}
 #endif
 	return 1;
@@ -152,55 +156,58 @@
 static void v9fs_invalidate_page(struct page *page, unsigned int offset,
 				 unsigned int length)
 {
-	wait_on_page_fscache(page);
+	struct folio *folio = page_folio(page);
+
+	folio_wait_fscache(folio);
 }
 
-static int v9fs_vfs_writepage_locked(struct page *page)
+static int v9fs_vfs_write_folio_locked(struct folio *folio)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = folio_inode(folio);
 	struct v9fs_inode *v9inode = V9FS_I(inode);
-	loff_t start = page_offset(page);
-	loff_t size = i_size_read(inode);
+	loff_t start = folio_pos(folio);
+	loff_t i_size = i_size_read(inode);
 	struct iov_iter from;
-	int err, len;
+	size_t len = folio_size(folio);
+	int err;
 
-	if (page->index == size >> PAGE_SHIFT)
-		len = size & ~PAGE_MASK;
-	else
-		len = PAGE_SIZE;
+	if (start >= i_size)
+		return 0; /* Simultaneous truncation occurred */
 
-	iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len);
+	len = min_t(loff_t, i_size - start, len);
+
+	iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);
 
 	/* We should have writeback_fid always set */
 	BUG_ON(!v9inode->writeback_fid);
 
-	set_page_writeback(page);
+	folio_start_writeback(folio);
 
 	p9_client_write(v9inode->writeback_fid, start, &from, &err);
 
-	end_page_writeback(page);
+	folio_end_writeback(folio);
 	return err;
 }
 
 static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
 {
+	struct folio *folio = page_folio(page);
 	int retval;
 
-	p9_debug(P9_DEBUG_VFS, "page %p\n", page);
+	p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
 
-	retval = v9fs_vfs_writepage_locked(page);
+	retval = v9fs_vfs_write_folio_locked(folio);
 	if (retval < 0) {
 		if (retval == -EAGAIN) {
-			redirty_page_for_writepage(wbc, page);
+			folio_redirty_for_writepage(wbc, folio);
 			retval = 0;
 		} else {
-			SetPageError(page);
-			mapping_set_error(page->mapping, retval);
+			mapping_set_error(folio_mapping(folio), retval);
 		}
 	} else
 		retval = 0;
 
-	unlock_page(page);
+	folio_unlock(folio);
 	return retval;
 }
 
@@ -213,14 +220,15 @@
 
 static int v9fs_launder_page(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	int retval;
 
-	if (clear_page_dirty_for_io(page)) {
-		retval = v9fs_vfs_writepage_locked(page);
+	if (folio_clear_dirty_for_io(folio)) {
+		retval = v9fs_vfs_write_folio_locked(folio);
 		if (retval)
 			return retval;
 	}
-	wait_on_page_fscache(page);
+	folio_wait_fscache(folio);
 	return 0;
 }
 
@@ -265,10 +273,10 @@
 
 static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
 			    loff_t pos, unsigned int len, unsigned int flags,
-			    struct page **pagep, void **fsdata)
+			    struct page **subpagep, void **fsdata)
 {
 	int retval;
-	struct page *page;
+	struct folio *folio;
 	struct v9fs_inode *v9inode = V9FS_I(mapping->host);
 
 	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
@@ -279,31 +287,32 @@
 	 * file.  We need to do this before we get a lock on the page in case
 	 * there's more than one writer competing for the same cache block.
 	 */
-	retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata,
+	retval = netfs_write_begin(filp, mapping, pos, len, flags, &folio, fsdata,
 				   &v9fs_req_ops, NULL);
 	if (retval < 0)
 		return retval;
 
-	*pagep = find_subpage(page, pos / PAGE_SIZE);
+	*subpagep = &folio->page;
 	return retval;
 }
 
 static int v9fs_write_end(struct file *filp, struct address_space *mapping,
 			  loff_t pos, unsigned int len, unsigned int copied,
-			  struct page *page, void *fsdata)
+			  struct page *subpage, void *fsdata)
 {
 	loff_t last_pos = pos + copied;
-	struct inode *inode = page->mapping->host;
+	struct folio *folio = page_folio(subpage);
+	struct inode *inode = mapping->host;
 
 	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		if (unlikely(copied < len)) {
 			copied = 0;
 			goto out;
 		}
 
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	/*
@@ -314,10 +323,10 @@
 		inode_add_bytes(inode, last_pos - inode->i_size);
 		i_size_write(inode, last_pos);
 	}
-	set_page_dirty(page);
+	folio_mark_dirty(folio);
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	return copied;
 }
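
The conversion pattern repeated throughout this file: the legacy
address_space ops still hand in a struct page, so each entry point
first hops to the containing folio and then works with folio_*
helpers. In miniature (hypothetical op, mirroring v9fs_invalidate_page
above):

	static void example_invalidatepage(struct page *page, unsigned int offset,
					   unsigned int length)
	{
		struct folio *folio = page_folio(page);	/* page -> owning folio */

		folio_wait_fscache(folio);	/* was wait_on_page_fscache(page) */
	}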
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 4244d48..612e297 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -528,13 +528,13 @@
 v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
 	struct v9fs_inode *v9inode;
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct file *filp = vmf->vma->vm_file;
 	struct inode *inode = file_inode(filp);
 
 
-	p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
-		 page, (unsigned long)filp->private_data);
+	p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
+		 folio, (unsigned long)filp->private_data);
 
 	v9inode = V9FS_I(inode);
 
@@ -542,24 +542,24 @@
 	 * be modified.  We then assume the entire page will need writing back.
 	 */
 #ifdef CONFIG_9P_FSCACHE
-	if (PageFsCache(page) &&
-	    wait_on_page_fscache_killable(page) < 0)
-		return VM_FAULT_RETRY;
+	if (folio_test_fscache(folio) &&
+	    folio_wait_fscache_killable(folio) < 0)
+		return VM_FAULT_NOPAGE;
 #endif
 
 	/* Update file times before taking page lock */
 	file_update_time(filp);
 
 	BUG_ON(!v9inode->writeback_fid);
-	if (lock_page_killable(page) < 0)
+	if (folio_lock_killable(folio) < 0)
 		return VM_FAULT_RETRY;
-	if (page->mapping != inode->i_mapping)
+	if (folio_mapping(folio) != inode->i_mapping)
 		goto out_unlock;
-	wait_for_stable_page(page);
+	folio_wait_stable(folio);
 
 	return VM_FAULT_LOCKED;
 out_unlock:
-	unlock_page(page);
+	folio_unlock(folio);
 	return VM_FAULT_NOPAGE;
 }
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4579bbd..da9b4f8 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -103,13 +103,13 @@
 };
 
 /*
- * Drop the refs that we're holding on the pages we were reading into.  We've
+ * Drop the refs that we're holding on the folios we were reading into.  We've
  * got refs on the first nr_pages pages.
  */
 static void afs_dir_read_cleanup(struct afs_read *req)
 {
 	struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t last = req->nr_pages - 1;
 
 	XA_STATE(xas, &mapping->i_pages, 0);
@@ -118,65 +118,56 @@
 		return;
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, last) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, last) {
+		if (xas_retry(&xas, folio))
 			continue;
-		BUG_ON(xa_is_value(page));
-		BUG_ON(PageCompound(page));
-		ASSERTCMP(page->mapping, ==, mapping);
+		BUG_ON(xa_is_value(folio));
+		ASSERTCMP(folio_file_mapping(folio), ==, mapping);
 
-		put_page(page);
+		folio_put(folio);
 	}
 
 	rcu_read_unlock();
 }
 
 /*
- * check that a directory page is valid
+ * check that a directory folio is valid
  */
-static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
-			       loff_t i_size)
+static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
+				loff_t i_size)
 {
-	struct afs_xdr_dir_page *dbuf;
-	loff_t latter, off;
-	int tmp, qty;
+	union afs_xdr_dir_block *block;
+	size_t offset, size;
+	loff_t pos;
 
-	/* Determine how many magic numbers there should be in this page, but
+	/* Determine how many magic numbers there should be in this folio, but
 	 * we must take care because the directory may change size under us.
 	 */
-	off = page_offset(page);
-	if (i_size <= off)
+	pos = folio_pos(folio);
+	if (i_size <= pos)
 		goto checked;
 
-	latter = i_size - off;
-	if (latter >= PAGE_SIZE)
-		qty = PAGE_SIZE;
-	else
-		qty = latter;
-	qty /= sizeof(union afs_xdr_dir_block);
-
-	/* check them */
-	dbuf = kmap_atomic(page);
-	for (tmp = 0; tmp < qty; tmp++) {
-		if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
-			printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
-			       __func__, dvnode->vfs_inode.i_ino, tmp, qty,
-			       ntohs(dbuf->blocks[tmp].hdr.magic));
-			trace_afs_dir_check_failed(dvnode, off, i_size);
-			kunmap(page);
+	size = min_t(loff_t, folio_size(folio), i_size - pos);
+	for (offset = 0; offset < size; offset += sizeof(*block)) {
+		block = kmap_local_folio(folio, offset);
+		if (block->hdr.magic != AFS_DIR_MAGIC) {
+			printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
+			       __func__, dvnode->vfs_inode.i_ino,
+			       pos, offset, size, ntohs(block->hdr.magic));
+			trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
+			kunmap_local(block);
 			trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
 			goto error;
 		}
 
 		/* Make sure each block is NUL terminated so we can reasonably
-		 * use string functions on it.  The filenames in the page
+		 * use string functions on it.  The filenames in the folio
 		 * *should* be NUL-terminated anyway.
 		 */
-		((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
+		((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
+
+		kunmap_local(block);
 	}
-
-	kunmap_atomic(dbuf);
-
 checked:
 	afs_stat_v(dvnode, n_read_dir);
 	return true;
@@ -190,11 +181,11 @@
  */
 static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 {
-	struct afs_xdr_dir_page *dbuf;
+	union afs_xdr_dir_block *block;
 	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
-	struct page *page;
-	unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+	struct folio *folio;
 	pgoff_t last = req->nr_pages - 1;
+	size_t offset, size;
 
 	XA_STATE(xas, &mapping->i_pages, 0);
 
@@ -205,30 +196,28 @@
 		req->pos, req->nr_pages,
 		req->iter->iov_offset,  iov_iter_count(req->iter));
 
-	xas_for_each(&xas, page, last) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, last) {
+		if (xas_retry(&xas, folio))
 			continue;
 
-		BUG_ON(PageCompound(page));
-		BUG_ON(page->mapping != mapping);
+		BUG_ON(folio_file_mapping(folio) != mapping);
 
-		dbuf = kmap_atomic(page);
-		for (i = 0; i < qty; i++) {
-			union afs_xdr_dir_block *block = &dbuf->blocks[i];
-
-			pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
+		size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
+		for (offset = 0; offset < size; offset += sizeof(*block)) {
+			block = kmap_local_folio(folio, offset);
+			pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
+			kunmap_local(block);
 		}
-		kunmap_atomic(dbuf);
 	}
 }
 
 /*
- * Check all the pages in a directory.  All the pages are held pinned.
+ * Check all the blocks in a directory.  All the folios are held pinned.
  */
 static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
 {
 	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t last = req->nr_pages - 1;
 	int ret = 0;
 
@@ -238,14 +227,13 @@
 		return 0;
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, last) {
-		if (xas_retry(&xas, page))
+	xas_for_each(&xas, folio, last) {
+		if (xas_retry(&xas, folio))
 			continue;
 
-		BUG_ON(PageCompound(page));
-		BUG_ON(page->mapping != mapping);
+		BUG_ON(folio_file_mapping(folio) != mapping);
 
-		if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+		if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
 			afs_dir_dump(dvnode, req);
 			ret = -EIO;
 			break;
@@ -274,15 +262,16 @@
 
 /*
  * Read the directory into the pagecache in one go, scrubbing the previous
- * contents.  The list of pages is returned, pinning them so that they don't
+ * contents.  The list of folios is returned, pinning them so that they don't
  * get reclaimed during the iteration.
  */
 static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 	__acquires(&dvnode->validate_lock)
 {
+	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
 	struct afs_read *req;
 	loff_t i_size;
-	int nr_pages, i, n;
+	int nr_pages, i;
 	int ret;
 
 	_enter("");
@@ -320,43 +309,30 @@
 	req->iter = &req->def_iter;
 
 	/* Fill in any gaps that we might find where the memory reclaimer has
-	 * been at work and pin all the pages.  If there are any gaps, we will
+	 * been at work and pin all the folios.  If there are any gaps, we will
 	 * need to reread the entire directory contents.
 	 */
 	i = req->nr_pages;
 	while (i < nr_pages) {
-		struct page *pages[8], *page;
+		struct folio *folio;
 
-		n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-					  min_t(unsigned int, nr_pages - i,
-						ARRAY_SIZE(pages)),
-					  pages);
-		_debug("find %u at %u/%u", n, i, nr_pages);
-
-		if (n == 0) {
-			gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
-
+		folio = filemap_get_folio(mapping, i);
+		if (!folio) {
 			if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
 				afs_stat_v(dvnode, n_inval);
 
 			ret = -ENOMEM;
-			page = __page_cache_alloc(gfp);
-			if (!page)
+			folio = __filemap_get_folio(mapping,
+						    i, FGP_LOCK | FGP_CREAT,
+						    mapping->gfp_mask);
+			if (!folio)
 				goto error;
-			ret = add_to_page_cache_lru(page,
-						    dvnode->vfs_inode.i_mapping,
-						    i, gfp);
-			if (ret < 0)
-				goto error;
-
-			attach_page_private(page, (void *)1);
-			unlock_page(page);
-			req->nr_pages++;
-			i++;
-		} else {
-			req->nr_pages += n;
-			i += n;
+			folio_attach_private(folio, (void *)1);
+			folio_unlock(folio);
 		}
+
+		req->nr_pages += folio_nr_pages(folio);
+		i += folio_nr_pages(folio);
 	}
 
 	/* If we're going to reload, we need to lock all the pages to prevent
@@ -424,7 +400,7 @@
 	size_t nlen;
 	int tmp;
 
-	_enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block);
+	_enter("%llx,%x", ctx->pos, blkoff);
 
 	curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
 
@@ -513,12 +489,10 @@
 			   struct key *key, afs_dataversion_t *_dir_version)
 {
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
-	struct afs_xdr_dir_page *dbuf;
 	union afs_xdr_dir_block *dblock;
 	struct afs_read *req;
-	struct page *page;
-	unsigned blkoff, limit;
-	void __rcu **slot;
+	struct folio *folio;
+	unsigned offset, size;
 	int ret;
 
 	_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -540,43 +514,30 @@
 	/* walk through the blocks in sequence */
 	ret = 0;
 	while (ctx->pos < req->actual_len) {
-		blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
-
-		/* Fetch the appropriate page from the directory and re-add it
+		/* Fetch the appropriate folio from the directory and re-add it
 		 * to the LRU.  We have all the pages pinned with an extra ref.
 		 */
-		rcu_read_lock();
-		page = NULL;
-		slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
-					      blkoff / PAGE_SIZE);
-		if (slot)
-			page = radix_tree_deref_slot(slot);
-		rcu_read_unlock();
-		if (!page) {
+		folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
+					    FGP_ACCESSED, 0);
+		if (!folio) {
 			ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
 			break;
 		}
-		mark_page_accessed(page);
 
-		limit = blkoff & ~(PAGE_SIZE - 1);
+		offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
+		size = min_t(loff_t, folio_size(folio),
+			     req->actual_len - folio_file_pos(folio));
 
-		dbuf = kmap(page);
-
-		/* deal with the individual blocks stashed on this page */
 		do {
-			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
-					       sizeof(union afs_xdr_dir_block)];
-			ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
-			if (ret != 1) {
-				kunmap(page);
+			dblock = kmap_local_folio(folio, offset);
+			ret = afs_dir_iterate_block(dvnode, ctx, dblock,
+						    folio_file_pos(folio) + offset);
+			kunmap_local(dblock);
+			if (ret != 1)
 				goto out;
-			}
 
-			blkoff += sizeof(union afs_xdr_dir_block);
+		} while (offset += sizeof(*dblock), offset < size);
 
-		} while (ctx->pos < dir->i_size && blkoff < limit);
-
-		kunmap(page);
 		ret = 0;
 	}
 
@@ -2037,42 +1998,42 @@
 }
 
 /*
- * Release a directory page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
+ * Release a directory folio and clean up its private state if it's not busy
+ * - return true if the folio can now be released, false if not
  */
-static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
+static int afs_dir_releasepage(struct page *subpage, gfp_t gfp_flags)
 {
-	struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+	struct folio *folio = page_folio(subpage);
+	struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
 
-	_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+	_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
 
-	detach_page_private(page);
+	folio_detach_private(folio);
 
 	/* The directory will need reloading. */
 	if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
 		afs_stat_v(dvnode, n_relpg);
-	return 1;
+	return true;
 }
 
 /*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- *   the entire page)
+ * Invalidate part or all of a folio.
  */
-static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
+static void afs_dir_invalidatepage(struct page *subpage, unsigned int offset,
 				   unsigned int length)
 {
-	struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+	struct folio *folio = page_folio(subpage);
+	struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
 
-	_enter("{%lu},%u,%u", page->index, offset, length);
+	_enter("{%lu},%u,%u", folio_index(folio), offset, length);
 
-	BUG_ON(!PageLocked(page));
+	BUG_ON(!folio_test_locked(folio));
 
 	/* The directory will need reloading. */
 	if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
 		afs_stat_v(dvnode, n_inval);
 
-	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == thp_size(page))
-		detach_page_private(page);
+	/* we clean up only if the entire folio is being invalidated */
+	if (offset == 0 && length == folio_size(folio))
+		folio_detach_private(folio);
 }
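
The block-scan idiom that replaces whole-page kmap_atomic() in this
file: map one directory block at a time at a folio offset and unmap it
before moving on. A sketch under the same assumptions as
afs_dir_check_folio():

	static bool example_scan_folio(struct folio *folio, size_t size)
	{
		union afs_xdr_dir_block *block;
		size_t offset;

		for (offset = 0; offset < size; offset += sizeof(*block)) {
			block = kmap_local_folio(folio, offset);
			if (block->hdr.magic != AFS_DIR_MAGIC) {
				kunmap_local(block);
				return false;	/* bad block found */
			}
			kunmap_local(block);
		}
		return true;
	}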
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index 540b9fc..d98e109 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -105,6 +105,25 @@
 }
 
 /*
+ * Get a new directory folio.
+ */
+static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+{
+	struct address_space *mapping = vnode->vfs_inode.i_mapping;
+	struct folio *folio;
+
+	folio = __filemap_get_folio(mapping, index,
+				    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+				    mapping->gfp_mask);
+	if (!folio)
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	else if (!folio_test_private(folio))
+		folio_attach_private(folio, (void *)1);
+
+	return folio;
+}
+
+/*
  * Scan a directory block looking for a dirent of the right name.
  */
 static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
@@ -188,13 +207,11 @@
 		      enum afs_edit_dir_reason why)
 {
 	union afs_xdr_dir_block *meta, *block;
-	struct afs_xdr_dir_page *meta_page, *dir_page;
 	union afs_xdr_dirent *de;
-	struct page *page0, *page;
+	struct folio *folio0, *folio;
 	unsigned int need_slots, nr_blocks, b;
 	pgoff_t index;
 	loff_t i_size;
-	gfp_t gfp;
 	int slot;
 
 	_enter(",,{%d,%s},", name->len, name->name);
@@ -206,10 +223,8 @@
 		return;
 	}
 
-	gfp = vnode->vfs_inode.i_mapping->gfp_mask;
-	page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp);
-	if (!page0) {
-		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	folio0 = afs_dir_get_folio(vnode, 0);
+	if (!folio0) {
 		_leave(" [fgp]");
 		return;
 	}
@@ -217,42 +232,35 @@
 	/* Work out how many slots we're going to need. */
 	need_slots = afs_dir_calc_slots(name->len);
 
-	meta_page = kmap(page0);
-	meta = &meta_page->blocks[0];
+	meta = kmap_local_folio(folio0, 0);
 	if (i_size == 0)
 		goto new_directory;
 	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
 
-	/* Find a block that has sufficient slots available.  Each VM page
+	/* Find a block that has sufficient slots available.  Each folio
 	 * contains two or more directory blocks.
 	 */
 	for (b = 0; b < nr_blocks + 1; b++) {
-		/* If the directory extended into a new page, then we need to
-		 * tack a new page on the end.
+		/* If the directory extended into a new folio, then we need to
+		 * tack a new folio on the end.
 		 */
 		index = b / AFS_DIR_BLOCKS_PER_PAGE;
-		if (index == 0) {
-			page = page0;
-			dir_page = meta_page;
+		if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
+			goto error;
+		if (index >= folio_nr_pages(folio0)) {
+			folio = afs_dir_get_folio(vnode, index);
+			if (!folio)
+				goto error;
 		} else {
-			if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
-				goto error;
-			gfp = vnode->vfs_inode.i_mapping->gfp_mask;
-			page = find_or_create_page(vnode->vfs_inode.i_mapping,
-						   index, gfp);
-			if (!page)
-				goto error;
-			if (!PagePrivate(page))
-				attach_page_private(page, (void *)1);
-			dir_page = kmap(page);
+			folio = folio0;
 		}
 
+		block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
 		/* Abandon the edit if we got a callback break. */
 		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
 			goto invalidated;
 
-		block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
-
 		_debug("block %u: %2u %3u %u",
 		       b,
 		       (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
@@ -266,7 +274,7 @@
 			afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
 		}
 
-		/* Only lower dir pages have a counter in the header. */
+		/* Only lower dir blocks have a counter in the header. */
 		if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
 		    meta->meta.alloc_ctrs[b] >= need_slots) {
 			/* We need to try and find one or more consecutive
@@ -279,10 +287,10 @@
 			}
 		}
 
-		if (page != page0) {
-			unlock_page(page);
-			kunmap(page);
-			put_page(page);
+		kunmap_local(block);
+		if (folio != folio0) {
+			folio_unlock(folio);
+			folio_put(folio);
 		}
 	}
 
@@ -298,8 +306,8 @@
 	i_size = AFS_DIR_BLOCK_SIZE;
 	afs_set_i_size(vnode, i_size);
 	slot = AFS_DIR_RESV_BLOCKS0;
-	page = page0;
-	block = meta;
+	folio = folio0;
+	block = kmap_local_folio(folio, 0);
 	nr_blocks = 1;
 	b = 0;
 
@@ -318,10 +326,10 @@
 
 	/* Adjust the bitmap. */
 	afs_set_contig_bits(block, slot, need_slots);
-	if (page != page0) {
-		unlock_page(page);
-		kunmap(page);
-		put_page(page);
+	kunmap_local(block);
+	if (folio != folio0) {
+		folio_unlock(folio);
+		folio_put(folio);
 	}
 
 	/* Adjust the allocation counter. */
@@ -333,18 +341,19 @@
 	_debug("Insert %s in %u[%u]", name->name, b, slot);
 
 out_unmap:
-	unlock_page(page0);
-	kunmap(page0);
-	put_page(page0);
+	kunmap_local(meta);
+	folio_unlock(folio0);
+	folio_put(folio0);
 	_leave("");
 	return;
 
 invalidated:
 	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
 	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
-	if (page != page0) {
-		kunmap(page);
-		put_page(page);
+	kunmap_local(block);
+	if (folio != folio0) {
+		folio_unlock(folio);
+		folio_put(folio);
 	}
 	goto out_unmap;
 
@@ -364,10 +373,9 @@
 void afs_edit_dir_remove(struct afs_vnode *vnode,
 			 struct qstr *name, enum afs_edit_dir_reason why)
 {
-	struct afs_xdr_dir_page *meta_page, *dir_page;
 	union afs_xdr_dir_block *meta, *block;
 	union afs_xdr_dirent *de;
-	struct page *page0, *page;
+	struct folio *folio0, *folio;
 	unsigned int need_slots, nr_blocks, b;
 	pgoff_t index;
 	loff_t i_size;
@@ -384,9 +392,8 @@
 	}
 	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
 
-	page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0);
-	if (!page0) {
-		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	folio0 = afs_dir_get_folio(vnode, 0);
+	if (!folio0) {
 		_leave(" [fgp]");
 		return;
 	}
@@ -394,30 +401,27 @@
 	/* Work out how many slots we're going to discard. */
 	need_slots = afs_dir_calc_slots(name->len);
 
-	meta_page = kmap(page0);
-	meta = &meta_page->blocks[0];
+	meta = kmap_local_folio(folio0, 0);
 
-	/* Find a page that has sufficient slots available.  Each VM page
+	/* Find a block that has sufficient slots available.  Each folio
 	 * contains two or more directory blocks.
 	 */
 	for (b = 0; b < nr_blocks; b++) {
 		index = b / AFS_DIR_BLOCKS_PER_PAGE;
-		if (index != 0) {
-			page = find_lock_page(vnode->vfs_inode.i_mapping, index);
-			if (!page)
+		if (index >= folio_nr_pages(folio0)) {
+			folio = afs_dir_get_folio(vnode, index);
+			if (!folio)
 				goto error;
-			dir_page = kmap(page);
 		} else {
-			page = page0;
-			dir_page = meta_page;
+			folio = folio0;
 		}
 
+		block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
 		/* Abandon the edit if we got a callback break. */
 		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
 			goto invalidated;
 
-		block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
-
 		if (b > AFS_DIR_BLOCKS_WITH_CTR ||
 		    meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
 			slot = afs_dir_scan_block(block, name, b);
@@ -425,10 +429,10 @@
 				goto found_dirent;
 		}
 
-		if (page != page0) {
-			unlock_page(page);
-			kunmap(page);
-			put_page(page);
+		kunmap_local(block);
+		if (folio != folio0) {
+			folio_unlock(folio);
+			folio_put(folio);
 		}
 	}
 
@@ -449,10 +453,10 @@
 
 	/* Adjust the bitmap. */
 	afs_clear_contig_bits(block, slot, need_slots);
-	if (page != page0) {
-		unlock_page(page);
-		kunmap(page);
-		put_page(page);
+	kunmap_local(block);
+	if (folio != folio0) {
+		folio_unlock(folio);
+		folio_put(folio);
 	}
 
 	/* Adjust the allocation counter. */
@@ -464,9 +468,9 @@
 	_debug("Remove %s from %u[%u]", name->name, b, slot);
 
 out_unmap:
-	unlock_page(page0);
-	kunmap(page0);
-	put_page(page0);
+	kunmap_local(meta);
+	folio_unlock(folio0);
+	folio_put(folio0);
 	_leave("");
 	return;
 
@@ -474,10 +478,10 @@
 	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
 			   0, 0, 0, 0, name->name);
 	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
-	if (page != page0) {
-		unlock_page(page);
-		kunmap(page);
-		put_page(page);
+	kunmap_local(block);
+	if (folio != folio0) {
+		folio_unlock(folio);
+		folio_put(folio);
 	}
 	goto out_unmap;
 
diff --git a/fs/afs/file.c b/fs/afs/file.c
index eb11d04..cb6ad61 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -324,21 +324,24 @@
 {
 	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 	struct afs_read *fsreq;
+	struct folio *folio = page_folio(page);
 	int ret;
 
 	fsreq = afs_alloc_read(GFP_NOFS);
 	if (!fsreq)
 		return -ENOMEM;
 
-	fsreq->pos	= page->index * PAGE_SIZE;
-	fsreq->len	= PAGE_SIZE;
+	fsreq->pos	= folio_pos(folio);
+	fsreq->len	= folio_size(folio);
 	fsreq->vnode	= vnode;
 	fsreq->iter	= &fsreq->def_iter;
 	iov_iter_xarray(&fsreq->def_iter, READ, &page->mapping->i_pages,
 			fsreq->pos, fsreq->len);
 
 	ret = afs_fetch_data(fsreq->vnode, fsreq);
-	page_endio(page, false, ret);
+	if (ret == 0)
+		SetPageUptodate(page);
+	unlock_page(page);
 	return ret;
 }
 
@@ -362,7 +365,7 @@
 }
 
 static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
-				 struct page *page, void **_fsdata)
+				 struct folio *folio, void **_fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
@@ -385,7 +388,9 @@
 
 static int afs_readpage(struct file *file, struct page *page)
 {
-	return netfs_readpage(file, page, &afs_req_ops, NULL);
+	struct folio *folio = page_folio(page);
+
+	return netfs_readpage(file, folio, &afs_req_ops, NULL);
 }
 
 static void afs_readahead(struct readahead_control *ractl)
@@ -397,29 +402,29 @@
  * Adjust the dirty region of the page on truncation or full invalidation,
  * getting rid of the markers altogether if the region is entirely invalidated.
  */
-static void afs_invalidate_dirty(struct page *page, unsigned int offset,
+static void afs_invalidate_dirty(struct folio *folio, unsigned int offset,
 				 unsigned int length)
 {
-	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 	unsigned long priv;
 	unsigned int f, t, end = offset + length;
 
-	priv = page_private(page);
+	priv = (unsigned long)folio_get_private(folio);
 
 	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == thp_size(page))
+	if (offset == 0 && length == folio_size(folio))
 		goto full_invalidate;
 
 	 /* If the page was dirtied by page_mkwrite(), the PTE stays writable
 	  * and we don't get another notification to tell us to expand it
 	  * again.
 	  */
-	if (afs_is_page_dirty_mmapped(priv))
+	if (afs_is_folio_dirty_mmapped(priv))
 		return;
 
 	/* We may need to shorten the dirty region */
-	f = afs_page_dirty_from(page, priv);
-	t = afs_page_dirty_to(page, priv);
+	f = afs_folio_dirty_from(folio, priv);
+	t = afs_folio_dirty_to(folio, priv);
 
 	if (t <= offset || f >= end)
 		return; /* Doesn't overlap */
@@ -437,17 +442,17 @@
 	if (f == t)
 		goto undirty;
 
-	priv = afs_page_dirty(page, f, t);
-	set_page_private(page, priv);
-	trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
+	priv = afs_folio_dirty(folio, f, t);
+	folio_change_private(folio, (void *)priv);
+	trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
 	return;
 
 undirty:
-	trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
-	clear_page_dirty_for_io(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
+	folio_clear_dirty_for_io(folio);
 full_invalidate:
-	trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
-	detach_page_private(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
+	folio_detach_private(folio);
 }
 
 /*
@@ -458,14 +463,16 @@
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length)
 {
-	_enter("{%lu},%u,%u", page->index, offset, length);
+	struct folio *folio = page_folio(page);
+
+	_enter("{%lu},%u,%u", folio_index(folio), offset, length);
 
 	BUG_ON(!PageLocked(page));
 
 	if (PagePrivate(page))
-		afs_invalidate_dirty(page, offset, length);
+		afs_invalidate_dirty(folio, offset, length);
 
-	wait_on_page_fscache(page);
+	folio_wait_fscache(folio);
 	_leave("");
 }
 
@@ -475,30 +482,31 @@
  */
 static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	struct folio *folio = page_folio(page);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 
 	_enter("{{%llx:%llu}[%lu],%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
+	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
 	       gfp_flags);
 
 	/* deny if page is being written to the cache and the caller hasn't
 	 * elected to wait */
 #ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page)) {
+	if (folio_test_fscache(folio)) {
 		if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
 			return false;
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	}
 #endif
 
-	if (PagePrivate(page)) {
-		trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
-		detach_page_private(page);
+	if (folio_test_private(folio)) {
+		trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
+		folio_detach_private(folio);
 	}
 
-	/* indicate that the page can be released */
+	/* Indicate that the folio can be released */
 	_leave(" = T");
-	return 1;
+	return true;
 }
 
 static void afs_add_open_mmap(struct afs_vnode *vnode)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9357c53..aa4c0d6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -876,59 +876,59 @@
 } __packed;
 
 /*
- * We use page->private to hold the amount of the page that we've written to,
+ * We use folio->private to hold the amount of the folio that we've written to,
  * splitting the field into two parts.  However, we need to represent a range
- * 0...PAGE_SIZE, so we reduce the resolution if the size of the page
+ * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
  * exceeds what we can encode.
  */
 #ifdef CONFIG_64BIT
-#define __AFS_PAGE_PRIV_MASK	0x7fffffffUL
-#define __AFS_PAGE_PRIV_SHIFT	32
-#define __AFS_PAGE_PRIV_MMAPPED	0x80000000UL
+#define __AFS_FOLIO_PRIV_MASK		0x7fffffffUL
+#define __AFS_FOLIO_PRIV_SHIFT		32
+#define __AFS_FOLIO_PRIV_MMAPPED	0x80000000UL
 #else
-#define __AFS_PAGE_PRIV_MASK	0x7fffUL
-#define __AFS_PAGE_PRIV_SHIFT	16
-#define __AFS_PAGE_PRIV_MMAPPED	0x8000UL
+#define __AFS_FOLIO_PRIV_MASK		0x7fffUL
+#define __AFS_FOLIO_PRIV_SHIFT		16
+#define __AFS_FOLIO_PRIV_MMAPPED	0x8000UL
 #endif
 
-static inline unsigned int afs_page_dirty_resolution(struct page *page)
+static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
 {
-	int shift = thp_order(page) + PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
+	int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
 	return (shift > 0) ? shift : 0;
 }
 
-static inline size_t afs_page_dirty_from(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
 {
-	unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
+	unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
 
 	/* The lower bound is inclusive */
-	return x << afs_page_dirty_resolution(page);
+	return x << afs_folio_dirty_resolution(folio);
 }
 
-static inline size_t afs_page_dirty_to(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
 {
-	unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
+	unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
 
 	/* The upper bound is immediately beyond the region */
-	return (x + 1) << afs_page_dirty_resolution(page);
+	return (x + 1) << afs_folio_dirty_resolution(folio);
 }
 
-static inline unsigned long afs_page_dirty(struct page *page, size_t from, size_t to)
+static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
 {
-	unsigned int res = afs_page_dirty_resolution(page);
+	unsigned int res = afs_folio_dirty_resolution(folio);
 	from >>= res;
 	to = (to - 1) >> res;
-	return (to << __AFS_PAGE_PRIV_SHIFT) | from;
+	return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
 }
 
-static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
+static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
 {
-	return priv | __AFS_PAGE_PRIV_MMAPPED;
+	return priv | __AFS_FOLIO_PRIV_MMAPPED;
 }
 
-static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
+static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
 {
-	return priv & __AFS_PAGE_PRIV_MMAPPED;
+	return priv & __AFS_FOLIO_PRIV_MMAPPED;
 }
 
 #include <trace/events/afs.h>
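
The folio->private encoding above is easiest to see with numbers: the low half of the word stores the inclusive start of the dirty range, the high half stores the end minus one, and both are scaled down by afs_folio_dirty_resolution() when the folio is too large for the halves to represent. A minimal round-trip of the 64-bit variant for a hypothetical 4KiB folio, where the resolution shift works out to zero (a sketch, not the kernel helpers themselves):

	#include <assert.h>
	#include <stddef.h>

	#define MASK	0x7fffffffUL	/* __AFS_FOLIO_PRIV_MASK, 64-bit case */
	#define SHIFT	32		/* __AFS_FOLIO_PRIV_SHIFT */

	/* Extra shift needed when folio_size exceeds what MASK can encode.
	 * For a 4KiB folio (folio_shift() == 12): max(12 - 31, 0) == 0. */
	static unsigned int resolution(unsigned int folio_shift)
	{
		int shift = folio_shift - (SHIFT - 1);
		return shift > 0 ? shift : 0;
	}

	static unsigned long encode(unsigned int folio_shift, size_t from, size_t to)
	{
		unsigned int res = resolution(folio_shift);
		from >>= res;
		to = (to - 1) >> res;	/* upper bound stored as end - 1 */
		return (to << SHIFT) | from;
	}

	int main(void)
	{
		unsigned long priv = encode(12, 512, 4096);	/* dirty bytes 512..4095 */
		size_t from = (priv & MASK) << resolution(12);
		size_t to = (((priv >> SHIFT) & MASK) + 1) << resolution(12);

		assert(from == 512 && to == 4096);	/* round-trips exactly */
		return 0;
	}
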
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 8b1d9c2..ca4909b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -32,7 +32,7 @@
 		    struct page **_page, void **fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct page *page;
+	struct folio *folio;
 	unsigned long priv;
 	unsigned f, from;
 	unsigned t, to;
@@ -46,12 +46,12 @@
 	 * file.  We need to do this before we get a lock on the page in case
 	 * there's more than one writer competing for the same cache block.
 	 */
-	ret = netfs_write_begin(file, mapping, pos, len, flags, &page, fsdata,
+	ret = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata,
 				&afs_req_ops, NULL);
 	if (ret < 0)
 		return ret;
 
-	index = page->index;
+	index = folio_index(folio);
 	from = pos - index * PAGE_SIZE;
 	to = from + len;
 
@@ -59,14 +59,14 @@
 	/* See if this page is already partially written in a way that we can
 	 * merge the new write with.
 	 */
-	if (PagePrivate(page)) {
-		priv = page_private(page);
-		f = afs_page_dirty_from(page, priv);
-		t = afs_page_dirty_to(page, priv);
+	if (folio_test_private(folio)) {
+		priv = (unsigned long)folio_get_private(folio);
+		f = afs_folio_dirty_from(folio, priv);
+		t = afs_folio_dirty_to(folio, priv);
 		ASSERTCMP(f, <=, t);
 
-		if (PageWriteback(page)) {
-			trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page);
+		if (folio_test_writeback(folio)) {
+			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
 			goto flush_conflicting_write;
 		}
 		/* If the file is being filled locally, allow inter-write
@@ -78,7 +78,7 @@
 			goto flush_conflicting_write;
 	}
 
-	*_page = page;
+	*_page = &folio->page;
 	_leave(" = 0");
 	return 0;
 
@@ -87,17 +87,17 @@
 	 */
 flush_conflicting_write:
 	_debug("flush conflict");
-	ret = write_one_page(page);
+	ret = folio_write_one(folio);
 	if (ret < 0)
 		goto error;
 
-	ret = lock_page_killable(page);
+	ret = folio_lock_killable(folio);
 	if (ret < 0)
 		goto error;
 	goto try_again;
 
 error:
-	put_page(page);
+	folio_put(folio);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -107,24 +107,25 @@
  */
 int afs_write_end(struct file *file, struct address_space *mapping,
 		  loff_t pos, unsigned len, unsigned copied,
-		  struct page *page, void *fsdata)
+		  struct page *subpage, void *fsdata)
 {
+	struct folio *folio = page_folio(subpage);
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	unsigned long priv;
-	unsigned int f, from = pos & (thp_size(page) - 1);
+	unsigned int f, from = offset_in_folio(folio, pos);
 	unsigned int t, to = from + copied;
 	loff_t i_size, maybe_i_size;
 
 	_enter("{%llx:%llu},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, page->index);
+	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		if (copied < len) {
 			copied = 0;
 			goto out;
 		}
 
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	if (copied == 0)
@@ -141,29 +142,29 @@
 		write_sequnlock(&vnode->cb_lock);
 	}
 
-	if (PagePrivate(page)) {
-		priv = page_private(page);
-		f = afs_page_dirty_from(page, priv);
-		t = afs_page_dirty_to(page, priv);
+	if (folio_test_private(folio)) {
+		priv = (unsigned long)folio_get_private(folio);
+		f = afs_folio_dirty_from(folio, priv);
+		t = afs_folio_dirty_to(folio, priv);
 		if (from < f)
 			f = from;
 		if (to > t)
 			t = to;
-		priv = afs_page_dirty(page, f, t);
-		set_page_private(page, priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page);
+		priv = afs_folio_dirty(folio, f, t);
+		folio_change_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
 	} else {
-		priv = afs_page_dirty(page, from, to);
-		attach_page_private(page, (void *)priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page);
+		priv = afs_folio_dirty(folio, from, to);
+		folio_attach_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
 	}
 
-	if (set_page_dirty(page))
-		_debug("dirtied %lx", page->index);
+	if (folio_mark_dirty(folio))
+		_debug("dirtied %lx", folio_index(folio));
 
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 	return copied;
 }
 
@@ -174,40 +175,32 @@
 			   loff_t start, loff_t len)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct pagevec pv;
-	unsigned int loop, psize;
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
 	_enter("{%llx:%llu},%llx @%llx",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	pagevec_init(&pv);
-
 	do {
-		_debug("kill %llx @%llx", len, start);
+		_debug("kill %lx (to %lx)", index, last);
 
-		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-					      PAGEVEC_SIZE, pv.pages);
-		if (pv.nr == 0)
-			break;
-
-		for (loop = 0; loop < pv.nr; loop++) {
-			struct page *page = pv.pages[loop];
-
-			if (page->index * PAGE_SIZE >= start + len)
-				break;
-
-			psize = thp_size(page);
-			start += psize;
-			len -= psize;
-			ClearPageUptodate(page);
-			end_page_writeback(page);
-			lock_page(page);
-			generic_error_remove_page(mapping, page);
-			unlock_page(page);
+		folio = filemap_get_folio(mapping, index);
+		if (!folio) {
+			next = index + 1;
+			continue;
 		}
 
-		__pagevec_release(&pv);
-	} while (len > 0);
+		next = folio_next_index(folio);
+
+		folio_clear_uptodate(folio);
+		folio_end_writeback(folio);
+		folio_lock(folio);
+		generic_error_remove_page(mapping, &folio->page);
+		folio_unlock(folio);
+		folio_put(folio);
+
+	} while (index = next, index <= last);
 
 	_leave("");
 }
@@ -220,37 +213,27 @@
 			      loff_t start, loff_t len)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct pagevec pv;
-	unsigned int loop, psize;
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
 	_enter("{%llx:%llu},%llx @%llx",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	pagevec_init(&pv);
-
 	do {
 		_debug("redirty %llx @%llx", len, start);
 
-		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-					      PAGEVEC_SIZE, pv.pages);
-		if (pv.nr == 0)
-			break;
-
-		for (loop = 0; loop < pv.nr; loop++) {
-			struct page *page = pv.pages[loop];
-
-			if (page->index * PAGE_SIZE >= start + len)
-				break;
-
-			psize = thp_size(page);
-			start += psize;
-			len -= psize;
-			redirty_page_for_writepage(wbc, page);
-			end_page_writeback(page);
+		folio = filemap_get_folio(mapping, index);
+		if (!folio) {
+			next = index + 1;
+			continue;
 		}
 
-		__pagevec_release(&pv);
-	} while (len > 0);
+		next = index + folio_nr_pages(folio);
+		folio_redirty_for_writepage(wbc, folio);
+		folio_end_writeback(folio);
+		folio_put(folio);
+	} while (index = next, index <= last);
 
 	_leave("");
 }
@@ -261,7 +244,7 @@
 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t end;
 
 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
@@ -272,15 +255,16 @@
 	rcu_read_lock();
 
 	end = (start + len - 1) / PAGE_SIZE;
-	xas_for_each(&xas, page, end) {
-		if (!PageWriteback(page)) {
-			kdebug("bad %x @%llx page %lx %lx", len, start, page->index, end);
-			ASSERT(PageWriteback(page));
+	xas_for_each(&xas, folio, end) {
+		if (!folio_test_writeback(folio)) {
+			kdebug("bad %x @%llx page %lx %lx",
+			       len, start, folio_index(folio), end);
+			ASSERT(folio_test_writeback(folio));
 		}
 
-		trace_afs_page_dirty(vnode, tracepoint_string("clear"), page);
-		detach_page_private(page);
-		page_endio(page, true, 0);
+		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
+		folio_detach_private(folio);
+		folio_end_writeback(folio);
 	}
 
 	rcu_read_unlock();
@@ -437,7 +421,7 @@
 				 unsigned int *_len)
 {
 	struct pagevec pvec;
-	struct page *page;
+	struct folio *folio;
 	unsigned long priv;
 	unsigned int psize, filler = 0;
 	unsigned int f, t;
@@ -456,43 +440,43 @@
 		 */
 		rcu_read_lock();
 
-		xas_for_each(&xas, page, ULONG_MAX) {
+		xas_for_each(&xas, folio, ULONG_MAX) {
 			stop = true;
-			if (xas_retry(&xas, page))
+			if (xas_retry(&xas, folio))
 				continue;
-			if (xa_is_value(page))
+			if (xa_is_value(folio))
 				break;
-			if (page->index != index)
+			if (folio_index(folio) != index)
 				break;
 
-			if (!page_cache_get_speculative(page)) {
+			if (!folio_try_get_rcu(folio)) {
 				xas_reset(&xas);
 				continue;
 			}
 
 			/* Has the page moved or been split? */
-			if (unlikely(page != xas_reload(&xas))) {
-				put_page(page);
+			if (unlikely(folio != xas_reload(&xas))) {
+				folio_put(folio);
 				break;
 			}
 
-			if (!trylock_page(page)) {
-				put_page(page);
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
 				break;
 			}
-			if (!PageDirty(page) || PageWriteback(page)) {
-				unlock_page(page);
-				put_page(page);
+			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+				folio_unlock(folio);
+				folio_put(folio);
 				break;
 			}
 
-			psize = thp_size(page);
-			priv = page_private(page);
-			f = afs_page_dirty_from(page, priv);
-			t = afs_page_dirty_to(page, priv);
+			psize = folio_size(folio);
+			priv = (unsigned long)folio_get_private(folio);
+			f = afs_folio_dirty_from(folio, priv);
+			t = afs_folio_dirty_to(folio, priv);
 			if (f != 0 && !new_content) {
-				unlock_page(page);
-				put_page(page);
+				folio_unlock(folio);
+				folio_put(folio);
 				break;
 			}
 
@@ -503,8 +487,8 @@
 			else if (t == psize || new_content)
 				stop = false;
 
-			index += thp_nr_pages(page);
-			if (!pagevec_add(&pvec, page))
+			index += folio_nr_pages(folio);
+			if (!pagevec_add(&pvec, &folio->page))
 				break;
 			if (stop)
 				break;
@@ -521,16 +505,16 @@
 			break;
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
-			page = pvec.pages[i];
-			trace_afs_page_dirty(vnode, tracepoint_string("store+"), page);
+			folio = page_folio(pvec.pages[i]);
+			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
 
-			if (!clear_page_dirty_for_io(page))
+			if (!folio_clear_dirty_for_io(folio))
 				BUG();
-			if (test_set_page_writeback(page))
+			if (folio_start_writeback(folio))
 				BUG();
 
-			*_count -= thp_nr_pages(page);
-			unlock_page(page);
+			*_count -= folio_nr_pages(folio);
+			folio_unlock(folio);
 		}
 
 		pagevec_release(&pvec);
@@ -544,10 +528,10 @@
  * Synchronously write back the locked page and any subsequent non-locked dirty
  * pages.
  */
-static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
-					       struct writeback_control *wbc,
-					       struct page *page,
-					       loff_t start, loff_t end)
+static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
+						struct writeback_control *wbc,
+						struct folio *folio,
+						loff_t start, loff_t end)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct iov_iter iter;
@@ -558,22 +542,22 @@
 	long count = wbc->nr_to_write;
 	int ret;
 
-	_enter(",%lx,%llx-%llx", page->index, start, end);
+	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
 
-	if (test_set_page_writeback(page))
+	if (folio_start_writeback(folio))
 		BUG();
 
-	count -= thp_nr_pages(page);
+	count -= folio_nr_pages(folio);
 
 	/* Find all consecutive lockable dirty pages that have contiguous
 	 * written regions, stopping when we find a page that is not
 	 * immediately lockable, is not dirty or is missing, or we reach the
 	 * end of the range.
 	 */
-	priv = page_private(page);
-	offset = afs_page_dirty_from(page, priv);
-	to = afs_page_dirty_to(page, priv);
-	trace_afs_page_dirty(vnode, tracepoint_string("store"), page);
+	priv = (unsigned long)folio_get_private(folio);
+	offset = afs_folio_dirty_from(folio, priv);
+	to = afs_folio_dirty_to(folio, priv);
+	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
 
 	len = to - offset;
 	start += offset;
@@ -586,7 +570,7 @@
 		max_len = min_t(unsigned long long, max_len, i_size - start);
 
 		if (len < max_len &&
-		    (to == thp_size(page) || new_content))
+		    (to == folio_size(folio) || new_content))
 			afs_extend_writeback(mapping, vnode, &count,
 					     start, max_len, new_content, &len);
 		len = min_t(loff_t, len, max_len);
@@ -596,7 +580,7 @@
 	 * set; the first page is still locked at this point, but all the rest
 	 * have been unlocked.
 	 */
-	unlock_page(page);
+	folio_unlock(folio);
 
 	if (start < i_size) {
 		_debug("write back %x @%llx [%llx]", len, start, i_size);
@@ -657,16 +641,17 @@
  * write a page back to the server
  * - the caller locked the page for us
  */
-int afs_writepage(struct page *page, struct writeback_control *wbc)
+int afs_writepage(struct page *subpage, struct writeback_control *wbc)
 {
+	struct folio *folio = page_folio(subpage);
 	ssize_t ret;
 	loff_t start;
 
-	_enter("{%lx},", page->index);
+	_enter("{%lx},", folio_index(folio));
 
-	start = page->index * PAGE_SIZE;
-	ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
-					      start, LLONG_MAX - start);
+	start = folio_index(folio) * PAGE_SIZE;
+	ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
+					       folio, start, LLONG_MAX - start);
 	if (ret < 0) {
 		_leave(" = %zd", ret);
 		return ret;
@@ -683,7 +668,8 @@
 				 struct writeback_control *wbc,
 				 loff_t start, loff_t end, loff_t *_next)
 {
-	struct page *page;
+	struct folio *folio;
+	struct page *head_page;
 	ssize_t ret;
 	int n;
 
@@ -693,13 +679,14 @@
 		pgoff_t index = start / PAGE_SIZE;
 
 		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
-					     PAGECACHE_TAG_DIRTY, 1, &page);
+					     PAGECACHE_TAG_DIRTY, 1, &head_page);
 		if (!n)
 			break;
 
-		start = (loff_t)page->index * PAGE_SIZE; /* May regress with THPs */
+		folio = page_folio(head_page);
+		start = folio_pos(folio); /* May regress with THPs */
 
-		_debug("wback %lx", page->index);
+		_debug("wback %lx", folio_index(folio));
 
 		/* At this point we hold neither the i_pages lock nor the
 		 * page lock: the page may be truncated or invalidated
@@ -707,37 +694,38 @@
 		 * back from swapper_space to tmpfs file mapping
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE) {
-			ret = lock_page_killable(page);
+			ret = folio_lock_killable(folio);
 			if (ret < 0) {
-				put_page(page);
+				folio_put(folio);
 				return ret;
 			}
 		} else {
-			if (!trylock_page(page)) {
-				put_page(page);
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
 				return 0;
 			}
 		}
 
-		if (page->mapping != mapping || !PageDirty(page)) {
-			start += thp_size(page);
-			unlock_page(page);
-			put_page(page);
+		if (folio_mapping(folio) != mapping ||
+		    !folio_test_dirty(folio)) {
+			start += folio_size(folio);
+			folio_unlock(folio);
+			folio_put(folio);
 			continue;
 		}
 
-		if (PageWriteback(page)) {
-			unlock_page(page);
+		if (folio_test_writeback(folio)) {
+			folio_unlock(folio);
 			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-			put_page(page);
+				folio_wait_writeback(folio);
+			folio_put(folio);
 			continue;
 		}
 
-		if (!clear_page_dirty_for_io(page))
+		if (!folio_clear_dirty_for_io(folio))
 			BUG();
-		ret = afs_write_back_from_locked_page(mapping, wbc, page, start, end);
-		put_page(page);
+		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+		folio_put(folio);
 		if (ret < 0) {
 			_leave(" = %zd", ret);
 			return ret;
@@ -862,7 +850,6 @@
 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
 	struct folio *folio = page_folio(vmf->page);
-	struct page *page = &folio->page;
 	struct file *file = vmf->vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -870,7 +857,7 @@
 	unsigned long priv;
 	vm_fault_t ret = VM_FAULT_RETRY;
 
-	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
+	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
 	afs_validate(vnode, af->key);
 
@@ -880,18 +867,18 @@
 	 * be modified.  We then assume the entire page will need writing back.
 	 */
 #ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page) &&
-	    wait_on_page_fscache_killable(page) < 0)
+	if (folio_test_fscache(folio) &&
+	    folio_wait_fscache_killable(folio) < 0)
 		goto out;
 #endif
 
 	if (folio_wait_writeback_killable(folio))
 		goto out;
 
-	if (lock_page_killable(page) < 0)
+	if (folio_lock_killable(folio) < 0)
 		goto out;
 
-	/* We mustn't change page->private until writeback is complete as that
+	/* We mustn't change folio->private until writeback is complete as that
 	 * details the portion of the page we need to write back and we might
 	 * need to redirty the page if there's a problem.
 	 */
@@ -900,14 +887,14 @@
 		goto out;
 	}
 
-	priv = afs_page_dirty(page, 0, thp_size(page));
-	priv = afs_page_dirty_mmapped(priv);
-	if (PagePrivate(page)) {
-		set_page_private(page, priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite+"), page);
+	priv = afs_folio_dirty(folio, 0, folio_size(folio));
+	priv = afs_folio_dirty_mmapped(priv);
+	if (folio_test_private(folio)) {
+		folio_change_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
 	} else {
-		attach_page_private(page, (void *)priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), page);
+		folio_attach_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
 	}
 	file_update_time(file);
 
@@ -948,38 +935,38 @@
 /*
  * Clean up a page during invalidation.
  */
-int afs_launder_page(struct page *page)
+int afs_launder_page(struct page *subpage)
 {
-	struct address_space *mapping = page->mapping;
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+	struct folio *folio = page_folio(subpage);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 	struct iov_iter iter;
 	struct bio_vec bv[1];
 	unsigned long priv;
 	unsigned int f, t;
 	int ret = 0;
 
-	_enter("{%lx}", page->index);
+	_enter("{%lx}", folio_index(folio));
 
-	priv = page_private(page);
-	if (clear_page_dirty_for_io(page)) {
+	priv = (unsigned long)folio_get_private(folio);
+	if (folio_clear_dirty_for_io(folio)) {
 		f = 0;
-		t = thp_size(page);
-		if (PagePrivate(page)) {
-			f = afs_page_dirty_from(page, priv);
-			t = afs_page_dirty_to(page, priv);
+		t = folio_size(folio);
+		if (folio_test_private(folio)) {
+			f = afs_folio_dirty_from(folio, priv);
+			t = afs_folio_dirty_to(folio, priv);
 		}
 
-		bv[0].bv_page = page;
+		bv[0].bv_page = &folio->page;
 		bv[0].bv_offset = f;
 		bv[0].bv_len = t - f;
 		iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
 
-		trace_afs_page_dirty(vnode, tracepoint_string("launder"), page);
-		ret = afs_store_data(vnode, &iter, page_offset(page) + f, true);
+		trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
+		ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
 	}
 
-	trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page);
-	detach_page_private(page);
-	wait_on_page_fscache(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
+	folio_detach_private(folio);
+	folio_wait_fscache(folio);
 	return ret;
 }
diff --git a/fs/attr.c b/fs/attr.c
index 473d21b..66899b6 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -35,7 +35,7 @@
 		     kuid_t uid)
 {
 	kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
-	if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, kuid))
+	if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, inode->i_uid))
 		return true;
 	if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
 		return true;
@@ -62,7 +62,7 @@
 {
 	kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
 	if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
-	    (in_group_p(gid) || gid_eq(gid, kgid)))
+	    (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
 		return true;
 	if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
 		return true;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 309516e..43c8995 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -234,6 +234,13 @@
 				  ordered_list);
 		if (!test_bit(WORK_DONE_BIT, &work->flags))
 			break;
+		/*
+		 * Orders all subsequent loads after reading WORK_DONE_BIT;
+		 * paired with the smp_mb__before_atomic() in
+		 * btrfs_work_helper(), this guarantees that the ordered
+		 * function will see all updates from the ordinary work
+		 * function.
+		 */
+		smp_rmb();
 
 		/*
 		 * we are going to call the ordered done function, but
@@ -317,6 +324,13 @@
 	thresh_exec_hook(wq);
 	work->func(work);
 	if (need_order) {
+		/*
+		 * Ensures all memory accesses done in the work function are
+		 * ordered before setting the WORK_DONE_BIT, ensuring that the
+		 * thread which is going to execute the ordered work sees them.
+		 * Pairs with the smp_rmb() in run_ordered_work().
+		 */
+		smp_mb__before_atomic();
 		set_bit(WORK_DONE_BIT, &work->flags);
 		run_ordered_work(wq, work);
 	} else {
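
The pair of barriers added above follows the usual publish/consume pattern: the work thread makes the work function's stores visible before setting WORK_DONE_BIT, and the ordered-work thread orders its reads after testing the bit. A minimal user-space sketch of the same pairing, with C11 fences standing in for the kernel's smp_mb__before_atomic() and smp_rmb() (an illustration of the idiom, not the kernel primitives themselves):

	#include <stdatomic.h>
	#include <pthread.h>
	#include <assert.h>

	static int payload;		/* "ordinary work" results */
	static atomic_int done;		/* stands in for WORK_DONE_BIT */

	static void *work_fn(void *arg)
	{
		payload = 42;		/* work->func(work) side effects */
		atomic_thread_fence(memory_order_release);	/* ~ smp_mb__before_atomic() */
		atomic_store_explicit(&done, 1, memory_order_relaxed);
		return NULL;
	}

	static void *ordered_fn(void *arg)
	{
		while (!atomic_load_explicit(&done, memory_order_relaxed))
			;		/* ~ test_bit(WORK_DONE_BIT, ...) */
		atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
		assert(payload == 42);	/* guaranteed to see the update */
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;
		pthread_create(&a, NULL, work_fn, NULL);
		pthread_create(&b, NULL, ordered_fn, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		return 0;
	}

Without the fences, the consumer could observe done == 1 yet still read a stale payload on weakly ordered CPUs, which is exactly the race the two comments describe.
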
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59c3be8..514ead6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3978,11 +3978,23 @@
  */
 static void write_dev_flush(struct btrfs_device *device)
 {
-	struct request_queue *q = bdev_get_queue(device->bdev);
 	struct bio *bio = device->flush_bio;
 
+#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+	/*
+	 * When a disk has write caching disabled, we skip submission of a bio
+	 * with flush and sync requests before writing the superblock, since
+	 * it's not needed. However when the integrity checker is enabled, this
+	 * it's not needed. However, when the integrity checker is enabled,
+	 * this results in reports of metadata blocks referred to by a
+	 * superblock that were not properly flushed. So, for the sake of
+	 * simplicity, don't skip the bio submission when the integrity
+	 * checker is enabled, since it is a debug tool not meant for use
+	 * in non-debug builds.
+	struct request_queue *q = bdev_get_queue(device->bdev);
 	if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 		return;
+#endif
 
 	bio_reset(bio);
 	bio->bi_end_io = btrfs_end_empty_barrier;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fb8cc96..92138ac 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3985,6 +3985,10 @@
 	bool need_unlock; /* for mut. excl. ops lock */
 	int ret;
 
+	if (!arg)
+		btrfs_warn(fs_info,
+	"IOC_BALANCE ioctl (v1) is deprecated and will be removed in kernel 5.18");
+
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 65cb076..0fb90cb 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -125,6 +125,7 @@
 static int copy_compressed_data_to_page(char *compressed_data,
 					size_t compressed_size,
 					struct page **out_pages,
+					unsigned long max_nr_page,
 					u32 *cur_out,
 					const u32 sectorsize)
 {
@@ -133,6 +134,9 @@
 	struct page *cur_page;
 	char *kaddr;
 
+	if ((*cur_out / PAGE_SIZE) >= max_nr_page)
+		return -E2BIG;
+
 	/*
 	 * We never allow a segment header crossing sector boundary, previous
 	 * run should ensure we have enough space left inside the sector.
@@ -161,6 +165,10 @@
 				     orig_out + compressed_size - *cur_out);
 
 		kunmap(cur_page);
+
+		if ((*cur_out / PAGE_SIZE) >= max_nr_page)
+			return -E2BIG;
+
 		cur_page = out_pages[*cur_out / PAGE_SIZE];
 		/* Allocate a new page */
 		if (!cur_page) {
@@ -203,6 +211,7 @@
 	const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
 	struct page *page_in = NULL;
 	char *sizes_ptr;
+	const unsigned long max_nr_page = *out_pages;
 	int ret = 0;
 	/* Points to the file offset of input data */
 	u64 cur_in = start;
@@ -210,6 +219,7 @@
 	u32 cur_out = 0;
 	u32 len = *total_out;
 
+	ASSERT(max_nr_page > 0);
 	*out_pages = 0;
 	*total_out = 0;
 	*total_in = 0;
@@ -248,7 +258,8 @@
 		}
 
 		ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
-						   pages, &cur_out, sectorsize);
+						   pages, max_nr_page,
+						   &cur_out, sectorsize);
 		if (ret < 0)
 			goto out;
 
@@ -279,6 +290,8 @@
 	*total_out = cur_out;
 	*total_in = cur_in - start;
 out:
+	if (page_in)
+		put_page(page_in);
 	*out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE);
 	return ret;
 }
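
The -E2BIG checks added above guard the out_pages[] array against a compressed result needing more pages than the caller budgeted (max_nr_page, passed in through *out_pages). A reduced sketch of that guard pattern, with PAGE_SIZE and the lazily allocated page array as the only assumptions (hypothetical helper, not the btrfs function):

	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>

	#define PAGE_SIZE 4096UL

	/* Copy len bytes into an array of at most max_nr_page page buffers,
	 * failing with -E2BIG (as the patch does) rather than indexing past
	 * the caller's budget. */
	static int copy_bounded(char **pages, unsigned long max_nr_page,
				unsigned long *cur_out, const char *src, size_t len)
	{
		while (len) {
			unsigned long pg = *cur_out / PAGE_SIZE;
			unsigned long off = *cur_out % PAGE_SIZE;
			size_t n = len < PAGE_SIZE - off ? len : PAGE_SIZE - off;

			if (pg >= max_nr_page)
				return -E2BIG;	/* would overrun pages[] */
			if (!pages[pg])
				pages[pg] = malloc(PAGE_SIZE);	/* allocate lazily */

			memcpy(pages[pg] + off, src, n);
			src += n;
			len -= n;
			*cur_out += n;
		}
		return 0;
	}

	int main(void)
	{
		char *pages[2] = { NULL, NULL };
		unsigned long cur_out = 0;
		static char buf[3 * PAGE_SIZE];

		/* Two pages of budget, three pages of data: must fail cleanly. */
		return copy_bounded(pages, 2, &cur_out, buf, sizeof(buf)) == -E2BIG ? 0 : 1;
	}
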
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index cf82ea6..8f6ceea 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -73,8 +73,8 @@
 	u64			physical_for_dev_replace;
 	atomic_t		refs;
 	u8			mirror_num;
-	int			have_csum:1;
-	int			io_error:1;
+	unsigned int		have_csum:1;
+	unsigned int		io_error:1;
 	u8			csum[BTRFS_CSUM_SIZE];
 
 	struct scrub_recover	*recover;
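
The scrub.c hunk is the classic signed-bitfield fix: a plain int bitfield of width 1 can only represent 0 and -1, so storing 1 into "int have_csum:1" reads back as -1 on common ABIs and comparisons like "have_csum == 1" quietly fail. A two-field demonstration of why the fields must be unsigned:

	#include <stdio.h>

	struct flags {
		int		signed_bit:1;	/* old: int have_csum:1 */
		unsigned int	unsigned_bit:1;	/* new: unsigned int have_csum:1 */
	};

	int main(void)
	{
		struct flags f = { .signed_bit = 1, .unsigned_bit = 1 };

		/* On common ABIs this prints "signed: -1, unsigned: 1". */
		printf("signed: %d, unsigned: %u\n", f.signed_bit, f.unsigned_bit);
		return 0;
	}
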
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 61ac57b..0997e3c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7559,6 +7559,19 @@
 	fs_info->fs_devices->total_rw_bytes = 0;
 
 	/*
+	 * Lockdep complains about possible circular locking dependency between
+	 * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
+	 * used for freeze protection of a fs (struct super_block.s_writers),
+	 * which we take when starting a transaction, and extent buffers of the
+	 * chunk tree if we call read_one_dev() while holding a lock on an
+	 * extent buffer of the chunk tree. Since we are mounting the filesystem
+	 * and at this point there can't be any concurrent task modifying the
+	 * chunk tree, to keep it simple, just skip locking on the chunk tree.
+	 */
+	ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
+	path->skip_locking = 1;
+
+	/*
 	 * Read all device items, and then all the chunk items. All
 	 * device items are found before any chunk item (their object id
 	 * is smaller than the lowest possible object id for a chunk
@@ -7583,10 +7596,6 @@
 				goto error;
 			break;
 		}
-		/*
-		 * The nodes on level 1 are not locked but we don't need to do
-		 * that during mount time as nothing else can access the tree
-		 */
 		node = path->nodes[1];
 		if (node) {
 			if (last_ra_node != node->start) {
@@ -7614,7 +7623,6 @@
 			 * requirement for chunk allocation, see the comment on
 			 * top of btrfs_chunk_alloc() for details.
 			 */
-			ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 			ret = read_one_chunk(&found_key, leaf, chunk);
 			if (ret)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index f06b680..fc42dd0 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -28,10 +28,10 @@
 /* 307s to avoid pathologically clashing with transaction commit */
 #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
 
-static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
+static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
 						 size_t src_len)
 {
-	ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
+	zstd_parameters params = zstd_get_params(level, src_len);
 
 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
@@ -48,8 +48,8 @@
 	unsigned long last_used; /* jiffies */
 	struct list_head list;
 	struct list_head lru_list;
-	ZSTD_inBuffer in_buf;
-	ZSTD_outBuffer out_buf;
+	zstd_in_buffer in_buf;
+	zstd_out_buffer out_buf;
 };
 
 /*
@@ -155,12 +155,12 @@
 	unsigned int level;
 
 	for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
-		ZSTD_parameters params =
+		zstd_parameters params =
 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
 		size_t level_size =
 			max_t(size_t,
-			      ZSTD_CStreamWorkspaceBound(params.cParams),
-			      ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
+			      zstd_cstream_workspace_bound(&params.cParams),
+			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
 
 		max_size = max_t(size_t, max_size, level_size);
 		zstd_ws_mem_sizes[level - 1] = max_size;
@@ -371,7 +371,7 @@
 		unsigned long *total_in, unsigned long *total_out)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	ZSTD_CStream *stream;
+	zstd_cstream *stream;
 	int ret = 0;
 	int nr_pages = 0;
 	struct page *in_page = NULL;  /* The current page to read */
@@ -381,7 +381,7 @@
 	unsigned long len = *total_out;
 	const unsigned long nr_dest_pages = *out_pages;
 	unsigned long max_out = nr_dest_pages * PAGE_SIZE;
-	ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
+	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
 							   len);
 
 	*out_pages = 0;
@@ -389,10 +389,10 @@
 	*total_in = 0;
 
 	/* Initialize the stream */
-	stream = ZSTD_initCStream(params, len, workspace->mem,
+	stream = zstd_init_cstream(&params, len, workspace->mem,
 			workspace->size);
 	if (!stream) {
-		pr_warn("BTRFS: ZSTD_initCStream failed\n");
+		pr_warn("BTRFS: zstd_init_cstream failed\n");
 		ret = -EIO;
 		goto out;
 	}
@@ -418,11 +418,11 @@
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
+		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_compress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto out;
 		}
@@ -487,10 +487,10 @@
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_endStream(stream, &workspace->out_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_endStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		ret2 = zstd_end_stream(stream, &workspace->out_buf);
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_end_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto out;
 		}
@@ -548,17 +548,17 @@
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	struct page **pages_in = cb->compressed_pages;
 	size_t srclen = cb->compressed_len;
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	int ret = 0;
 	unsigned long page_in_index = 0;
 	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
 	unsigned long buf_start;
 	unsigned long total_out = 0;
 
-	stream = ZSTD_initDStream(
+	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (!stream) {
-		pr_debug("BTRFS: ZSTD_initDStream failed\n");
+		pr_debug("BTRFS: zstd_init_dstream failed\n");
 		ret = -EIO;
 		goto done;
 	}
@@ -574,11 +574,11 @@
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto done;
 		}
@@ -624,16 +624,16 @@
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	int ret = 0;
 	size_t ret2;
 	unsigned long total_out = 0;
 	unsigned long pg_offset = 0;
 
-	stream = ZSTD_initDStream(
+	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (!stream) {
-		pr_warn("BTRFS: ZSTD_initDStream failed\n");
+		pr_warn("BTRFS: zstd_init_dstream failed\n");
 		ret = -EIO;
 		goto finish;
 	}
@@ -657,15 +657,15 @@
 
 		/* Check if the frame is over and we still need more input */
 		if (ret2 == 0) {
-			pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
+			pr_debug("BTRFS: zstd_decompress_stream ended early\n");
 			ret = -EIO;
 			goto finish;
 		}
-		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto finish;
 		}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 99b80b5..e53c854 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -63,7 +63,7 @@
 	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-					struct page *page, void **_fsdata);
+					struct folio *folio, void **_fsdata);
 
 static inline struct ceph_snap_context *page_snap_context(struct page *page)
 {
@@ -317,13 +317,14 @@
 };
 
 /* read a single page, without unlocking it. */
-static int ceph_readpage(struct file *file, struct page *page)
+static int ceph_readpage(struct file *file, struct page *subpage)
 {
+	struct folio *folio = page_folio(subpage);
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_vino vino = ceph_vino(inode);
-	u64 off = page_offset(page);
-	u64 len = thp_size(page);
+	size_t len = folio_size(folio);
+	u64 off = folio_file_pos(folio);
 
 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
 		/*
@@ -331,19 +332,19 @@
 		 * into page cache while getting Fcr caps.
 		 */
 		if (off == 0) {
-			unlock_page(page);
+			folio_unlock(folio);
 			return -EINVAL;
 		}
-		zero_user_segment(page, 0, thp_size(page));
-		SetPageUptodate(page);
-		unlock_page(page);
+		zero_user_segment(&folio->page, 0, folio_size(folio));
+		folio_mark_uptodate(folio);
+		folio_unlock(folio);
 		return 0;
 	}
 
-	dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
-	     vino.ino, vino.snap, file, off, len, page, page->index);
+	dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
+	     vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
 
-	return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+	return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
 }
 
 static void ceph_readahead(struct readahead_control *ractl)
@@ -724,7 +725,7 @@
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
+	if (ceph_inode_is_shutdown(inode)) {
 		if (ci->i_wrbuffer_ref > 0) {
 			pr_warn_ratelimited(
 				"writepage_start %p %lld forced umount\n",
@@ -1145,12 +1146,12 @@
 ceph_find_incompatible(struct page *page)
 {
 	struct inode *inode = page->mapping->host;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
-	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-		dout(" page %p forced umount\n", page);
-		return ERR_PTR(-EIO);
+	if (ceph_inode_is_shutdown(inode)) {
+		dout(" page %p %llx:%llx is shutdown\n", page,
+		     ceph_vinop(inode));
+		return ERR_PTR(-ESTALE);
 	}
 
 	for (;;) {
@@ -1187,18 +1188,18 @@
 }
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-					struct page *page, void **_fsdata)
+					struct folio *folio, void **_fsdata)
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc;
 
-	snapc = ceph_find_incompatible(page);
+	snapc = ceph_find_incompatible(folio_page(folio, 0));
 	if (snapc) {
 		int r;
 
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 		if (IS_ERR(snapc))
 			return PTR_ERR(snapc);
 
@@ -1216,12 +1217,12 @@
  * clean, or already dirty within the same snap context.
  */
 static int ceph_write_begin(struct file *file, struct address_space *mapping,
-			    loff_t pos, unsigned len, unsigned flags,
+			    loff_t pos, unsigned len, unsigned aop_flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct page *page = NULL;
+	struct folio *folio = NULL;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int r;
 
@@ -1230,39 +1231,43 @@
 	 * for inline_version sent to the MDS.
 	 */
 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page)
+		unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+		if (aop_flags & AOP_FLAG_NOFS)
+			fgp_flags |= FGP_NOFS;
+		folio = __filemap_get_folio(mapping, index, fgp_flags,
+					    mapping_gfp_mask(mapping));
+		if (!folio)
 			return -ENOMEM;
 
 		/*
 		 * The inline_version on a new inode is set to 1. If that's the
-		 * case, then the page is brand new and isn't yet Uptodate.
+		 * case, then the folio is brand new and isn't yet Uptodate.
 		 */
 		r = 0;
 		if (index == 0 && ci->i_inline_version != 1) {
-			if (!PageUptodate(page)) {
+			if (!folio_test_uptodate(folio)) {
 				WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
 					  ci->i_inline_version);
 				r = -EINVAL;
 			}
 			goto out;
 		}
-		zero_user_segment(page, 0, thp_size(page));
-		SetPageUptodate(page);
+		zero_user_segment(&folio->page, 0, folio_size(folio));
+		folio_mark_uptodate(folio);
 		goto out;
 	}
 
-	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
 			      &ceph_netfs_read_ops, NULL);
 out:
 	if (r == 0)
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	if (r < 0) {
-		if (page)
-			put_page(page);
+		if (folio)
+			folio_put(folio);
 	} else {
-		WARN_ON_ONCE(!PageLocked(page));
-		*pagep = page;
+		WARN_ON_ONCE(!folio_test_locked(folio));
+		*pagep = &folio->page;
 	}
 	return r;
 }
@@ -1273,32 +1278,33 @@
  */
 static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  loff_t pos, unsigned len, unsigned copied,
-			  struct page *page, void *fsdata)
+			  struct page *subpage, void *fsdata)
 {
+	struct folio *folio = page_folio(subpage);
 	struct inode *inode = file_inode(file);
 	bool check_cap = false;
 
-	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
-	     inode, page, (int)pos, (int)copied, (int)len);
+	dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
+	     inode, folio, (int)pos, (int)copied, (int)len);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		/* just return that nothing was copied on a short copy */
 		if (copied < len) {
 			copied = 0;
 			goto out;
 		}
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	/* did file size increase? */
 	if (pos+copied > i_size_read(inode))
 		check_cap = ceph_inode_set_size(inode, pos+copied);
 
-	set_page_dirty(page);
+	folio_mark_dirty(folio);
 
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	if (check_cap)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
@@ -1306,17 +1312,6 @@
 	return copied;
 }
 
-/*
- * we set .direct_IO to indicate direct io is supported, but since we
- * intercept O_DIRECT reads and writes early, this function should
- * never get called.
- */
-static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
-{
-	WARN_ON(1);
-	return -EINVAL;
-}
-
 const struct address_space_operations ceph_aops = {
 	.readpage = ceph_readpage,
 	.readahead = ceph_readahead,
@@ -1327,7 +1322,7 @@
 	.set_page_dirty = ceph_set_page_dirty,
 	.invalidatepage = ceph_invalidatepage,
 	.releasepage = ceph_releasepage,
-	.direct_IO = ceph_direct_io,
+	.direct_IO = noop_direct_IO,
 };
 
 static void ceph_block_sigs(sigset_t *oldset)
@@ -1356,6 +1351,9 @@
 	sigset_t oldset;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 
+	if (ceph_inode_is_shutdown(inode))
+		return ret;
+
 	ceph_block_sigs(&oldset);
 
 	dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
@@ -1447,6 +1445,9 @@
 	sigset_t oldset;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 
+	if (ceph_inode_is_shutdown(inode))
+		return ret;
+
 	prealloc_cf = ceph_alloc_cap_flush();
 	if (!prealloc_cf)
 		return VM_FAULT_OOM;
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 9cfadbb..457afda 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -12,12 +12,6 @@
 #include "super.h"
 #include "cache.h"
 
-struct ceph_aux_inode {
-	u64 	version;
-	u64	mtime_sec;
-	u64	mtime_nsec;
-};
-
 struct fscache_netfs ceph_cache_netfs = {
 	.name		= "ceph",
 	.version	= 0,
@@ -109,20 +103,14 @@
 	void *cookie_netfs_data, const void *data, uint16_t dlen,
 	loff_t object_size)
 {
-	struct ceph_aux_inode aux;
 	struct ceph_inode_info* ci = cookie_netfs_data;
 	struct inode* inode = &ci->vfs_inode;
 
-	if (dlen != sizeof(aux) ||
+	if (dlen != sizeof(ci->i_version) ||
 	    i_size_read(inode) != object_size)
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
-	memset(&aux, 0, sizeof(aux));
-	aux.version = ci->i_version;
-	aux.mtime_sec = inode->i_mtime.tv_sec;
-	aux.mtime_nsec = inode->i_mtime.tv_nsec;
-
-	if (memcmp(data, &aux, sizeof(aux)) != 0)
+	if (*(u64 *)data != ci->i_version)
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
 	dout("ceph inode 0x%p cached okay\n", ci);
@@ -139,7 +127,6 @@
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_aux_inode aux;
 
 	/* No caching for filesystem */
 	if (!fsc->fscache)
@@ -151,14 +138,10 @@
 
 	inode_lock_nested(inode, I_MUTEX_CHILD);
 	if (!ci->fscache) {
-		memset(&aux, 0, sizeof(aux));
-		aux.version = ci->i_version;
-		aux.mtime_sec = inode->i_mtime.tv_sec;
-		aux.mtime_nsec = inode->i_mtime.tv_nsec;
 		ci->fscache = fscache_acquire_cookie(fsc->fscache,
 						     &ceph_fscache_inode_object_def,
 						     &ci->i_vino, sizeof(ci->i_vino),
-						     &aux, sizeof(aux),
+						     &ci->i_version, sizeof(ci->i_version),
 						     ci, i_size_read(inode), false);
 	}
 	inode_unlock(inode);
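
The cache.c change shrinks the fscache auxiliary key from a three-field struct to just i_version, and replaces the memcmp() of a locally rebuilt struct with a direct scalar compare. Beyond dropping the mtime check, this sidesteps a general hazard: memcmp() on a struct also compares padding bytes, which is why the old code had to memset() the aux struct before filling it. A small illustration of that hazard (the struct layout here is hypothetical, chosen to have trailing padding):

	#include <stdio.h>
	#include <string.h>

	struct aux {			/* u64 + u32 leaves 4 trailing pad bytes */
		unsigned long long	version;
		unsigned int		mtime_sec;
	};

	int main(void)
	{
		struct aux a, b;

		memset(&a, 0x00, sizeof(a));	/* what the old code did */
		memset(&b, 0xff, sizeof(b));	/* simulates stack garbage */
		a.version = 1; a.mtime_sec = 2;
		b.version = 1; b.mtime_sec = 2;

		/* Fields match, but memcmp() may still differ on the padding. */
		printf("memcmp: %d\n", memcmp(&a, &b, sizeof(a)));
		return 0;
	}

Comparing a single u64 directly, as the new code does, has no such trap.
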
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8f537f1..b9460b6 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1188,11 +1188,11 @@
 
 	lockdep_assert_held(&ci->i_ceph_lock);
 
-	fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+	fsc = ceph_inode_to_client(&ci->vfs_inode);
 	WARN_ON_ONCE(ci->i_auth_cap == cap &&
 		     !list_empty(&ci->i_dirty_item) &&
 		     !fsc->blocklisted &&
-		     READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
+		     !ceph_inode_is_shutdown(&ci->vfs_inode));
 
 	__ceph_remove_cap(cap, queue_release);
 }
@@ -1968,8 +1968,8 @@
 		}
 	}
 
-	dout("check_caps %p file_want %s used %s dirty %s flushing %s"
-	     " issued %s revoking %s retain %s %s%s\n", inode,
+	dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
+	     " issued %s revoking %s retain %s %s%s\n", ceph_vinop(inode),
 	     ceph_cap_string(file_wanted),
 	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
 	     ceph_cap_string(ci->i_flushing_caps),
@@ -1990,7 +1990,8 @@
 	    (revoking & (CEPH_CAP_FILE_CACHE|
 			 CEPH_CAP_FILE_LAZYIO)) && /*  or revoking cache */
 	    !tried_invalidate) {
-		dout("check_caps trying to invalidate on %p\n", inode);
+		dout("check_caps trying to invalidate on %llx.%llx\n",
+		     ceph_vinop(inode));
 		if (try_nonblocking_invalidate(inode) < 0) {
 			dout("check_caps queuing invalidate\n");
 			queue_invalidate = true;
@@ -2629,9 +2630,9 @@
  *
  * Returns 0 if caps were not able to be acquired (yet), 1 on success,
  * or a negative error code. There are 3 special error codes:
- *  -EAGAIN: need to sleep but non-blocking is specified
- *  -EFBIG:  ask caller to call check_max_size() and try again.
- *  -ESTALE: ask caller to call ceph_renew_caps() and try again.
+ *  -EAGAIN:  need to sleep but non-blocking is specified
+ *  -EFBIG:   ask caller to call check_max_size() and try again.
+ *  -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
  */
 enum {
 	/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
@@ -2679,7 +2680,7 @@
 			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
 			     inode, endoff, ci->i_max_size);
 			if (endoff > ci->i_requested_max_size)
-				ret = ci->i_auth_cap ? -EFBIG : -ESTALE;
+				ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN;
 			goto out_unlock;
 		}
 		/*
@@ -2749,9 +2750,9 @@
 			goto out_unlock;
 		}
 
-		if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-			dout("get_cap_refs %p forced umount\n", inode);
-			ret = -EIO;
+		if (ceph_inode_is_shutdown(inode)) {
+			dout("get_cap_refs %p inode is shutdown\n", inode);
+			ret = -ESTALE;
 			goto out_unlock;
 		}
 		mds_wanted = __ceph_caps_mds_wanted(ci, false);
@@ -2759,7 +2760,7 @@
 			dout("get_cap_refs %p need %s > mds_wanted %s\n",
 			     inode, ceph_cap_string(need),
 			     ceph_cap_string(mds_wanted));
-			ret = -ESTALE;
+			ret = -EUCLEAN;
 			goto out_unlock;
 		}
 
@@ -2843,7 +2844,7 @@
 
 	ret = try_get_cap_refs(inode, need, want, 0, flags, got);
 	/* three special error codes */
-	if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
+	if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN)
 		ret = 0;
 	return ret;
 }
@@ -2926,7 +2927,7 @@
 		}
 
 		if (ret < 0) {
-			if (ret == -EFBIG || ret == -ESTALE) {
+			if (ret == -EFBIG || ret == -EUCLEAN) {
 				int ret2 = ceph_wait_on_async_create(inode);
 				if (ret2 < 0)
 					return ret2;
@@ -2935,7 +2936,7 @@
 				check_max_size(inode, endoff);
 				continue;
 			}
-			if (ret == -ESTALE) {
+			if (ret == -EUCLEAN) {
 				/* session was killed, try renew caps */
 				ret = ceph_renew_caps(inode, flags);
 				if (ret == 0)
@@ -4315,7 +4316,7 @@
 				      i_dirty_item);
 		inode = &ci->vfs_inode;
 		ihold(inode);
-		dout("flush_dirty_caps %p\n", inode);
+		dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
 		spin_unlock(&mdsc->cap_dirty_lock);
 		ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
 		iput(inode);
@@ -4560,3 +4561,119 @@
 	spin_unlock(&dentry->d_lock);
 	return ret;
 }
+
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_cap_snap *capsnap;
+	int capsnap_release = 0;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+	while (!list_empty(&ci->i_cap_snaps)) {
+		capsnap = list_first_entry(&ci->i_cap_snaps,
+					   struct ceph_cap_snap, ci_item);
+		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+		ceph_put_snap_context(capsnap->context);
+		ceph_put_cap_snap(capsnap);
+		capsnap_release++;
+	}
+	wake_up_all(&ci->i_cap_wq);
+	wake_up_all(&mdsc->cap_flushing_wq);
+	return capsnap_release;
+}
+
+int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
+{
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	bool is_auth;
+	bool dirty_dropped = false;
+	int iputs = 0;
+
+	lockdep_assert_held(&ci->i_ceph_lock);
+
+	dout("removing cap %p, ci is %p, inode is %p\n",
+	     cap, ci, &ci->vfs_inode);
+
+	is_auth = (cap == ci->i_auth_cap);
+	__ceph_remove_cap(cap, false);
+	if (is_auth) {
+		struct ceph_cap_flush *cf;
+
+		if (ceph_inode_is_shutdown(inode)) {
+			if (inode->i_data.nrpages > 0)
+				*invalidate = true;
+			if (ci->i_wrbuffer_ref > 0)
+				mapping_set_error(&inode->i_data, -EIO);
+		}
+
+		spin_lock(&mdsc->cap_dirty_lock);
+
+		/* trash all of the cap flushes for this inode */
+		while (!list_empty(&ci->i_cap_flush_list)) {
+			cf = list_first_entry(&ci->i_cap_flush_list,
+					      struct ceph_cap_flush, i_list);
+			list_del_init(&cf->g_list);
+			list_del_init(&cf->i_list);
+			if (!cf->is_capsnap)
+				ceph_free_cap_flush(cf);
+		}
+
+		if (!list_empty(&ci->i_dirty_item)) {
+			pr_warn_ratelimited(
+				" dropping dirty %s state for %p %lld\n",
+				ceph_cap_string(ci->i_dirty_caps),
+				inode, ceph_ino(inode));
+			ci->i_dirty_caps = 0;
+			list_del_init(&ci->i_dirty_item);
+			dirty_dropped = true;
+		}
+		if (!list_empty(&ci->i_flushing_item)) {
+			pr_warn_ratelimited(
+				" dropping dirty+flushing %s state for %p %lld\n",
+				ceph_cap_string(ci->i_flushing_caps),
+				inode, ceph_ino(inode));
+			ci->i_flushing_caps = 0;
+			list_del_init(&ci->i_flushing_item);
+			mdsc->num_cap_flushing--;
+			dirty_dropped = true;
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
+
+		if (dirty_dropped) {
+			mapping_set_error(inode->i_mapping, -EIO);
+
+			if (ci->i_wrbuffer_ref_head == 0 &&
+			    ci->i_wr_ref == 0 &&
+			    ci->i_dirty_caps == 0 &&
+			    ci->i_flushing_caps == 0) {
+				ceph_put_snap_context(ci->i_head_snapc);
+				ci->i_head_snapc = NULL;
+			}
+		}
+
+		if (atomic_read(&ci->i_filelock_ref) > 0) {
+			/* make further file lock syscall return -EIO */
+			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+					    inode, ceph_ino(inode));
+		}
+
+		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
+			cf = ci->i_prealloc_cap_flush;
+			ci->i_prealloc_cap_flush = NULL;
+			if (!cf->is_capsnap)
+				ceph_free_cap_flush(cf);
+		}
+
+		if (!list_empty(&ci->i_cap_snaps))
+			iputs = remove_capsnaps(mdsc, inode);
+	}
+	if (dirty_dropped)
+		++iputs;
+	return iputs;
+}
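
A note on the error-code reshuffle in try_get_cap_refs() above: -ESTALE now means "inode is shutdown", and -EUCLEAN takes over the old "session was killed, renew caps and retry" role, with -EAGAIN and -EFBIG unchanged. The caller-side protocol is easiest to see in isolation; the userspace sketch below mirrors the retry loop in ceph_get_caps() (try_get_caps() here is a stub standing in for try_get_cap_refs(), not the kernel function):

#include <errno.h>
#include <stdio.h>

/* Stub: the first call asks for a cap renewal, the second succeeds. */
static int try_get_caps(int attempt)
{
	return attempt == 0 ? -EUCLEAN : 1;
}

int main(void)
{
	int attempt, ret;

	for (attempt = 0; ; attempt++) {
		ret = try_get_caps(attempt);
		if (ret == -EAGAIN)
			continue;	/* would sleep, then retry */
		if (ret == -EFBIG)
			continue;	/* would call check_max_size(), then retry */
		if (ret == -EUCLEAN) {
			printf("renewing caps\n");	/* would call ceph_renew_caps() */
			continue;
		}
		break;	/* got caps (ret == 1) or a hard error */
	}
	printf("result: %d\n", ret);
	return 0;
}
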
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 38b78b45..3cf7c9c 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -146,82 +146,93 @@
 		   name, total, avg, _min, max, sum);			\
 }
 
-static int metric_show(struct seq_file *s, void *p)
+static int metrics_file_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_client_metric *m = &mdsc->metric;
-	int nr_caps = 0;
-	s64 total, sum, avg, min, max, sq;
-	u64 sum_sz, avg_sz, min_sz, max_sz;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
 
-	sum = percpu_counter_sum(&m->total_inodes);
 	seq_printf(s, "item                               total\n");
 	seq_printf(s, "------------------------------------------\n");
-	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
-		   atomic64_read(&m->opened_files), sum);
-	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
-		   atomic64_read(&m->total_caps), sum);
-	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
-		   percpu_counter_sum(&m->opened_inodes), sum);
+	seq_printf(s, "%-35s%lld\n", "total inodes",
+		   percpu_counter_sum(&m->total_inodes));
+	seq_printf(s, "%-35s%lld\n", "opened files",
+		   atomic64_read(&m->opened_files));
+	seq_printf(s, "%-35s%lld\n", "pinned i_caps",
+		   atomic64_read(&m->total_caps));
+	seq_printf(s, "%-35s%lld\n", "opened inodes",
+		   percpu_counter_sum(&m->opened_inodes));
+	return 0;
+}
 
-	seq_printf(s, "\n");
+static const char * const metric_str[] = {
+	"read",
+	"write",
+	"metadata",
+	"copyfrom"
+};
+static int metrics_latency_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *cm = &fsc->mdsc->metric;
+	struct ceph_metric *m;
+	s64 total, sum, avg, min, max, sq;
+	int i;
+
 	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
 	seq_printf(s, "-----------------------------------------------------------------------------------\n");
 
-	spin_lock(&m->read_metric_lock);
-	total = m->total_reads;
-	sum = m->read_latency_sum;
-	avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-	min = m->read_latency_min;
-	max = m->read_latency_max;
-	sq = m->read_latency_sq_sum;
-	spin_unlock(&m->read_metric_lock);
-	CEPH_LAT_METRIC_SHOW("read", total, avg, min, max, sq);
+	for (i = 0; i < METRIC_MAX; i++) {
+		m = &cm->metric[i];
+		spin_lock(&m->lock);
+		total = m->total;
+		sum = m->latency_sum;
+		avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+		min = m->latency_min;
+		max = m->latency_max;
+		sq = m->latency_sq_sum;
+		spin_unlock(&m->lock);
+		CEPH_LAT_METRIC_SHOW(metric_str[i], total, avg, min, max, sq);
+	}
 
-	spin_lock(&m->write_metric_lock);
-	total = m->total_writes;
-	sum = m->write_latency_sum;
-	avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-	min = m->write_latency_min;
-	max = m->write_latency_max;
-	sq = m->write_latency_sq_sum;
-	spin_unlock(&m->write_metric_lock);
-	CEPH_LAT_METRIC_SHOW("write", total, avg, min, max, sq);
+	return 0;
+}
 
-	spin_lock(&m->metadata_metric_lock);
-	total = m->total_metadatas;
-	sum = m->metadata_latency_sum;
-	avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-	min = m->metadata_latency_min;
-	max = m->metadata_latency_max;
-	sq = m->metadata_latency_sq_sum;
-	spin_unlock(&m->metadata_metric_lock);
-	CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq);
+static int metrics_size_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *cm = &fsc->mdsc->metric;
+	struct ceph_metric *m;
+	s64 total;
+	u64 sum, avg, min, max;
+	int i;
 
-	seq_printf(s, "\n");
 	seq_printf(s, "item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)\n");
 	seq_printf(s, "----------------------------------------------------------------------------------------\n");
 
-	spin_lock(&m->read_metric_lock);
-	total = m->total_reads;
-	sum_sz = m->read_size_sum;
-	avg_sz = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum_sz, total) : 0;
-	min_sz = m->read_size_min;
-	max_sz = m->read_size_max;
-	spin_unlock(&m->read_metric_lock);
-	CEPH_SZ_METRIC_SHOW("read", total, avg_sz, min_sz, max_sz, sum_sz);
+	for (i = 0; i < METRIC_MAX; i++) {
+		/* skip 'metadata' as it doesn't use the size metric */
+		if (i == METRIC_METADATA)
+			continue;
+		m = &cm->metric[i];
+		spin_lock(&m->lock);
+		total = m->total;
+		sum = m->size_sum;
+		avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+		min = m->size_min;
+		max = m->size_max;
+		spin_unlock(&m->lock);
+		CEPH_SZ_METRIC_SHOW(metric_str[i], total, avg, min, max, sum);
+	}
 
-	spin_lock(&m->write_metric_lock);
-	total = m->total_writes;
-	sum_sz = m->write_size_sum;
-	avg_sz = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum_sz, total) : 0;
-	min_sz = m->write_size_min;
-	max_sz = m->write_size_max;
-	spin_unlock(&m->write_metric_lock);
-	CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz);
+	return 0;
+}
 
-	seq_printf(s, "\n");
+static int metrics_caps_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
+	int nr_caps = 0;
+
 	seq_printf(s, "item          total           miss            hit\n");
 	seq_printf(s, "-------------------------------------------------\n");
 
@@ -350,8 +361,11 @@
 DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
-DEFINE_SHOW_ATTRIBUTE(metric);
 DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(metrics_file);
+DEFINE_SHOW_ATTRIBUTE(metrics_latency);
+DEFINE_SHOW_ATTRIBUTE(metrics_size);
+DEFINE_SHOW_ATTRIBUTE(metrics_caps);
 
 
 /*
@@ -385,8 +399,9 @@
 	debugfs_remove(fsc->debugfs_mdsmap);
 	debugfs_remove(fsc->debugfs_mds_sessions);
 	debugfs_remove(fsc->debugfs_caps);
-	debugfs_remove(fsc->debugfs_metric);
+	debugfs_remove(fsc->debugfs_status);
 	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
@@ -426,12 +441,6 @@
 						fsc,
 						&mdsc_fops);
 
-	fsc->debugfs_metric = debugfs_create_file("metrics",
-						  0400,
-						  fsc->client->debugfs_dir,
-						  fsc,
-						  &metric_fops);
-
 	fsc->debugfs_caps = debugfs_create_file("caps",
 						0400,
 						fsc->client->debugfs_dir,
@@ -443,6 +452,18 @@
 						  fsc->client->debugfs_dir,
 						  fsc,
 						  &status_fops);
+
+	fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
+						      fsc->client->debugfs_dir);
+
+	debugfs_create_file("file", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_file_fops);
+	debugfs_create_file("latency", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_latency_fops);
+	debugfs_create_file("size", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_size_fops);
+	debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_caps_fops);
 }
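
With the split above, each metrics table lands in its own file under a per-client "metrics" directory in debugfs. A minimal consumer, assuming debugfs is mounted at the usual place (the <fsid>.client<id> directory name below is illustrative and varies per mount):

#include <stdio.h>

int main(void)
{
	/* hypothetical path; substitute your client's directory */
	const char *path = "/sys/kernel/debug/ceph/"
		"28f7427e-5558-4ffd-ae1a-51ec3042759a.client4161/metrics/latency";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
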
 
 
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 1d65934..e0fa66ac 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -157,6 +157,11 @@
 		ceph_mdsc_put_request(req);
 		if (!inode)
 			return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
+	} else {
+		if (ceph_inode_is_shutdown(inode)) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
 	}
 	return inode;
 }
@@ -223,8 +228,13 @@
 		return ERR_PTR(-ESTALE);
 
 	inode = ceph_find_inode(sb, vino);
-	if (inode)
+	if (inode) {
+		if (ceph_inode_is_shutdown(inode)) {
+			iput(inode);
+			return ERR_PTR(-ESTALE);
+		}
 		return d_obtain_alias(inode);
+	}
 
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
 				       USE_ANY_MDS);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b129ea5..02a0a0f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -525,6 +525,7 @@
 
 	if (result) {
 		struct dentry *dentry = req->r_dentry;
+		struct inode *inode = d_inode(dentry);
 		int pathlen = 0;
 		u64 base = 0;
 		char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
@@ -534,7 +535,8 @@
 		if (!d_unhashed(dentry))
 			d_drop(dentry);
 
-		/* FIXME: start returning I/O errors on all accesses? */
+		ceph_inode_shutdown(inode);
+
 		pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
 			base, IS_ERR(path) ? "<<bad>>" : path, result);
 		ceph_mdsc_free_path(path, pathlen);
@@ -556,7 +558,7 @@
 		}
 		ceph_kick_flushing_inode_caps(req->r_session, ci);
 		spin_unlock(&ci->i_ceph_lock);
-	} else {
+	} else if (!result) {
 		pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__,
 			req->r_deleg_ino);
 	}
@@ -845,6 +847,7 @@
 	ssize_t ret;
 	u64 off = iocb->ki_pos;
 	u64 len = iov_iter_count(to);
+	u64 i_size;
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
@@ -868,7 +871,6 @@
 		struct page **pages;
 		int num_pages;
 		size_t page_off;
-		u64 i_size;
 		bool more;
 		int idx;
 		size_t left;
@@ -951,11 +953,14 @@
 	}
 
 	if (off > iocb->ki_pos) {
-		if (ret >= 0 &&
-		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
+		if (off >= i_size) {
 			*retry_op = CHECK_EOF;
-		ret = off - iocb->ki_pos;
-		iocb->ki_pos = off;
+			ret = i_size - iocb->ki_pos;
+			iocb->ki_pos = i_size;
+		} else {
+			ret = off - iocb->ki_pos;
+			iocb->ki_pos = off;
+		}
 	}
 
 	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
@@ -1526,6 +1531,9 @@
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
 	     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	if (direct_lock)
 		ceph_start_io_direct(inode);
 	else
@@ -1678,6 +1686,9 @@
 	loff_t pos;
 	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
@@ -2200,6 +2211,54 @@
 	return 0;
 }
 
+static struct ceph_osd_request *
+ceph_alloc_copyfrom_request(struct ceph_osd_client *osdc,
+			    u64 src_snapid,
+			    struct ceph_object_id *src_oid,
+			    struct ceph_object_locator *src_oloc,
+			    struct ceph_object_id *dst_oid,
+			    struct ceph_object_locator *dst_oloc,
+			    u32 truncate_seq, u64 truncate_size)
+{
+	struct ceph_osd_request *req;
+	int ret;
+	u32 src_fadvise_flags =
+		CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+		CEPH_OSD_OP_FLAG_FADVISE_NOCACHE;
+	u32 dst_fadvise_flags =
+		CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+		CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
+
+	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+	ret = osd_req_op_copy_from_init(req, src_snapid, 0,
+					src_oid, src_oloc,
+					src_fadvise_flags,
+					dst_fadvise_flags,
+					truncate_seq,
+					truncate_size,
+					CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+	if (ret)
+		goto out;
+
+	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	return req;
+
+out:
+	ceph_osdc_put_request(req);
+	return ERR_PTR(ret);
+}
+
 static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
 				    struct ceph_inode_info *dst_ci, u64 *dst_off,
 				    struct ceph_fs_client *fsc,
@@ -2207,6 +2266,8 @@
 {
 	struct ceph_object_locator src_oloc, dst_oloc;
 	struct ceph_object_id src_oid, dst_oid;
+	struct ceph_osd_client *osdc;
+	struct ceph_osd_request *req;
 	size_t bytes = 0;
 	u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
 	u32 src_objlen, dst_objlen;
@@ -2217,6 +2278,7 @@
 	src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
 	dst_oloc.pool = dst_ci->i_layout.pool_id;
 	dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+	osdc = &fsc->client->osdc;
 
 	while (len >= object_size) {
 		ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
@@ -2232,17 +2294,22 @@
 		ceph_oid_printf(&dst_oid, "%llx.%08llx",
 				dst_ci->i_vino.ino, dst_objnum);
 		/* Do an object remote copy */
-		ret = ceph_osdc_copy_from(&fsc->client->osdc,
-					  src_ci->i_vino.snap, 0,
-					  &src_oid, &src_oloc,
-					  CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-					  CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
-					  &dst_oid, &dst_oloc,
-					  CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-					  CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
-					  dst_ci->i_truncate_seq,
-					  dst_ci->i_truncate_size,
-					  CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+		req = ceph_alloc_copyfrom_request(osdc, src_ci->i_vino.snap,
+						  &src_oid, &src_oloc,
+						  &dst_oid, &dst_oloc,
+						  dst_ci->i_truncate_seq,
+						  dst_ci->i_truncate_size);
+		if (IS_ERR(req))
+			ret = PTR_ERR(req);
+		else {
+			ceph_osdc_start_request(osdc, req, false);
+			ret = ceph_osdc_wait_request(osdc, req);
+			ceph_update_copyfrom_metrics(&fsc->mdsc->metric,
+						     req->r_start_latency,
+						     req->r_end_latency,
+						     object_size, ret);
+			ceph_osdc_put_request(req);
+		}
 		if (ret) {
 			if (ret == -EOPNOTSUPP) {
 				fsc->have_copy_from2 = false;
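
Besides the copy-from rework just above, the ceph_sync_read() hunk earlier in this file changes EOF handling: when the OSD read advances the offset past i_size, the returned byte count and the file position are now clamped to i_size rather than reporting everything that came back. A standalone sketch of that arithmetic (finish_read() is an illustrative name, not a kernel function):

#include <stdio.h>

/*
 * Mirror of the clamping logic: report bytes only up to i_size and
 * leave the position there when the raw read overshot EOF.
 */
static long finish_read(unsigned long long *ki_pos,
			unsigned long long off, unsigned long long i_size)
{
	long ret = 0;

	if (off > *ki_pos) {
		if (off >= i_size) {
			ret = (long)(i_size - *ki_pos);
			*ki_pos = i_size;
		} else {
			ret = (long)(off - *ki_pos);
			*ki_pos = off;
		}
	}
	return ret;
}

int main(void)
{
	unsigned long long pos = 4000;
	/* the object read returned data out to offset 8192, but i_size is 4096 */
	long n = finish_read(&pos, 8192, 4096);

	printf("returned %ld bytes, new pos %llu\n", n, pos); /* 96, 4096 */
	return 0;
}
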
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1c75741..e3322fc 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1841,15 +1841,14 @@
 static void ceph_do_invalidate_pages(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	u32 orig_gen;
 	int check = 0;
 
 	mutex_lock(&ci->i_truncate_mutex);
 
-	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-		pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
-				    inode, ceph_ino(inode));
+	if (ceph_inode_is_shutdown(inode)) {
+		pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n",
+				    __func__, ceph_vinop(inode));
 		mapping_set_error(inode->i_mapping, -EIO);
 		truncate_pagecache(inode, 0);
 		mutex_unlock(&ci->i_truncate_mutex);
@@ -1871,7 +1870,8 @@
 
 	ceph_fscache_invalidate(inode);
 	if (invalidate_inode_pages2(inode->i_mapping) < 0) {
-		pr_err("invalidate_pages %p fails\n", inode);
+		pr_err("invalidate_inode_pages2 %llx.%llx failed\n",
+		       ceph_vinop(inode));
 	}
 
 	spin_lock(&ci->i_ceph_lock);
@@ -2103,12 +2103,14 @@
 		loff_t isize = i_size_read(inode);
 
 		dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
-		if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > isize) {
-			i_size_write(inode, attr->ia_size);
-			inode->i_blocks = calc_inode_blocks(attr->ia_size);
-			ci->i_reported_size = attr->ia_size;
-			dirtied |= CEPH_CAP_FILE_EXCL;
-			ia_valid |= ATTR_MTIME;
+		if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+			if (attr->ia_size > isize) {
+				i_size_write(inode, attr->ia_size);
+				inode->i_blocks = calc_inode_blocks(attr->ia_size);
+				ci->i_reported_size = attr->ia_size;
+				dirtied |= CEPH_CAP_FILE_EXCL;
+				ia_valid |= ATTR_MTIME;
+			}
 		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
 			   attr->ia_size != isize) {
 			req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
@@ -2217,6 +2219,9 @@
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	err = setattr_prepare(&init_user_ns, dentry, attr);
 	if (err != 0)
 		return err;
@@ -2347,6 +2352,9 @@
 	u32 valid_mask = STATX_BASIC_STATS;
 	int err = 0;
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	/* Skip the getattr altogether if we're asked not to sync */
 	if (!(flags & AT_STATX_DONT_SYNC)) {
 		err = ceph_do_getattr(inode,
@@ -2394,3 +2402,27 @@
 	stat->result_mask = request_mask & valid_mask;
 	return err;
 }
+
+void ceph_inode_shutdown(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct rb_node *p;
+	int iputs = 0;
+	bool invalidate = false;
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_ceph_flags |= CEPH_I_SHUTDOWN;
+	p = rb_first(&ci->i_caps);
+	while (p) {
+		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+
+		p = rb_next(p);
+		iputs += ceph_purge_inode_cap(inode, cap, &invalidate);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (invalidate)
+		ceph_queue_invalidate(inode);
+	while (iputs--)
+		iput(inode);
+}
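
ceph_inode_shutdown() relies on a pattern used throughout this series: work done under i_ceph_lock only counts the inode references to drop, and the iput() calls are issued after the spinlock is released, since the final iput() can sleep. A userspace analogue of "tally under the lock, release after" (pthread mutex standing in for the spinlock; all names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int refcount = 3;

/* pretend each purged cap accounts for one inode reference */
static int purge_one(void)
{
	return 1;
}

int main(void)
{
	int drops = 0, i;

	pthread_mutex_lock(&lock);
	for (i = 0; i < 2; i++)
		drops += purge_one();	/* only tally while "atomic" */
	pthread_mutex_unlock(&lock);

	while (drops--)			/* drop after unlocking; may sleep */
		refcount--;

	printf("refcount now %d\n", refcount);	/* 1 */
	return 0;
}
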
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d8c3106..d1f154a 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -241,6 +241,9 @@
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
 
 	/* set wait bit as appropriate, then make the command as Ceph expects it */
@@ -303,6 +306,9 @@
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
 
+	if (ceph_inode_is_shutdown(inode))
+		return -ESTALE;
+
 	dout("ceph_flock, fl_file: %p\n", fl->fl_file);
 
 	spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d64413a..250aad3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1590,129 +1590,23 @@
 	return ret;
 }
 
-static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap_snap *capsnap;
-	int capsnap_release = 0;
-
-	lockdep_assert_held(&ci->i_ceph_lock);
-
-	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
-
-	while (!list_empty(&ci->i_cap_snaps)) {
-		capsnap = list_first_entry(&ci->i_cap_snaps,
-					   struct ceph_cap_snap, ci_item);
-		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
-		ceph_put_snap_context(capsnap->context);
-		ceph_put_cap_snap(capsnap);
-		capsnap_release++;
-	}
-	wake_up_all(&ci->i_cap_wq);
-	wake_up_all(&mdsc->cap_flushing_wq);
-	return capsnap_release;
-}
-
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 				  void *arg)
 {
-	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	LIST_HEAD(to_remove);
-	bool dirty_dropped = false;
 	bool invalidate = false;
-	int capsnap_release = 0;
+	int iputs;
 
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap, false);
-	if (!ci->i_auth_cap) {
-		struct ceph_cap_flush *cf;
-
-		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-			if (inode->i_data.nrpages > 0)
-				invalidate = true;
-			if (ci->i_wrbuffer_ref > 0)
-				mapping_set_error(&inode->i_data, -EIO);
-		}
-
-		while (!list_empty(&ci->i_cap_flush_list)) {
-			cf = list_first_entry(&ci->i_cap_flush_list,
-					      struct ceph_cap_flush, i_list);
-			list_move(&cf->i_list, &to_remove);
-		}
-
-		spin_lock(&mdsc->cap_dirty_lock);
-
-		list_for_each_entry(cf, &to_remove, i_list)
-			list_del_init(&cf->g_list);
-
-		if (!list_empty(&ci->i_dirty_item)) {
-			pr_warn_ratelimited(
-				" dropping dirty %s state for %p %lld\n",
-				ceph_cap_string(ci->i_dirty_caps),
-				inode, ceph_ino(inode));
-			ci->i_dirty_caps = 0;
-			list_del_init(&ci->i_dirty_item);
-			dirty_dropped = true;
-		}
-		if (!list_empty(&ci->i_flushing_item)) {
-			pr_warn_ratelimited(
-				" dropping dirty+flushing %s state for %p %lld\n",
-				ceph_cap_string(ci->i_flushing_caps),
-				inode, ceph_ino(inode));
-			ci->i_flushing_caps = 0;
-			list_del_init(&ci->i_flushing_item);
-			mdsc->num_cap_flushing--;
-			dirty_dropped = true;
-		}
-		spin_unlock(&mdsc->cap_dirty_lock);
-
-		if (dirty_dropped) {
-			mapping_set_error(inode->i_mapping, -EIO);
-
-			if (ci->i_wrbuffer_ref_head == 0 &&
-			    ci->i_wr_ref == 0 &&
-			    ci->i_dirty_caps == 0 &&
-			    ci->i_flushing_caps == 0) {
-				ceph_put_snap_context(ci->i_head_snapc);
-				ci->i_head_snapc = NULL;
-			}
-		}
-
-		if (atomic_read(&ci->i_filelock_ref) > 0) {
-			/* make further file lock syscall return -EIO */
-			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
-			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
-					    inode, ceph_ino(inode));
-		}
-
-		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
-			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
-			ci->i_prealloc_cap_flush = NULL;
-		}
-
-		if (!list_empty(&ci->i_cap_snaps))
-			capsnap_release = remove_capsnaps(mdsc, inode);
-	}
+	iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
 	spin_unlock(&ci->i_ceph_lock);
-	while (!list_empty(&to_remove)) {
-		struct ceph_cap_flush *cf;
-		cf = list_first_entry(&to_remove,
-				      struct ceph_cap_flush, i_list);
-		list_del_init(&cf->i_list);
-		if (!cf->is_capsnap)
-			ceph_free_cap_flush(cf);
-	}
 
 	wake_up_all(&ci->i_cap_wq);
 	if (invalidate)
 		ceph_queue_invalidate(inode);
-	if (dirty_dropped)
-		iput(inode);
-	while (capsnap_release--)
+	while (iputs--)
 		iput(inode);
 	return 0;
 }
@@ -3467,9 +3361,14 @@
 
 	if (msg_version >= 3) {
 		u32 len;
-		/* version >= 2, metadata */
-		if (__decode_session_metadata(&p, end, &blocklisted) < 0)
+		/* For versions >= 2 and < 5, decode the metadata; from
+		 * version 5 on it is carried in the flags, so skip it here.
+		 */
+		if (msg_version >= 5)
+			ceph_decode_skip_map(&p, end, string, string, bad);
+		else if (__decode_session_metadata(&p, end, &blocklisted) < 0)
 			goto bad;
+
 		/* version >= 3, feature bits */
 		ceph_decode_32_safe(&p, end, len, bad);
 		if (len) {
@@ -3478,6 +3377,18 @@
 		}
 	}
 
+	if (msg_version >= 5) {
+		u32 flags;
+		/* version >= 4, struct_v, struct_cv, len, metric_spec */
+		ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
+		/* version >= 5, flags */
+		ceph_decode_32_safe(&p, end, flags, bad);
+		if (flags & CEPH_SESSION_BLOCKLISTED) {
+			pr_warn("mds%d session blocklisted\n", session->s_mds);
+			blocklisted = true;
+		}
+	}
+
 	mutex_lock(&mdsc->mutex);
 	if (op == CEPH_SESSION_CLOSE) {
 		ceph_get_mds_session(session);
@@ -5072,7 +4983,8 @@
 	return;
 
 bad:
-	pr_err("error decoding fsmap\n");
+	pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
+	ceph_umount_begin(mdsc->fsc->sb);
 err_out:
 	mutex_lock(&mdsc->mutex);
 	mdsc->mdsmap_err = err;
@@ -5139,7 +5051,8 @@
 bad_unlock:
 	mutex_unlock(&mdsc->mutex);
 bad:
-	pr_err("error decoding mdsmap %d\n", err);
+	pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
+	ceph_umount_begin(mdsc->fsc->sb);
 	return;
 }
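
The session-message hunk above shows Ceph's usual handling of versioned payloads: fields that a newer encoding supersedes are skipped by length (ceph_decode_skip_map(), ceph_decode_skip_n()) rather than parsed, and only then are the newer fields such as the blocklisted flag read. A simplified sketch of length-prefixed skipping over a made-up little-endian wire layout (skip_lenfield() is illustrative, not a libceph helper):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Skip one length-prefixed field; fail if the buffer is too short. */
static int skip_lenfield(const uint8_t **p, const uint8_t *end)
{
	uint32_t len;

	if (end - *p < (long)sizeof(len))
		return -1;
	memcpy(&len, *p, sizeof(len));	/* demo assumes a little-endian host */
	*p += sizeof(len);
	if ((uint32_t)(end - *p) < len)
		return -1;
	*p += len;
	return 0;
}

int main(void)
{
	/* 4-byte length (2) + 2 payload bytes we don't care about + flags byte */
	const uint8_t buf[] = { 2, 0, 0, 0, 0xaa, 0xbb, 0x01 };
	const uint8_t *p = buf, *end = buf + sizeof(buf);

	if (skip_lenfield(&p, end) == 0)
		printf("flags byte: 0x%02x\n", *p);	/* 0x01 */
	return 0;
}
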
 
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 61d67cb..3038773 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -263,10 +263,6 @@
 				goto nomem;
 			for (j = 0; j < num_export_targets; j++) {
 				target = ceph_decode_32(&pexport_targets);
-				if (target >= m->possible_max_rank) {
-					err = -EIO;
-					goto corrupt;
-				}
 				info->export_targets[j] = target;
 			}
 		} else {
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 04d5df2..c57699d 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -62,7 +62,7 @@
 	read->header.ver = 1;
 	read->header.compat = 1;
 	read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
-	sum = m->read_latency_sum;
+	sum = m->metric[METRIC_READ].latency_sum;
 	jiffies_to_timespec64(sum, &ts);
 	read->sec = cpu_to_le32(ts.tv_sec);
 	read->nsec = cpu_to_le32(ts.tv_nsec);
@@ -74,7 +74,7 @@
 	write->header.ver = 1;
 	write->header.compat = 1;
 	write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
-	sum = m->write_latency_sum;
+	sum = m->metric[METRIC_WRITE].latency_sum;
 	jiffies_to_timespec64(sum, &ts);
 	write->sec = cpu_to_le32(ts.tv_sec);
 	write->nsec = cpu_to_le32(ts.tv_nsec);
@@ -86,7 +86,7 @@
 	meta->header.ver = 1;
 	meta->header.compat = 1;
 	meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
-	sum = m->metadata_latency_sum;
+	sum = m->metric[METRIC_METADATA].latency_sum;
 	jiffies_to_timespec64(sum, &ts);
 	meta->sec = cpu_to_le32(ts.tv_sec);
 	meta->nsec = cpu_to_le32(ts.tv_nsec);
@@ -141,8 +141,8 @@
 	rsize->header.ver = 1;
 	rsize->header.compat = 1;
 	rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
-	rsize->total_ops = cpu_to_le64(m->total_reads);
-	rsize->total_size = cpu_to_le64(m->read_size_sum);
+	rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
+	rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
 	items++;
 
 	/* encode the write io size metric */
@@ -151,8 +151,8 @@
 	wsize->header.ver = 1;
 	wsize->header.compat = 1;
 	wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
-	wsize->total_ops = cpu_to_le64(m->total_writes);
-	wsize->total_size = cpu_to_le64(m->write_size_sum);
+	wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
+	wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
 	items++;
 
 	put_unaligned_le32(items, &head->num);
@@ -220,7 +220,8 @@
 
 int ceph_metric_init(struct ceph_client_metric *m)
 {
-	int ret;
+	struct ceph_metric *metric;
+	int ret, i;
 
 	if (!m)
 		return -EINVAL;
@@ -243,32 +244,18 @@
 	if (ret)
 		goto err_i_caps_mis;
 
-	spin_lock_init(&m->read_metric_lock);
-	m->read_latency_sq_sum = 0;
-	m->read_latency_min = KTIME_MAX;
-	m->read_latency_max = 0;
-	m->total_reads = 0;
-	m->read_latency_sum = 0;
-	m->read_size_min = U64_MAX;
-	m->read_size_max = 0;
-	m->read_size_sum = 0;
-
-	spin_lock_init(&m->write_metric_lock);
-	m->write_latency_sq_sum = 0;
-	m->write_latency_min = KTIME_MAX;
-	m->write_latency_max = 0;
-	m->total_writes = 0;
-	m->write_latency_sum = 0;
-	m->write_size_min = U64_MAX;
-	m->write_size_max = 0;
-	m->write_size_sum = 0;
-
-	spin_lock_init(&m->metadata_metric_lock);
-	m->metadata_latency_sq_sum = 0;
-	m->metadata_latency_min = KTIME_MAX;
-	m->metadata_latency_max = 0;
-	m->total_metadatas = 0;
-	m->metadata_latency_sum = 0;
+	for (i = 0; i < METRIC_MAX; i++) {
+		metric = &m->metric[i];
+		spin_lock_init(&metric->lock);
+		metric->size_sum = 0;
+		metric->size_min = U64_MAX;
+		metric->size_max = 0;
+		metric->total = 0;
+		metric->latency_sum = 0;
+		metric->latency_sq_sum = 0;
+		metric->latency_min = KTIME_MAX;
+		metric->latency_max = 0;
+	}
 
 	atomic64_set(&m->opened_files, 0);
 	ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
@@ -338,9 +325,9 @@
 	*sq_sump += sq;
 }
 
-void ceph_update_read_metrics(struct ceph_client_metric *m,
-			      ktime_t r_start, ktime_t r_end,
-			      unsigned int size, int rc)
+void ceph_update_metrics(struct ceph_metric *m,
+			 ktime_t r_start, ktime_t r_end,
+			 unsigned int size, int rc)
 {
 	ktime_t lat = ktime_sub(r_end, r_start);
 	ktime_t total;
@@ -348,63 +335,12 @@
 	if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
 		return;
 
-	spin_lock(&m->read_metric_lock);
-	total = ++m->total_reads;
-	m->read_size_sum += size;
-	m->read_latency_sum += lat;
-	METRIC_UPDATE_MIN_MAX(m->read_size_min,
-			      m->read_size_max,
-			      size);
-	METRIC_UPDATE_MIN_MAX(m->read_latency_min,
-			      m->read_latency_max,
-			      lat);
-	__update_stdev(total, m->read_latency_sum,
-		       &m->read_latency_sq_sum, lat);
-	spin_unlock(&m->read_metric_lock);
-}
-
-void ceph_update_write_metrics(struct ceph_client_metric *m,
-			       ktime_t r_start, ktime_t r_end,
-			       unsigned int size, int rc)
-{
-	ktime_t lat = ktime_sub(r_end, r_start);
-	ktime_t total;
-
-	if (unlikely(rc && rc != -ETIMEDOUT))
-		return;
-
-	spin_lock(&m->write_metric_lock);
-	total = ++m->total_writes;
-	m->write_size_sum += size;
-	m->write_latency_sum += lat;
-	METRIC_UPDATE_MIN_MAX(m->write_size_min,
-			      m->write_size_max,
-			      size);
-	METRIC_UPDATE_MIN_MAX(m->write_latency_min,
-			      m->write_latency_max,
-			      lat);
-	__update_stdev(total, m->write_latency_sum,
-		       &m->write_latency_sq_sum, lat);
-	spin_unlock(&m->write_metric_lock);
-}
-
-void ceph_update_metadata_metrics(struct ceph_client_metric *m,
-				  ktime_t r_start, ktime_t r_end,
-				  int rc)
-{
-	ktime_t lat = ktime_sub(r_end, r_start);
-	ktime_t total;
-
-	if (unlikely(rc && rc != -ENOENT))
-		return;
-
-	spin_lock(&m->metadata_metric_lock);
-	total = ++m->total_metadatas;
-	m->metadata_latency_sum += lat;
-	METRIC_UPDATE_MIN_MAX(m->metadata_latency_min,
-			      m->metadata_latency_max,
-			      lat);
-	__update_stdev(total, m->metadata_latency_sum,
-		       &m->metadata_latency_sq_sum, lat);
-	spin_unlock(&m->metadata_metric_lock);
+	spin_lock(&m->lock);
+	total = ++m->total;
+	m->size_sum += size;
+	METRIC_UPDATE_MIN_MAX(m->size_min, m->size_max, size);
+	m->latency_sum += lat;
+	METRIC_UPDATE_MIN_MAX(m->latency_min, m->latency_max, lat);
+	__update_stdev(total, m->latency_sum, &m->latency_sq_sum, lat);
+	spin_unlock(&m->lock);
 }
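
After the consolidation above, every metric type shares one update path under one lock: bump the op count, add the sample to the running sums, fold it into min/max, and update the squared-deviation sum that feeds the stdev column. A standalone sketch of the count/sum/min/max part (userspace C, locking and the stdev term omitted; names are illustrative):

#include <stdint.h>
#include <stdio.h>

struct metric {
	uint64_t total;
	uint64_t sum;
	uint64_t min;
	uint64_t max;
};

static void metric_update(struct metric *m, uint64_t sample)
{
	m->total++;
	m->sum += sample;
	if (sample < m->min)
		m->min = sample;
	if (sample > m->max)
		m->max = sample;
}

int main(void)
{
	struct metric lat = { .min = UINT64_MAX };
	uint64_t samples[] = { 120, 85, 240 };	/* latencies in us */
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		metric_update(&lat, samples[i]);

	printf("total=%llu avg=%llu min=%llu max=%llu\n",
	       (unsigned long long)lat.total,
	       (unsigned long long)(lat.sum / lat.total),
	       (unsigned long long)lat.min,
	       (unsigned long long)lat.max);
	return 0;
}
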
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 0133955..bb45608 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -125,6 +125,26 @@
 	__le32 num;	/* the number of metrics that will be sent */
 } __packed;
 
+enum metric_type {
+	METRIC_READ,
+	METRIC_WRITE,
+	METRIC_METADATA,
+	METRIC_COPYFROM,
+	METRIC_MAX
+};
+
+struct ceph_metric {
+	spinlock_t lock;
+	u64 total;
+	u64 size_sum;
+	u64 size_min;
+	u64 size_max;
+	ktime_t latency_sum;
+	ktime_t latency_sq_sum;
+	ktime_t latency_min;
+	ktime_t latency_max;
+};
+
 /* These are the global metrics */
 struct ceph_client_metric {
 	atomic64_t            total_dentries;
@@ -135,32 +155,7 @@
 	struct percpu_counter i_caps_hit;
 	struct percpu_counter i_caps_mis;
 
-	spinlock_t read_metric_lock;
-	u64 total_reads;
-	u64 read_size_sum;
-	u64 read_size_min;
-	u64 read_size_max;
-	ktime_t read_latency_sum;
-	ktime_t read_latency_sq_sum;
-	ktime_t read_latency_min;
-	ktime_t read_latency_max;
-
-	spinlock_t write_metric_lock;
-	u64 total_writes;
-	u64 write_size_sum;
-	u64 write_size_min;
-	u64 write_size_max;
-	ktime_t write_latency_sum;
-	ktime_t write_latency_sq_sum;
-	ktime_t write_latency_min;
-	ktime_t write_latency_max;
-
-	spinlock_t metadata_metric_lock;
-	u64 total_metadatas;
-	ktime_t metadata_latency_sum;
-	ktime_t metadata_latency_sq_sum;
-	ktime_t metadata_latency_min;
-	ktime_t metadata_latency_max;
+	struct ceph_metric metric[METRIC_MAX];
 
 	/* The total number of directories and files that are opened */
 	atomic64_t opened_files;
@@ -195,13 +190,36 @@
 	percpu_counter_inc(&m->i_caps_mis);
 }
 
-extern void ceph_update_read_metrics(struct ceph_client_metric *m,
-				     ktime_t r_start, ktime_t r_end,
-				     unsigned int size, int rc);
-extern void ceph_update_write_metrics(struct ceph_client_metric *m,
-				      ktime_t r_start, ktime_t r_end,
-				      unsigned int size, int rc);
-extern void ceph_update_metadata_metrics(struct ceph_client_metric *m,
-				         ktime_t r_start, ktime_t r_end,
-					 int rc);
+extern void ceph_update_metrics(struct ceph_metric *m,
+				ktime_t r_start, ktime_t r_end,
+				unsigned int size, int rc);
+
+static inline void ceph_update_read_metrics(struct ceph_client_metric *m,
+					    ktime_t r_start, ktime_t r_end,
+					    unsigned int size, int rc)
+{
+	ceph_update_metrics(&m->metric[METRIC_READ],
+			    r_start, r_end, size, rc);
+}
+static inline void ceph_update_write_metrics(struct ceph_client_metric *m,
+					     ktime_t r_start, ktime_t r_end,
+					     unsigned int size, int rc)
+{
+	ceph_update_metrics(&m->metric[METRIC_WRITE],
+			    r_start, r_end, size, rc);
+}
+static inline void ceph_update_metadata_metrics(struct ceph_client_metric *m,
+						ktime_t r_start, ktime_t r_end,
+						int rc)
+{
+	ceph_update_metrics(&m->metric[METRIC_METADATA],
+			    r_start, r_end, 0, rc);
+}
+static inline void ceph_update_copyfrom_metrics(struct ceph_client_metric *m,
+						ktime_t r_start, ktime_t r_end,
+						unsigned int size, int rc)
+{
+	ceph_update_metrics(&m->metric[METRIC_COPYFROM],
+			    r_start, r_end, size, rc);
+}
 #endif /* _FS_CEPH_MDS_METRIC_H */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index fd8742b..bab6123 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -52,8 +52,7 @@
 	struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
 	struct ceph_mon_client *monc = &fsc->client->monc;
 	struct ceph_statfs st;
-	u64 fsid;
-	int err;
+	int i, err;
 	u64 data_pool;
 
 	if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
@@ -99,12 +98,14 @@
 	buf->f_namelen = NAME_MAX;
 
 	/* Must convert the fsid, for consistent values across arches */
+	buf->f_fsid.val[0] = 0;
 	mutex_lock(&monc->mutex);
-	fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
-	       le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
+	for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
+		buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
 	mutex_unlock(&monc->mutex);
 
-	buf->f_fsid = u64_to_fsid(fsid);
+	/* fold the fs_cluster_id into the upper bits */
+	buf->f_fsid.val[1] = monc->fs_cluster_id;
 
 	return 0;
 }
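
The statfs hunk above computes f_fsid.val[0] by XOR-folding the four 32-bit words of the 16-byte cluster fsid (through le32_to_cpu, so every arch reports the same value) and then stores fs_cluster_id in val[1]. A userspace sketch of the fold, with made-up fsid bytes:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const uint8_t fsid[16] = {
		0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
		0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
	};
	uint32_t val0 = 0, word;
	size_t i;

	for (i = 0; i < sizeof(fsid) / sizeof(word); i++) {
		memcpy(&word, fsid + i * sizeof(word), sizeof(word));
		val0 ^= word;	/* the kernel converts via le32_to_cpu() here */
	}
	printf("f_fsid.val[0] = 0x%08x\n", val0);
	return 0;
}
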
@@ -577,8 +578,8 @@
 	if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
 		seq_show_option(m, "recover_session", "clean");
 
-	if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
-		seq_puts(m, ",nowsync");
+	if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
+		seq_puts(m, ",wsync");
 
 	if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
 		seq_printf(m, ",wsize=%u", fsopt->wsize);
@@ -842,7 +843,7 @@
  * ceph_umount_begin - initiate forced umount.  Tear down the
  * mount, skipping steps that may hang while waiting for server(s).
  */
-static void ceph_umount_begin(struct super_block *sb)
+void ceph_umount_begin(struct super_block *sb)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 14f951c..ac331aa 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -48,7 +48,8 @@
 
 #define CEPH_MOUNT_OPT_DEFAULT			\
 	(CEPH_MOUNT_OPT_DCACHE |		\
-	 CEPH_MOUNT_OPT_NOCOPYFROM)
+	 CEPH_MOUNT_OPT_NOCOPYFROM |		\
+	 CEPH_MOUNT_OPT_ASYNC_DIROPS)
 
 #define ceph_set_mount_opt(fsc, opt) \
 	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt
@@ -128,9 +129,9 @@
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
-	struct dentry *debugfs_metric;
 	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
+	struct dentry *debugfs_metrics_dir;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE
@@ -580,6 +581,7 @@
 #define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
 #define CEPH_ASYNC_CREATE_BIT	(12)	  /* async create in flight for this */
 #define CEPH_I_ASYNC_CREATE	(1 << CEPH_ASYNC_CREATE_BIT)
+#define CEPH_I_SHUTDOWN		(1 << 13) /* inode is no longer usable */
 
 /*
  * Masks of ceph inode work.
@@ -939,6 +941,7 @@
 				struct ceph_snapid_map *sm);
 extern void ceph_trim_snapid_map(struct ceph_mds_client *mdsc);
 extern void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc);
+void ceph_umount_begin(struct super_block *sb);
 
 
 /*
@@ -1027,6 +1030,16 @@
 extern int ceph_getattr(struct user_namespace *mnt_userns,
 			const struct path *path, struct kstat *stat,
 			u32 request_mask, unsigned int flags);
+void ceph_inode_shutdown(struct inode *inode);
+
+static inline bool ceph_inode_is_shutdown(struct inode *inode)
+{
+	unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	int state = READ_ONCE(fsc->mount_state);
+
+	return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN;
+}
 
 /* xattr.c */
 int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
@@ -1198,6 +1211,7 @@
 extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
 extern int ceph_pool_perm_check(struct inode *inode, int need);
 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
+int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
 
 /* file.c */
 extern const struct file_operations ceph_file_fops;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index de2c12b..d282caf 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -271,7 +271,8 @@
 	c = 0;
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
-		if (server->is_channel)
+		/* channel info will be printed as part of sessions below */
+		if (CIFS_SERVER_IS_CHAN(server))
 			continue;
 
 		c++;
@@ -358,6 +359,8 @@
 			seq_printf(m, " signed");
 		if (server->posix_ext_supported)
 			seq_printf(m, " posix");
+		if (server->nosharesock)
+			seq_printf(m, " nosharesock");
 
 		if (server->rdma)
 			seq_printf(m, "\nRDMA ");
@@ -412,12 +415,14 @@
 				   from_kuid(&init_user_ns, ses->linux_uid),
 				   from_kuid(&init_user_ns, ses->cred_uid));
 
+			spin_lock(&ses->chan_lock);
 			if (ses->chan_count > 1) {
 				seq_printf(m, "\n\n\tExtra Channels: %zu ",
 					   ses->chan_count-1);
 				for (j = 1; j < ses->chan_count; j++)
 					cifs_dump_channel(m, j, &ses->chans[j]);
 			}
+			spin_unlock(&ses->chan_lock);
 
 			seq_puts(m, "\n\n\tShares: ");
 			j = 0;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 007427b..b0864da 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -307,12 +307,8 @@
 static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
 {
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_ses *ses;
-	struct cifs_tcon *tcon;
 	void *page;
-	char *full_path, *root_path;
-	unsigned int xid;
-	int rc;
+	char *full_path;
 	struct vfsmount *mnt;
 
 	cifs_dbg(FYI, "in %s\n", __func__);
@@ -324,8 +320,6 @@
 	 * the double backslashes usually used in the UNC. This function
 	 * gives us the latter, so we must adjust the result.
 	 */
-	mnt = ERR_PTR(-ENOMEM);
-
 	cifs_sb = CIFS_SB(mntpt->d_sb);
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
 		mnt = ERR_PTR(-EREMOTE);
@@ -341,60 +335,11 @@
 	}
 
 	convert_delimiter(full_path, '\\');
-
 	cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
 
-	if (!cifs_sb_master_tlink(cifs_sb)) {
-		cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__);
-		goto free_full_path;
-	}
-
-	tcon = cifs_sb_master_tcon(cifs_sb);
-	if (!tcon) {
-		cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__);
-		goto free_full_path;
-	}
-
-	root_path = kstrdup(tcon->treeName, GFP_KERNEL);
-	if (!root_path) {
-		mnt = ERR_PTR(-ENOMEM);
-		goto free_full_path;
-	}
-	cifs_dbg(FYI, "%s: root path: %s\n", __func__, root_path);
-
-	ses = tcon->ses;
-	xid = get_xid();
-
-	/*
-	 * If DFS root has been expired, then unconditionally fetch it again to
-	 * refresh DFS referral cache.
-	 */
-	rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
-			    root_path + 1, NULL, NULL);
-	if (!rc) {
-		rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
-				    cifs_remap(cifs_sb), full_path + 1,
-				    NULL, NULL);
-	}
-
-	free_xid(xid);
-
-	if (rc) {
-		mnt = ERR_PTR(rc);
-		goto free_root_path;
-	}
-	/*
-	 * OK - we were able to get and cache a referral for @full_path.
-	 *
-	 * Now, pass it down to cifs_mount() and it will retry every available
-	 * node server in case of failures - no need to do it here.
-	 */
 	mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path);
-	cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__,
-		 full_path + 1, mnt);
+	cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, full_path + 1, mnt);
 
-free_root_path:
-	kfree(root_path);
 free_full_path:
 	free_dentry_path(page);
 cdda_exit:
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index f9740752..013a4bd 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -61,11 +61,6 @@
 	/* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
 	char *prepath;
 
-	/*
-	 * Canonical DFS path initially provided by the mount call. We might connect to something
-	 * different via DFS but we want to keep it to do failover properly.
-	 */
-	char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
 	/* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
 	uuid_t dfs_mount_id;
 	/*
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index 12bde7b..23a1ed2 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -393,26 +393,14 @@
 
 static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const char *name, int state)
 {
-	int i;
-
 	switch (state) {
 	case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
 		cifs_dbg(FYI, "%s: resource name '%s' became unavailable\n", __func__, name);
-		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
-			spin_lock(&GlobalMid_Lock);
-			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
-				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
-			spin_unlock(&GlobalMid_Lock);
-		}
+		cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
 		break;
 	case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
 		cifs_dbg(FYI, "%s: resource name '%s' became available\n", __func__, name);
-		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
-			spin_lock(&GlobalMid_Lock);
-			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
-				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
-			spin_unlock(&GlobalMid_Lock);
-		}
+		cifs_ses_mark_for_reconnect(swnreg->tcon->ses);
 		break;
 	case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
 		cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index b50da19..9e5d9e1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,5 @@
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.33"
+#define CIFS_VERSION   "2.34"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index abff31d..be74606 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <linux/utsname.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
 #include <crypto/internal/hash.h>
@@ -75,7 +76,8 @@
 #define SMB_ECHO_INTERVAL_MAX 600
 #define SMB_ECHO_INTERVAL_DEFAULT 60
 
-/* dns resolution interval in seconds */
+/* dns resolution intervals in seconds */
+#define SMB_DNS_RESOLVE_INTERVAL_MIN     120
 #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
 
 /* maximum number of PDUs in one compound */
@@ -99,6 +101,8 @@
 #define XATTR_DOS_ATTRIB "user.DOSATTRIB"
 #endif
 
+#define CIFS_MAX_WORKSTATION_LEN  (__NEW_UTS_LEN + 1)  /* reasonable max for client */
+
 /*
  * CIFS vfs client Status information (based on what we know.)
  */
@@ -592,6 +596,7 @@
 	struct list_head pending_mid_q;
 	bool noblocksnd;		/* use blocking sendmsg */
 	bool noautotune;		/* do not autotune send buf sizes */
+	bool nosharesock;
 	bool tcp_nodelay;
 	unsigned int credits;  /* send no more requests at once */
 	unsigned int max_credits; /* can override large 32000 default at mnt */
@@ -685,13 +690,34 @@
 	 */
 	int nr_targets;
 	bool noblockcnt; /* use non-blocking connect() */
-	bool is_channel; /* if a session channel */
+
+	/*
+	 * If this is a session channel, primary_server holds the
+	 * ref-counted pointer to the session's primary channel
+	 * connection.
+	 */
+#define CIFS_SERVER_IS_CHAN(server)	(!!(server)->primary_server)
+	struct TCP_Server_Info *primary_server;
+
 #ifdef CONFIG_CIFS_SWN_UPCALL
 	bool use_swn_dstaddr;
 	struct sockaddr_storage swn_dstaddr;
 #endif
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	bool is_dfs_conn; /* if a dfs connection */
+	struct mutex refpath_lock; /* protects leaf_fullpath */
+	/*
+	 * Canonical DFS full paths that were used to chase referrals in mount and reconnect.
+	 *
+	 * origin_fullpath: first or original referral path
+	 * leaf_fullpath: last referral path (might be changed due to nested links in reconnect)
+	 *
+	 * current_fullpath: pointer to either origin_fullpath or leaf_fullpath
+	 * NOTE: cannot be accessed outside cifs_reconnect() and smb2_reconnect()
+	 *
+	 * format: \\HOST\SHARE\[OPTIONAL PATH]
+	 */
+	char *origin_fullpath, *leaf_fullpath, *current_fullpath;
 #endif
 };
 
@@ -908,6 +934,7 @@
 				   and after mount option parsing we fill it */
 	char *domainName;
 	char *password;
+	char *workstation_name;
 	struct session_key auth_key;
 	struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
 	enum securityEnum sectype; /* what security flavor was specified? */
@@ -933,16 +960,21 @@
 	 * iface_lock should be taken when accessing any of these fields
 	 */
 	spinlock_t iface_lock;
+	/* ========= begin: protected by iface_lock ======== */
 	struct cifs_server_iface *iface_list;
 	size_t iface_count;
 	unsigned long iface_last_update; /* jiffies */
+	/* ========= end: protected by iface_lock ======== */
 
+	spinlock_t chan_lock;
+	/* ========= begin: protected by chan_lock ======== */
 #define CIFS_MAX_CHANNELS 16
 	struct cifs_chan chans[CIFS_MAX_CHANNELS];
 	struct cifs_chan *binding_chan;
 	size_t chan_count;
 	size_t chan_max;
 	atomic_t chan_seq; /* round robin state */
+	/* ========= end: protected by chan_lock ======== */
 };
 
 /*
@@ -1091,7 +1123,6 @@
 	struct cached_fid crfid; /* Cached root fid */
 	/* BB add field for back pointer to sb struct(s)? */
 #ifdef CONFIG_CIFS_DFS_UPCALL
-	char *dfs_path; /* canonical DFS path */
 	struct list_head ulist; /* cache update list */
 #endif
 };
@@ -1942,4 +1973,14 @@
 		tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT);
 }
 
+static inline bool cifs_is_referral_server(struct cifs_tcon *tcon,
+					   const struct dfs_info3_param *ref)
+{
+	/*
+	 * Check if all targets are capable of handling DFS referrals as per
+	 * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+	 */
+	return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER));
+}
+
 #endif	/* _CIFS_GLOB_H */
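
A design note on the cifsglob.h changes: the old is_channel boolean is gone in favor of CIFS_SERVER_IS_CHAN(), which tests the new primary_server back-pointer, so the "is a channel" predicate and the pointer it implies can never disagree. A trivial standalone illustration of the pattern (struct and names here are illustrative, not the kernel's):

#include <stdio.h>

struct server {
	struct server *primary_server;	/* NULL unless this is a channel */
};

#define SERVER_IS_CHAN(s) (!!(s)->primary_server)

int main(void)
{
	struct server primary = { .primary_server = NULL };
	struct server chan = { .primary_server = &primary };

	printf("primary is chan: %d\n", SERVER_IS_CHAN(&primary));	/* 0 */
	printf("chan is chan: %d\n", SERVER_IS_CHAN(&chan));		/* 1 */
	return 0;
}
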
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index d0f85b66..4f5a3e8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -269,8 +269,9 @@
 
 extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
 				const char *path);
-
-extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
+extern struct TCP_Server_Info *
+cifs_get_tcp_session(struct smb3_fs_context *ctx,
+		     struct TCP_Server_Info *primary_server);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
 extern void cifs_put_tcon(struct cifs_tcon *tcon);
@@ -598,6 +599,7 @@
 bool is_server_using_iface(struct TCP_Server_Info *server,
 			   struct cifs_server_iface *iface);
 bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface);
+void cifs_ses_mark_for_reconnect(struct cifs_ses *ses);
 
 void extract_unc_hostname(const char *unc, const char **h, size_t *len);
 int copy_path_name(char *dst, const char *src);
@@ -607,7 +609,7 @@
 
 struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server);
 void cifs_put_tcp_super(struct super_block *sb);
-int update_super_prepath(struct cifs_tcon *tcon, char *prefix);
+int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
 char *extract_hostname(const char *unc);
 char *extract_sharename(const char *unc);
 
@@ -634,4 +636,7 @@
 		return options;
 }
 
+struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
+void cifs_put_tcon_super(struct super_block *sb);
+
 #endif			/* _CIFSPROTO_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0abbff4..6b70502 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -61,6 +61,20 @@
 /* Drop the connection to not overload the server */
 #define NUM_STATUS_IO_TIMEOUT   5
 
+struct mount_ctx {
+	struct cifs_sb_info *cifs_sb;
+	struct smb3_fs_context *fs_ctx;
+	unsigned int xid;
+	struct TCP_Server_Info *server;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	struct cifs_ses *root_ses;
+	uuid_t mount_id;
+	char *origin_fullpath, *leaf_fullpath;
+#endif
+};
+
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
@@ -115,7 +129,7 @@
 			 * To make sure we don't use the cached entry, retry 1s
 			 * after expiry.
 			 */
-			ttl = (expiry - now + 1);
+			ttl = max_t(unsigned long, expiry - now, SMB_DNS_RESOLVE_INTERVAL_MIN) + 1;
 	}
 	rc = !rc ? -1 : 0;
 
@@ -148,139 +162,38 @@
 	mutex_unlock(&server->srv_mutex);
 }
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
-/* These functions must be called with server->srv_mutex held */
-static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
-				       struct cifs_sb_info *cifs_sb,
-				       struct dfs_cache_tgt_list *tgt_list,
-				       struct dfs_cache_tgt_iterator **tgt_it)
-{
-	const char *name;
-	int rc;
-
-	if (!cifs_sb || !cifs_sb->origin_fullpath)
-		return;
-
-	if (!*tgt_it) {
-		*tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-	} else {
-		*tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
-		if (!*tgt_it)
-			*tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-	}
-
-	cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath);
-
-	name = dfs_cache_get_tgt_name(*tgt_it);
-
-	kfree(server->hostname);
-
-	server->hostname = extract_hostname(name);
-	if (IS_ERR(server->hostname)) {
-		cifs_dbg(FYI,
-			 "%s: failed to extract hostname from target: %ld\n",
-			 __func__, PTR_ERR(server->hostname));
-		return;
-	}
-
-	rc = reconn_set_ipaddr_from_hostname(server);
-	if (rc) {
-		cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-			 __func__, rc);
-	}
-}
-
-static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb,
-					   struct dfs_cache_tgt_list *tl)
-{
-	if (!cifs_sb->origin_fullpath)
-		return -EOPNOTSUPP;
-	return dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl);
-}
-#endif
-
-/*
- * cifs tcp session reconnection
+/*
+ * Mark all sessions and tcons for reconnect.
  *
- * mark tcp session as reconnecting so temporarily locked
- * mark all smb sessions as reconnecting for tcp session
- * reconnect tcp session
- * wake up waiters on reconnection? - (not needed currently)
+ * @server must already have been marked CifsNeedReconnect.
  */
-int
-cifs_reconnect(struct TCP_Server_Info *server)
+static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server)
 {
-	int rc = 0;
-	struct list_head *tmp, *tmp2;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
-	struct mid_q_entry *mid_entry;
+	struct mid_q_entry *mid, *nmid;
 	struct list_head retry_list;
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	struct super_block *sb = NULL;
-	struct cifs_sb_info *cifs_sb = NULL;
-	struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
-	struct dfs_cache_tgt_iterator *tgt_it = NULL;
-#endif
+	struct TCP_Server_Info *pserver;
 
-	spin_lock(&GlobalMid_Lock);
-	server->nr_targets = 1;
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	spin_unlock(&GlobalMid_Lock);
-	sb = cifs_get_tcp_super(server);
-	if (IS_ERR(sb)) {
-		rc = PTR_ERR(sb);
-		cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n",
-			 __func__, rc);
-		sb = NULL;
-	} else {
-		cifs_sb = CIFS_SB(sb);
-		rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list);
-		if (rc) {
-			cifs_sb = NULL;
-			if (rc != -EOPNOTSUPP) {
-				cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n",
-						__func__);
-			}
-		} else {
-			server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list);
-		}
-	}
-	cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__,
-		 server->nr_targets);
-	spin_lock(&GlobalMid_Lock);
-#endif
-	if (server->tcpStatus == CifsExiting) {
-		/* the demux thread will exit normally
-		next time through the loop */
-		spin_unlock(&GlobalMid_Lock);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-		dfs_cache_free_tgts(&tgt_list);
-		cifs_put_tcp_super(sb);
-#endif
-		wake_up(&server->response_q);
-		return rc;
-	} else
-		server->tcpStatus = CifsNeedReconnect;
-	spin_unlock(&GlobalMid_Lock);
 	server->maxBuf = 0;
 	server->max_read = 0;
 
 	cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
 	trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
+	/*
+	 * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
+	 * are not used until reconnected.
+	 */
+	cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
 
-	/* before reconnecting the tcp session, mark the smb session (uid)
-		and the tid bad so they are not used until reconnected */
-	cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n",
-		 __func__);
+	/* If server is a channel, select the primary channel */
+	pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
+
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each(tmp, &server->smb_ses_list) {
-		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+	list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
 		ses->need_reconnect = true;
-		list_for_each(tmp2, &ses->tcon_list) {
-			tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
+		list_for_each_entry(tcon, &ses->tcon_list, tcon_list)
 			tcon->need_reconnect = true;
-		}
 		if (ses->tcon_ipc)
 			ses->tcon_ipc->need_reconnect = true;
 	}
@@ -290,11 +203,11 @@
 	cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
 	mutex_lock(&server->srv_mutex);
 	if (server->ssocket) {
-		cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n",
-			 server->ssocket->state, server->ssocket->flags);
+		cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state,
+			 server->ssocket->flags);
 		kernel_sock_shutdown(server->ssocket, SHUT_WR);
-		cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n",
-			 server->ssocket->state, server->ssocket->flags);
+		cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", server->ssocket->state,
+			 server->ssocket->flags);
 		sock_release(server->ssocket);
 		server->ssocket = NULL;
 	}
@@ -309,23 +222,21 @@
 	INIT_LIST_HEAD(&retry_list);
 	cifs_dbg(FYI, "%s: moving mids to private list\n", __func__);
 	spin_lock(&GlobalMid_Lock);
-	list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
-		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		kref_get(&mid_entry->refcount);
-		if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
-			mid_entry->mid_state = MID_RETRY_NEEDED;
-		list_move(&mid_entry->qhead, &retry_list);
-		mid_entry->mid_flags |= MID_DELETED;
+	list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) {
+		kref_get(&mid->refcount);
+		if (mid->mid_state == MID_REQUEST_SUBMITTED)
+			mid->mid_state = MID_RETRY_NEEDED;
+		list_move(&mid->qhead, &retry_list);
+		mid->mid_flags |= MID_DELETED;
 	}
 	spin_unlock(&GlobalMid_Lock);
 	mutex_unlock(&server->srv_mutex);
 
 	cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
-	list_for_each_safe(tmp, tmp2, &retry_list) {
-		mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-		list_del_init(&mid_entry->qhead);
-		mid_entry->callback(mid_entry);
-		cifs_mid_q_entry_release(mid_entry);
+	list_for_each_entry_safe(mid, nmid, &retry_list, qhead) {
+		list_del_init(&mid->qhead);
+		mid->callback(mid);
+		cifs_mid_q_entry_release(mid);
 	}
 
 	if (cifs_rdma_enabled(server)) {
@@ -333,38 +244,48 @@
 		smbd_destroy(server);
 		mutex_unlock(&server->srv_mutex);
 	}
+}
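
The hunks above also convert open-coded list_for_each()/list_entry() walks to
list_for_each_entry(), and to list_for_each_entry_safe() where the loop moves
entries off the list. A minimal sketch of the two styles, using a hypothetical
struct item:

	#include <linux/list.h>

	struct item {
		int val;
		struct list_head list;
	};

	/* old style: iterate raw nodes, convert each with list_entry() */
	static int sum_old(struct list_head *head)
	{
		struct list_head *pos;
		int sum = 0;

		list_for_each(pos, head)
			sum += list_entry(pos, struct item, list)->val;
		return sum;
	}

	/* new style: the _entry macro folds the container lookup in; use
	 * list_for_each_entry_safe() when the body deletes or moves entries */
	static int sum_new(struct list_head *head)
	{
		struct item *it;
		int sum = 0;

		list_for_each_entry(it, head, list)
			sum += it->val;
		return sum;
	}
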
+
+static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
+{
+	spin_lock(&GlobalMid_Lock);
+	server->nr_targets = num_targets;
+	if (server->tcpStatus == CifsExiting) {
+		/* the demux thread will exit normally next time through the loop */
+		spin_unlock(&GlobalMid_Lock);
+		wake_up(&server->response_q);
+		return false;
+	}
+	server->tcpStatus = CifsNeedReconnect;
+	spin_unlock(&GlobalMid_Lock);
+	return true;
+}
+
+/*
+ * cifs tcp session reconnection
+ *
+ * mark tcp session as reconnecting so it is temporarily locked
+ * mark all smb sessions as reconnecting for the tcp session
+ * reconnect tcp session
+ * wake up waiters on reconnection? - (not needed currently)
+ */
+static int __cifs_reconnect(struct TCP_Server_Info *server)
+{
+	int rc = 0;
+
+	if (!cifs_tcp_ses_needs_reconnect(server, 1))
+		return 0;
+
+	cifs_mark_tcp_ses_conns_for_reconnect(server);
 
 	do {
 		try_to_freeze();
-
 		mutex_lock(&server->srv_mutex);
 
-
 		if (!cifs_swn_set_server_dstaddr(server)) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
-		if (cifs_sb && cifs_sb->origin_fullpath)
-			/*
-			 * Set up next DFS target server (if any) for reconnect. If DFS
-			 * feature is disabled, then we will retry last server we
-			 * connected to before.
-			 */
-			reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
-		else {
-#endif
-			/*
-			 * Resolve the hostname again to make sure that IP address is up-to-date.
-			 */
+			/* resolve the hostname again to make sure that IP address is up-to-date */
 			rc = reconn_set_ipaddr_from_hostname(server);
-			if (rc) {
-				cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-						__func__, rc);
-			}
-
-#ifdef CONFIG_CIFS_DFS_UPCALL
-		}
-#endif
-
-
+			cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
 		}
 
 		if (cifs_rdma_enabled(server))
@@ -372,8 +293,8 @@
 		else
 			rc = generic_ip_connect(server);
 		if (rc) {
-			cifs_dbg(FYI, "reconnect error %d\n", rc);
 			mutex_unlock(&server->srv_mutex);
+			cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
 			msleep(3000);
 		} else {
 			atomic_inc(&tcpSesReconnectCount);
@@ -387,19 +308,6 @@
 		}
 	} while (server->tcpStatus == CifsNeedReconnect);
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	if (tgt_it) {
-		rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1,
-						    tgt_it);
-		if (rc) {
-			cifs_server_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n",
-				 __func__, rc);
-		}
-		dfs_cache_free_tgts(&tgt_list);
-	}
-
-	cifs_put_tcp_super(sb);
-#endif
 	if (server->tcpStatus == CifsNeedNegotiate)
 		mod_delayed_work(cifsiod_wq, &server->echo, 0);
 
@@ -407,6 +315,147 @@
 	return rc;
 }
 
+#ifdef CONFIG_CIFS_DFS_UPCALL
+static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const char *target)
+{
+	int rc;
+	char *hostname;
+
+	if (!cifs_swn_set_server_dstaddr(server)) {
+		if (server->hostname != target) {
+			hostname = extract_hostname(target);
+			if (!IS_ERR(hostname)) {
+				kfree(server->hostname);
+				server->hostname = hostname;
+			} else {
+				cifs_dbg(FYI, "%s: couldn't extract hostname or address from dfs target: %ld\n",
+					 __func__, PTR_ERR(hostname));
+				cifs_dbg(FYI, "%s: default to last target server: %s\n", __func__,
+					 server->hostname);
+			}
+		}
+		/* resolve the hostname again to make sure that IP address is up-to-date. */
+		rc = reconn_set_ipaddr_from_hostname(server);
+		cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
+	}
+	/* Reconnect the socket */
+	if (cifs_rdma_enabled(server))
+		rc = smbd_reconnect(server);
+	else
+		rc = generic_ip_connect(server);
+
+	return rc;
+}
+
+static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_cache_tgt_list *tl,
+				     struct dfs_cache_tgt_iterator **target_hint)
+{
+	int rc;
+	struct dfs_cache_tgt_iterator *tit;
+
+	*target_hint = NULL;
+
+	/* If dfs target list is empty, then reconnect to last server */
+	tit = dfs_cache_get_tgt_iterator(tl);
+	if (!tit)
+		return __reconnect_target_unlocked(server, server->hostname);
+
+	/* Otherwise, try every dfs target in @tl */
+	for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
+		rc = __reconnect_target_unlocked(server, dfs_cache_get_tgt_name(tit));
+		if (!rc) {
+			*target_hint = tit;
+			break;
+		}
+	}
+	return rc;
+}
+
+static int reconnect_dfs_server(struct TCP_Server_Info *server)
+{
+	int rc = 0;
+	const char *refpath = server->current_fullpath + 1;
+	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+	struct dfs_cache_tgt_iterator *target_hint = NULL;
+	int num_targets = 0;
+
+	/*
+	 * Determine the number of dfs targets the referral path in @server resolves to.
+	 *
+	 * smb2_reconnect() needs to know how long it should wait based upon the number of dfs
+	 * targets (server->nr_targets).  It's also possible that the cached referral was cleared
+	 * through /proc/fs/cifs/dfscache or the target list is empty due to server settings after
+	 * refreshing the referral, so, in this case, default it to 1.
+	 */
+	if (!dfs_cache_noreq_find(refpath, NULL, &tl))
+		num_targets = dfs_cache_get_nr_tgts(&tl);
+	if (!num_targets)
+		num_targets = 1;
+
+	if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
+		return 0;
+
+	cifs_mark_tcp_ses_conns_for_reconnect(server);
+
+	do {
+		try_to_freeze();
+		mutex_lock(&server->srv_mutex);
+
+		rc = reconnect_target_unlocked(server, &tl, &target_hint);
+		if (rc) {
+			/* Failed to reconnect socket */
+			mutex_unlock(&server->srv_mutex);
+			cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
+			msleep(3000);
+			continue;
+		}
+		/*
+		 * Socket was created.  Update tcp session status to CifsNeedNegotiate so that a
+		 * process waiting for reconnect will know it needs to re-establish session and tcon
+		 * through the reconnected target server.
+		 */
+		atomic_inc(&tcpSesReconnectCount);
+		set_credits(server, 1);
+		spin_lock(&GlobalMid_Lock);
+		if (server->tcpStatus != CifsExiting)
+			server->tcpStatus = CifsNeedNegotiate;
+		spin_unlock(&GlobalMid_Lock);
+		cifs_swn_reset_server_dstaddr(server);
+		mutex_unlock(&server->srv_mutex);
+	} while (server->tcpStatus == CifsNeedReconnect);
+
+	if (target_hint)
+		dfs_cache_noreq_update_tgthint(refpath, target_hint);
+
+	dfs_cache_free_tgts(&tl);
+
+	/* Need to set up echo worker again once connection has been established */
+	if (server->tcpStatus == CifsNeedNegotiate)
+		mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
+	wake_up(&server->response_q);
+	return rc;
+}
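
Both reconnect loops share the same shape: freezer-friendly, a fixed 3-second
backoff on failure, and looping while the status still demands a reconnect. A
reduced sketch, with try_connect() and the srv type as invented stand-ins:

	#include <linux/delay.h>
	#include <linux/freezer.h>
	#include <linux/mutex.h>

	enum srv_status { NEED_RECONNECT, NEED_NEGOTIATE };

	struct srv {
		struct mutex lock;
		enum srv_status status;
	};

	int try_connect(struct srv *s);	/* hypothetical connect step */

	static void reconnect_loop(struct srv *s)
	{
		int rc;

		do {
			try_to_freeze();	/* cooperate with system freeze/suspend */
			mutex_lock(&s->lock);
			rc = try_connect(s);
			if (rc) {
				mutex_unlock(&s->lock);
				msleep(3000);	/* fixed backoff, then retry */
				continue;
			}
			s->status = NEED_NEGOTIATE;	/* socket up; session setup is next */
			mutex_unlock(&s->lock);
		} while (s->status == NEED_RECONNECT);
	}
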
+
+int cifs_reconnect(struct TCP_Server_Info *server)
+{
+	/* If tcp session is not a dfs connection, then reconnect to last target server */
+	spin_lock(&cifs_tcp_ses_lock);
+	if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
+		spin_unlock(&cifs_tcp_ses_lock);
+		return __cifs_reconnect(server);
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+
+	return reconnect_dfs_server(server);
+}
+#else
+int cifs_reconnect(struct TCP_Server_Info *server)
+{
+	return __cifs_reconnect(server);
+}
+#endif
+
 static void
 cifs_echo_request(struct work_struct *work)
 {
@@ -665,13 +714,14 @@
 	 * Trying to handle/dequeue a mid after the send_recv()
 	 * function has finished processing it is a bug.
 	 */
-	if (mid->mid_flags & MID_DELETED)
+	if (mid->mid_flags & MID_DELETED) {
+		spin_unlock(&GlobalMid_Lock);
 		pr_warn_once("trying to dequeue a deleted mid\n");
-	else {
+	} else {
 		list_del_init(&mid->qhead);
 		mid->mid_flags |= MID_DELETED;
+		spin_unlock(&GlobalMid_Lock);
 	}
-	spin_unlock(&GlobalMid_Lock);
 }
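
The hunk above narrows the critical section deliberately: unlinking the mid and
setting MID_DELETED stay atomic under GlobalMid_Lock, while pr_warn_once() now
runs only after the lock is dropped, keeping console output out of the locked
region. The resulting pattern, reduced to hypothetical names:

	#include <linux/list.h>
	#include <linux/printk.h>
	#include <linux/spinlock.h>

	#define ENTRY_DELETED	1UL

	static DEFINE_SPINLOCK(queue_lock);

	struct entry {
		unsigned long flags;
		struct list_head node;
	};

	static void dequeue_entry(struct entry *e)
	{
		spin_lock(&queue_lock);
		if (e->flags & ENTRY_DELETED) {
			spin_unlock(&queue_lock);	/* drop the lock before printing */
			pr_warn_once("trying to dequeue a deleted entry\n");
		} else {
			list_del_init(&e->node);
			e->flags |= ENTRY_DELETED;	/* unlink and flag atomically */
			spin_unlock(&queue_lock);
		}
	}
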
 
 static unsigned int
@@ -794,6 +844,10 @@
 		 */
 	}
 
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	kfree(server->origin_fullpath);
+	kfree(server->leaf_fullpath);
+#endif
 	kfree(server);
 
 	length = atomic_dec_return(&tcpSesAllocCount);
@@ -1220,6 +1274,10 @@
 	if (ctx->nosharesock)
 		return 0;
 
+	/* this server does not share socket */
+	if (server->nosharesock)
+		return 0;
+
 	/* If multidialect negotiation see if existing sessions match one */
 	if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
 		if (server->vals->protocol_id < SMB30_PROT_ID)
@@ -1283,7 +1341,7 @@
 		 * Skip ses channels since they're only handled in lower layers
 		 * (e.g. cifs_send_recv).
 		 */
-		if (server->is_channel || !match_server(server, ctx))
+		if (CIFS_SERVER_IS_CHAN(server) || !match_server(server, ctx))
 			continue;
 
 		++server->srv_count;
@@ -1314,6 +1372,10 @@
 	list_del_init(&server->tcp_ses_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
+	/* For secondary channels, we pick up ref-count on the primary server */
+	if (CIFS_SERVER_IS_CHAN(server))
+		cifs_put_tcp_session(server->primary_server, from_reconnect);
+
 	cancel_delayed_work_sync(&server->echo);
 	cancel_delayed_work_sync(&server->resolve);
 
@@ -1333,7 +1395,10 @@
 	spin_unlock(&GlobalMid_Lock);
 
 	cifs_crypto_secmech_release(server);
-	cifs_fscache_release_client_cookie(server);
+
+	/* fscache server cookies are based on primary channel only */
+	if (!CIFS_SERVER_IS_CHAN(server))
+		cifs_fscache_release_client_cookie(server);
 
 	kfree(server->session_key.response);
 	server->session_key.response = NULL;
@@ -1346,7 +1411,8 @@
 }
 
 struct TCP_Server_Info *
-cifs_get_tcp_session(struct smb3_fs_context *ctx)
+cifs_get_tcp_session(struct smb3_fs_context *ctx,
+		     struct TCP_Server_Info *primary_server)
 {
 	struct TCP_Server_Info *tcp_ses = NULL;
 	int rc;
@@ -1370,6 +1436,9 @@
 		goto out_err;
 	}
 
+	if (ctx->nosharesock)
+		tcp_ses->nosharesock = true;
+
 	tcp_ses->ops = ctx->ops;
 	tcp_ses->vals = ctx->vals;
 	cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
@@ -1383,6 +1452,12 @@
 	tcp_ses->in_flight = 0;
 	tcp_ses->max_in_flight = 0;
 	tcp_ses->credits = 1;
+	if (primary_server) {
+		spin_lock(&cifs_tcp_ses_lock);
+		++primary_server->srv_count;
+		tcp_ses->primary_server = primary_server;
+		spin_unlock(&cifs_tcp_ses_lock);
+	}
 	init_waitqueue_head(&tcp_ses->response_q);
 	init_waitqueue_head(&tcp_ses->request_q);
 	INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
@@ -1403,6 +1478,9 @@
 	INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server);
 	INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
 	mutex_init(&tcp_ses->reconnect_mutex);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	mutex_init(&tcp_ses->refpath_lock);
+#endif
 	memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
 	       sizeof(tcp_ses->srcaddr));
 	memcpy(&tcp_ses->dstaddr, &ctx->dstaddr,
@@ -1481,7 +1559,9 @@
 	list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	cifs_fscache_get_client_cookie(tcp_ses);
+	/* fscache server cookies are based on primary channel only */
+	if (!CIFS_SERVER_IS_CHAN(tcp_ses))
+		cifs_fscache_get_client_cookie(tcp_ses);
 
 	/* queue echo request delayed work */
 	queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -1501,6 +1581,8 @@
 
 out_err:
 	if (tcp_ses) {
+		if (CIFS_SERVER_IS_CHAN(tcp_ses))
+			cifs_put_tcp_session(tcp_ses->primary_server, false);
 		kfree(tcp_ses->hostname);
 		if (tcp_ses->ssocket)
 			sock_release(tcp_ses->ssocket);
@@ -1519,8 +1601,12 @@
 	 * If an existing session is limited to less channels than
 	 * requested, it should not be reused
 	 */
-	if (ses->chan_max < ctx->max_channels)
+	spin_lock(&ses->chan_lock);
+	if (ses->chan_max < ctx->max_channels) {
+		spin_unlock(&ses->chan_lock);
 		return 0;
+	}
+	spin_unlock(&ses->chan_lock);
 
 	switch (ses->sectype) {
 	case Kerberos:
@@ -1655,6 +1741,7 @@
 void cifs_put_smb_ses(struct cifs_ses *ses)
 {
 	unsigned int rc, xid;
+	unsigned int chan_count;
 	struct TCP_Server_Info *server = ses->server;
 	cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
 
@@ -1696,12 +1783,24 @@
 	list_del_init(&ses->smb_ses_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
+	spin_lock(&ses->chan_lock);
+	chan_count = ses->chan_count;
+	spin_unlock(&ses->chan_lock);
+
 	/* close any extra channels */
-	if (ses->chan_count > 1) {
+	if (chan_count > 1) {
 		int i;
 
-		for (i = 1; i < ses->chan_count; i++)
+		for (i = 1; i < chan_count; i++) {
+			/*
+			 * note: for now, we're okay accessing ses->chans
+			 * without chan_lock. But when chans can go away, we'll
+			 * need to introduce ref counting to make sure that chan
+			 * is not freed from under us.
+			 */
 			cifs_put_tcp_session(ses->chans[i].server, 0);
+			ses->chans[i].server = NULL;
+		}
 	}
 
 	sesInfoFree(ses);
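
The chan_lock additions in cifs_put_smb_ses() above use a snapshot pattern:
read the shared counter once under its spinlock, then iterate from the local
copy, in line with the in-code note about introducing refcounting later.
Reduced, with hypothetical types:

	#include <linux/spinlock.h>

	struct srv;
	void put_channel(struct srv *server);	/* hypothetical release */

	struct channel {
		struct srv *server;
	};

	struct session {
		spinlock_t chan_lock;
		unsigned int chan_count;
		struct channel chans[16];
	};

	static void close_extra_channels(struct session *ses)
	{
		unsigned int chan_count, i;

		spin_lock(&ses->chan_lock);
		chan_count = ses->chan_count;	/* snapshot while locked */
		spin_unlock(&ses->chan_lock);

		for (i = 1; i < chan_count; i++)	/* channel 0 is the primary */
			put_channel(ses->chans[i].server);
	}
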
@@ -1885,16 +1984,18 @@
 			 ses->status);
 
 		mutex_lock(&ses->session_mutex);
-		rc = cifs_negotiate_protocol(xid, ses);
-		if (rc) {
-			mutex_unlock(&ses->session_mutex);
-			/* problem -- put our ses reference */
-			cifs_put_smb_ses(ses);
-			free_xid(xid);
-			return ERR_PTR(rc);
-		}
 		if (ses->need_reconnect) {
 			cifs_dbg(FYI, "Session needs reconnect\n");
+
+			rc = cifs_negotiate_protocol(xid, ses);
+			if (rc) {
+				mutex_unlock(&ses->session_mutex);
+				/* problem -- put our ses reference */
+				cifs_put_smb_ses(ses);
+				free_xid(xid);
+				return ERR_PTR(rc);
+			}
+
 			rc = cifs_setup_session(xid, ses,
 						ctx->local_nls);
 			if (rc) {
@@ -1942,6 +2043,12 @@
 		if (!ses->domainName)
 			goto get_ses_fail;
 	}
+	if (ctx->workstation_name) {
+		ses->workstation_name = kstrdup(ctx->workstation_name,
+						GFP_KERNEL);
+		if (!ses->workstation_name)
+			goto get_ses_fail;
+	}
 	if (ctx->domainauto)
 		ses->domainAuto = ctx->domainauto;
 	ses->cred_uid = ctx->cred_uid;
@@ -1952,9 +2059,11 @@
 	mutex_lock(&ses->session_mutex);
 
 	/* add server as first channel */
+	spin_lock(&ses->chan_lock);
 	ses->chans[0].server = server;
 	ses->chan_count = 1;
 	ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
+	spin_unlock(&ses->chan_lock);
 
 	rc = cifs_negotiate_protocol(xid, ses);
 	if (!rc)
@@ -2286,8 +2395,6 @@
 	list_add(&tcon->tcon_list, &ses->tcon_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	cifs_fscache_get_super_cookie(tcon);
-
 	return tcon;
 
 out_fail:
@@ -2849,73 +2956,64 @@
 }
 
 /* Release all successfully acquired connections */
-static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
-				   unsigned int xid,
-				   struct TCP_Server_Info *server,
-				   struct cifs_ses *ses, struct cifs_tcon *tcon)
+static inline void mount_put_conns(struct mount_ctx *mnt_ctx)
 {
 	int rc = 0;
 
-	if (tcon)
-		cifs_put_tcon(tcon);
-	else if (ses)
-		cifs_put_smb_ses(ses);
-	else if (server)
-		cifs_put_tcp_session(server, 0);
-	cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
-	free_xid(xid);
+	if (mnt_ctx->tcon)
+		cifs_put_tcon(mnt_ctx->tcon);
+	else if (mnt_ctx->ses)
+		cifs_put_smb_ses(mnt_ctx->ses);
+	else if (mnt_ctx->server)
+		cifs_put_tcp_session(mnt_ctx->server, 0);
+	mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+	free_xid(mnt_ctx->xid);
 }
 
 /* Get connections for tcp, ses and tcon */
-static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-			   unsigned int *xid,
-			   struct TCP_Server_Info **nserver,
-			   struct cifs_ses **nses, struct cifs_tcon **ntcon)
+static int mount_get_conns(struct mount_ctx *mnt_ctx)
 {
 	int rc = 0;
-	struct TCP_Server_Info *server;
-	struct cifs_ses *ses;
-	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server = NULL;
+	struct cifs_ses *ses = NULL;
+	struct cifs_tcon *tcon = NULL;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	unsigned int xid;
 
-	*nserver = NULL;
-	*nses = NULL;
-	*ntcon = NULL;
-
-	*xid = get_xid();
+	xid = get_xid();
 
 	/* get a reference to a tcp session */
-	server = cifs_get_tcp_session(ctx);
+	server = cifs_get_tcp_session(ctx, NULL);
 	if (IS_ERR(server)) {
 		rc = PTR_ERR(server);
-		return rc;
+		server = NULL;
+		goto out;
 	}
 
-	*nserver = server;
-
 	/* get a reference to a SMB session */
 	ses = cifs_get_smb_ses(server, ctx);
 	if (IS_ERR(ses)) {
 		rc = PTR_ERR(ses);
-		return rc;
+		ses = NULL;
+		goto out;
 	}
 
-	*nses = ses;
-
 	if ((ctx->persistent == true) && (!(ses->server->capabilities &
 					    SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
 		cifs_server_dbg(VFS, "persistent handles not supported by server\n");
-		return -EOPNOTSUPP;
+		rc = -EOPNOTSUPP;
+		goto out;
 	}
 
 	/* search for existing tcon to this server share */
 	tcon = cifs_get_tcon(ses, ctx);
 	if (IS_ERR(tcon)) {
 		rc = PTR_ERR(tcon);
-		return rc;
+		tcon = NULL;
+		goto out;
 	}
 
-	*ntcon = tcon;
-
 	/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
 	if (tcon->posix_extensions)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
@@ -2926,17 +3024,19 @@
 		 * reset of caps checks mount to see if unix extensions disabled
 		 * for just this mount.
 		 */
-		reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx);
+		reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx);
 		if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
 		    (le64_to_cpu(tcon->fsUnixInfo.Capability) &
-		     CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
-			return -EACCES;
+		     CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
+			rc = -EACCES;
+			goto out;
+		}
 	} else
 		tcon->unix_ext = 0; /* server does not support them */
 
 	/* do not care if the following call succeeds - informational */
 	if (!tcon->pipe && server->ops->qfs_tcon) {
-		server->ops->qfs_tcon(*xid, tcon, cifs_sb);
+		server->ops->qfs_tcon(xid, tcon, cifs_sb);
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
 			if (tcon->fsDevInfo.DeviceCharacteristics &
 			    cpu_to_le32(FILE_READ_ONLY_DEVICE))
@@ -2946,6 +3046,12 @@
 				cifs_dbg(VFS, "read only mount of RW share\n");
 			/* no need to log a RW mount of a typical RW share */
 		}
+		/*
+		 * The cookie is initialized from volume info returned above.
+		 * Inside cifs_fscache_get_super_cookie() it checks
+		 * that we do not get the super cookie twice.
+		 */
+		cifs_fscache_get_super_cookie(tcon);
 	}
 
 	/*
@@ -2960,7 +3066,13 @@
 	    (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
 		cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
 
-	return 0;
+out:
+	mnt_ctx->server = server;
+	mnt_ctx->ses = ses;
+	mnt_ctx->tcon = tcon;
+	mnt_ctx->xid = xid;
+
+	return rc;
 }
 
 static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
@@ -2990,18 +3102,17 @@
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-			       unsigned int *xid, struct TCP_Server_Info **nserver,
-			       struct cifs_ses **nses, struct cifs_tcon **ntcon)
+/* Get unique dfs connections */
+static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx)
 {
 	int rc;
 
-	ctx->nosharesock = true;
-	rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon);
-	if (*nserver) {
+	mnt_ctx->fs_ctx->nosharesock = true;
+	rc = mount_get_conns(mnt_ctx);
+	if (mnt_ctx->server) {
 		cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__);
 		spin_lock(&cifs_tcp_ses_lock);
-		(*nserver)->is_dfs_conn = true;
+		mnt_ctx->server->is_dfs_conn = true;
 		spin_unlock(&cifs_tcp_ses_lock);
 	}
 	return rc;
@@ -3043,190 +3154,38 @@
 }
 
 /*
- * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
+ * expand_dfs_referral - Update cifs_sb from dfs referral path
  *
- * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated
- * to a string containing updated options for the submount.  Otherwise it
- * will be left untouched.
- *
- * Returns the rc from get_dfs_path to the caller, which can be used to
- * determine whether there were referrals.
+ * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the
+ * submount.
  */
-static int
-expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
-		    struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-		    char *ref_path)
+static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path,
+			       struct dfs_info3_param *referral)
 {
 	int rc;
-	struct dfs_info3_param referral = {0};
-	char *full_path = NULL, *mdata = NULL;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	char *fake_devname = NULL, *mdata = NULL;
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
-		return -EREMOTE;
-
-	full_path = build_unc_path_to_root(ctx, cifs_sb, true);
-	if (IS_ERR(full_path))
-		return PTR_ERR(full_path);
-
-	rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
-			    ref_path, &referral, NULL);
-	if (!rc) {
-		char *fake_devname = NULL;
-
-		mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
-						   full_path + 1, &referral,
-						   &fake_devname);
-		free_dfs_info_param(&referral);
-
-		if (IS_ERR(mdata)) {
-			rc = PTR_ERR(mdata);
-			mdata = NULL;
-		} else {
-			/*
-			 * We can not clear out the whole structure since we
-			 * no longer have an explicit function to parse
-			 * a mount-string. Instead we need to clear out the
-			 * individual fields that are no longer valid.
-			 */
-			kfree(ctx->prepath);
-			ctx->prepath = NULL;
-			rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
-		}
-		kfree(fake_devname);
-		kfree(cifs_sb->ctx->mount_options);
-		cifs_sb->ctx->mount_options = mdata;
-	}
-	kfree(full_path);
-	return rc;
-}
-
-static int get_next_dfs_tgt(struct dfs_cache_tgt_list *tgt_list,
-			    struct dfs_cache_tgt_iterator **tgt_it)
-{
-	if (!*tgt_it)
-		*tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-	else
-		*tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
-	return !*tgt_it ? -EHOSTDOWN : 0;
-}
-
-static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
-			   struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx)
-{
-	const char *tgt = dfs_cache_get_tgt_name(tgt_it);
-	int len = strlen(tgt) + 2;
-	char *new_unc;
-
-	new_unc = kmalloc(len, GFP_KERNEL);
-	if (!new_unc)
-		return -ENOMEM;
-	scnprintf(new_unc, len, "\\%s", tgt);
-
-	kfree(ctx->UNC);
-	ctx->UNC = new_unc;
-
-	if (fake_ctx->prepath) {
-		kfree(ctx->prepath);
-		ctx->prepath = fake_ctx->prepath;
-		fake_ctx->prepath = NULL;
-	}
-	memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr));
-
-	return 0;
-}
-
-static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
-			   struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
-			   unsigned int *xid, struct TCP_Server_Info **server,
-			   struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
-	int rc;
-	char *npath = NULL;
-	struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
-	struct dfs_cache_tgt_iterator *tgt_it = NULL;
-	struct smb3_fs_context tmp_ctx = {NULL};
-
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
-		return -EOPNOTSUPP;
-
-	npath = dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb));
-	if (IS_ERR(npath))
-		return PTR_ERR(npath);
-
-	cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, npath, full_path);
-
-	rc = dfs_cache_noreq_find(npath, NULL, &tgt_list);
-	if (rc)
-		goto out;
-	/*
-	 * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
-	 * test connection against new DFS targets.
-	 */
-	rc = smb3_fs_context_dup(&tmp_ctx, ctx);
-	if (rc)
-		goto out;
-
-	for (;;) {
-		struct dfs_info3_param ref = {0};
-		char *fake_devname = NULL, *mdata = NULL;
-
-		/* Get next DFS target server - if any */
-		rc = get_next_dfs_tgt(&tgt_list, &tgt_it);
-		if (rc)
-			break;
-
-		rc = dfs_cache_get_tgt_referral(npath, tgt_it, &ref);
-		if (rc)
-			break;
-
-		cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-			 tmp_ctx.prepath);
-
-		mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
-						   &fake_devname);
-		free_dfs_info_param(&ref);
-
-		if (IS_ERR(mdata)) {
-			rc = PTR_ERR(mdata);
-			mdata = NULL;
-		} else
-			rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
-
-		kfree(mdata);
-		kfree(fake_devname);
-
-		if (rc)
-			break;
-
-		cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-			 tmp_ctx.prepath);
-
-		mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-		rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
-		if (!rc || (*server && *ses)) {
-			/*
-			 * We were able to connect to new target server. Update current context with
-			 * new target server.
-			 */
-			rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
-			break;
-		}
-	}
-	if (!rc) {
-		cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-			 tmp_ctx.prepath);
+	mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral,
+					   &fake_devname);
+	if (IS_ERR(mdata)) {
+		rc = PTR_ERR(mdata);
+		mdata = NULL;
+	} else {
 		/*
-		 * Update DFS target hint in DFS referral cache with the target server we
-		 * successfully reconnected to.
+		 * We cannot clear out the whole structure since we no longer have an explicit
+		 * function to parse a mount-string. Instead we need to clear out the individual
+		 * fields that are no longer valid.
 		 */
-		rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
-					      cifs_remap(cifs_sb), path, tgt_it);
+		kfree(ctx->prepath);
+		ctx->prepath = NULL;
+		rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
 	}
+	kfree(fake_devname);
+	kfree(cifs_sb->ctx->mount_options);
+	cifs_sb->ctx->mount_options = mdata;
 
-out:
-	kfree(npath);
-	smb3_cleanup_fs_context_contents(&tmp_ctx);
-	dfs_cache_free_tgts(&tgt_list);
 	return rc;
 }
 #endif
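
expand_dfs_referral() and its callers lean on the kernel's ERR_PTR convention:
a pointer return carries either a valid allocation or a negative errno, never
both. The idiom in isolation (make_name() and use_name() invented for the
example):

	#include <linux/err.h>
	#include <linux/kernel.h>
	#include <linux/slab.h>

	static char *make_name(int id)
	{
		if (id < 0)
			return ERR_PTR(-EINVAL);	/* errno encoded in the pointer */
		return kasprintf(GFP_KERNEL, "name-%d", id);
	}

	static int use_name(int id)
	{
		char *name = make_name(id);

		if (IS_ERR(name))
			return PTR_ERR(name);	/* decode the errno back out */
		if (!name)
			return -ENOMEM;		/* kasprintf() can return NULL */
		kfree(name);
		return 0;
	}
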
@@ -3333,12 +3292,14 @@
  * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
  * otherwise 0.
  */
-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-			  const unsigned int xid,
-			  struct TCP_Server_Info *server,
-			  struct cifs_tcon *tcon)
+static int is_path_remote(struct mount_ctx *mnt_ctx)
 {
 	int rc;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct TCP_Server_Info *server = mnt_ctx->server;
+	unsigned int xid = mnt_ctx->xid;
+	struct cifs_tcon *tcon = mnt_ctx->tcon;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
 	char *full_path;
 
 	if (!server->ops->is_path_accessible)
@@ -3376,111 +3337,26 @@
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, struct cifs_ses *ses,
-			 struct cifs_ses **root_ses)
+static void set_root_ses(struct mount_ctx *mnt_ctx)
 {
-	if (ses) {
+	if (mnt_ctx->ses) {
 		spin_lock(&cifs_tcp_ses_lock);
-		ses->ses_count++;
+		mnt_ctx->ses->ses_count++;
 		spin_unlock(&cifs_tcp_ses_lock);
-		dfs_cache_add_refsrv_session(mount_id, ses);
+		dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses);
 	}
-	*root_ses = ses;
+	mnt_ctx->root_ses = mnt_ctx->ses;
 }
 
-/* Set up next dfs prefix path in @dfs_path */
-static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-			    const unsigned int xid, struct TCP_Server_Info *server,
-			    struct cifs_tcon *tcon, char **dfs_path)
-{
-	char *path, *npath;
-	int added_treename = is_tcon_dfs(tcon);
-	int rc;
-
-	path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
-	if (!path)
-		return -ENOMEM;
-
-	rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
-	if (rc == -EREMOTE) {
-		struct smb3_fs_context v = {NULL};
-		/* if @path contains a tree name, skip it in the prefix path */
-		if (added_treename) {
-			rc = smb3_parse_devname(path, &v);
-			if (rc)
-				goto out;
-			npath = build_unc_path_to_root(&v, cifs_sb, true);
-			smb3_cleanup_fs_context_contents(&v);
-		} else {
-			v.UNC = ctx->UNC;
-			v.prepath = path + 1;
-			npath = build_unc_path_to_root(&v, cifs_sb, true);
-		}
-
-		if (IS_ERR(npath)) {
-			rc = PTR_ERR(npath);
-			goto out;
-		}
-
-		kfree(*dfs_path);
-		*dfs_path = npath;
-		rc = -EREMOTE;
-	}
-
-out:
-	kfree(path);
-	return rc;
-}
-
-/* Check if resolved targets can handle any DFS referrals */
-static int is_referral_server(const char *ref_path, struct cifs_sb_info *cifs_sb,
-			      struct cifs_tcon *tcon, bool *ref_server)
+static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl)
 {
 	int rc;
-	struct dfs_info3_param ref = {0};
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
 
-	cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+	*isdfs = true;
 
-	if (is_tcon_dfs(tcon)) {
-		*ref_server = true;
-	} else {
-		char *npath;
-
-		npath = dfs_cache_canonical_path(ref_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
-		if (IS_ERR(npath))
-			return PTR_ERR(npath);
-
-		rc = dfs_cache_noreq_find(npath, &ref, NULL);
-		kfree(npath);
-		if (rc) {
-			cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
-			return rc;
-		}
-		cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
-		/*
-		 * Check if all targets are capable of handling DFS referrals as per
-		 * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
-		 */
-		*ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
-		free_dfs_info_param(&ref);
-	}
-	return 0;
-}
-
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
-{
-	int rc = 0;
-	unsigned int xid;
-	struct TCP_Server_Info *server = NULL;
-	struct cifs_ses *ses = NULL, *root_ses = NULL;
-	struct cifs_tcon *tcon = NULL;
-	int count = 0;
-	uuid_t mount_id = {0};
-	char *ref_path = NULL, *full_path = NULL;
-	char *oldmnt = NULL;
-	bool ref_server = false;
-
-	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+	rc = mount_get_conns(mnt_ctx);
 	/*
 	 * If called with 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
 	 * try to get a DFS referral (even cached) to determine whether it is a DFS mount.
@@ -3489,167 +3365,261 @@
 	 * to respond with PATH_NOT_COVERED to requests that include the prefix.
 	 */
 	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
-	    dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
-			   NULL)) {
+	    dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+			   ctx->UNC + 1, NULL, root_tl)) {
 		if (rc)
-			goto error;
+			return rc;
 		/* Check if it is fully accessible and then mount it */
-		rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+		rc = is_path_remote(mnt_ctx);
 		if (!rc)
-			goto out;
-		if (rc != -EREMOTE)
-			goto error;
+			*isdfs = false;
+		else if (rc != -EREMOTE)
+			return rc;
+	}
+	return 0;
+}
+
+static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path,
+			      const char *ref_path, struct dfs_cache_tgt_iterator *tit)
+{
+	int rc;
+	struct dfs_info3_param ref = {};
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	char *oldmnt = cifs_sb->ctx->mount_options;
+
+	rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref);
+	if (rc)
+		goto out;
+
+	rc = expand_dfs_referral(mnt_ctx, full_path, &ref);
+	if (rc)
+		goto out;
+
+	/* Connect to new target only if we were redirected (e.g. mount options changed) */
+	if (oldmnt != cifs_sb->ctx->mount_options) {
+		mount_put_conns(mnt_ctx);
+		rc = mount_get_dfs_conns(mnt_ctx);
+	}
+	if (!rc) {
+		if (cifs_is_referral_server(mnt_ctx->tcon, &ref))
+			set_root_ses(mnt_ctx);
+		rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+					      cifs_remap(cifs_sb), ref_path, tit);
 	}
 
-	mount_put_conns(cifs_sb, xid, server, ses, tcon);
-	/*
-	 * Ignore error check here because we may failover to other targets from cached a
-	 * referral.
-	 */
-	(void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+out:
+	free_dfs_info_param(&ref);
+	return rc;
+}
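
connect_dfs_target() detects redirection by pointer identity: when
expand_dfs_referral() rewrites the options it frees the old string and installs
a freshly allocated one, so comparing against the saved pointer is enough to
tell whether a reconnect is needed. The trick, reduced to hypothetical names:

	struct cfg {
		char *options;
	};

	int maybe_rewrite_options(struct cfg *cfg);	/* may kfree() and replace ->options */
	int reconnect_with(struct cfg *cfg);

	static int reconnect_if_redirected(struct cfg *cfg)
	{
		char *old = cfg->options;
		int rc = maybe_rewrite_options(cfg);

		if (rc)
			return rc;
		if (old != cfg->options)	/* pointer changed => options rewritten */
			rc = reconnect_with(cfg);
		return rc;
	}
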
 
-	/* Get path of DFS root */
-	ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
-	if (IS_ERR(ref_path)) {
-		rc = PTR_ERR(ref_path);
-		ref_path = NULL;
-		goto error;
+static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl)
+{
+	int rc;
+	char *full_path;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	struct dfs_cache_tgt_iterator *tit;
+
+	/*
+	 * Put initial connections as they might be shared with other mounts.  We need unique dfs
+	 * connections per mount to fail over properly, so mount_get_dfs_conns() must be used from
+	 * now on.
+	 */
+	mount_put_conns(mnt_ctx);
+	mount_get_dfs_conns(mnt_ctx);
+
+	full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+	if (IS_ERR(full_path))
+		return PTR_ERR(full_path);
+
+	mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls,
+							    cifs_remap(cifs_sb));
+	if (IS_ERR(mnt_ctx->origin_fullpath)) {
+		rc = PTR_ERR(mnt_ctx->origin_fullpath);
+		mnt_ctx->origin_fullpath = NULL;
+		goto out;
 	}
 
-	uuid_gen(&mount_id);
-	set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
-	do {
-		/* Save full path of last DFS path we used to resolve final target server */
-		kfree(full_path);
-		full_path = build_unc_path_to_root(ctx, cifs_sb, !!count);
-		if (IS_ERR(full_path)) {
-			rc = PTR_ERR(full_path);
-			full_path = NULL;
+	/* Try all dfs root targets */
+	for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl);
+	     tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) {
+		rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit);
+		if (!rc) {
+			mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL);
+			if (!mnt_ctx->leaf_fullpath)
+				rc = -ENOMEM;
 			break;
 		}
-		/* Chase referral */
-		oldmnt = cifs_sb->ctx->mount_options;
-		rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1);
-		if (rc)
-			break;
-		/* Connect to new DFS target only if we were redirected */
-		if (oldmnt != cifs_sb->ctx->mount_options) {
-			mount_put_conns(cifs_sb, xid, server, ses, tcon);
-			rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
-		}
-		if (rc && !server && !ses) {
-			/* Failed to connect. Try to connect to other targets in the referral. */
-			rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid,
-					     &server, &ses, &tcon);
-		}
-		if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
-			break;
-		if (!tcon)
-			continue;
+	}
 
-		/* Make sure that requests go through new root servers */
-		rc = is_referral_server(ref_path + 1, cifs_sb, tcon, &ref_server);
-		if (rc)
-			break;
-		if (ref_server)
-			set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
-
-		/* Get next dfs path and then continue chasing them if -EREMOTE */
-		rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
-		/* Prevent recursion on broken link referrals */
-		if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
-			rc = -ELOOP;
-	} while (rc == -EREMOTE);
-
-	if (rc || !tcon || !ses)
-		goto error;
-
-	kfree(ref_path);
-	/*
-	 * Store DFS full path in both superblock and tree connect structures.
-	 *
-	 * For DFS root mounts, the prefix path (cifs_sb->prepath) is preserved during reconnect so
-	 * only the root path is set in cifs_sb->origin_fullpath and tcon->dfs_path. And for DFS
-	 * links, the prefix path is included in both and may be changed during reconnect.  See
-	 * cifs_tree_connect().
-	 */
-	ref_path = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
+out:
 	kfree(full_path);
-	full_path = NULL;
+	return rc;
+}
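
Note the for-loop initializer above: seeding rc with -ENOENT before fetching
the first iterator makes an empty target list fall through with a meaningful
error instead of an uninitialized rc. In isolation, using the iterator API as
in the patch and a hypothetical try_target():

	#include <linux/errno.h>

	int try_target(struct dfs_cache_tgt_iterator *tit);

	static int connect_first_working(struct dfs_cache_tgt_list *tl)
	{
		struct dfs_cache_tgt_iterator *tit;
		int rc;

		/* rc preloaded so an empty list reports -ENOENT */
		for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(tl);
		     tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
			rc = try_target(tit);
			if (!rc)
				break;	/* first working target wins */
		}
		return rc;
	}
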
 
-	if (IS_ERR(ref_path)) {
-		rc = PTR_ERR(ref_path);
-		ref_path = NULL;
-		goto error;
+static int __follow_dfs_link(struct mount_ctx *mnt_ctx)
+{
+	int rc;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	char *full_path;
+	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+	struct dfs_cache_tgt_iterator *tit;
+
+	full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+	if (IS_ERR(full_path))
+		return PTR_ERR(full_path);
+
+	kfree(mnt_ctx->leaf_fullpath);
+	mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
+							  cifs_remap(cifs_sb));
+	if (IS_ERR(mnt_ctx->leaf_fullpath)) {
+		rc = PTR_ERR(mnt_ctx->leaf_fullpath);
+		mnt_ctx->leaf_fullpath = NULL;
+		goto out;
 	}
-	cifs_sb->origin_fullpath = ref_path;
 
-	ref_path = kstrdup(cifs_sb->origin_fullpath, GFP_KERNEL);
-	if (!ref_path) {
-		rc = -ENOMEM;
-		goto error;
+	/* Get referral from dfs link */
+	rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+			    cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl);
+	if (rc)
+		goto out;
+
+	/* Try all dfs link targets */
+	for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl);
+	     tit; tit = dfs_cache_get_next_tgt(&tl, tit)) {
+		rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit);
+		if (!rc) {
+			rc = is_path_remote(mnt_ctx);
+			break;
+		}
 	}
-	spin_lock(&cifs_tcp_ses_lock);
-	tcon->dfs_path = ref_path;
-	ref_path = NULL;
-	spin_unlock(&cifs_tcp_ses_lock);
 
+out:
+	kfree(full_path);
+	dfs_cache_free_tgts(&tl);
+	return rc;
+}
+
+static int follow_dfs_link(struct mount_ctx *mnt_ctx)
+{
+	int rc;
+	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	char *full_path;
+	int num_links = 0;
+
+	full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+	if (IS_ERR(full_path))
+		return PTR_ERR(full_path);
+
+	kfree(mnt_ctx->origin_fullpath);
+	mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
+							    cifs_remap(cifs_sb));
+	kfree(full_path);
+
+	if (IS_ERR(mnt_ctx->origin_fullpath)) {
+		rc = PTR_ERR(mnt_ctx->origin_fullpath);
+		mnt_ctx->origin_fullpath = NULL;
+		return rc;
+	}
+
+	do {
+		rc = __follow_dfs_link(mnt_ctx);
+		if (!rc || rc != -EREMOTE)
+			break;
+	} while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
+
+	return rc;
+}
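
follow_dfs_link() bounds nested link chasing with a comma expression in the
loop condition: rc is preloaded with -ELOOP on every retry, so exhausting
MAX_NESTED_LINKS attempts surfaces as -ELOOP rather than the last -EREMOTE.
The idiom, reduced around a hypothetical follow_once():

	#include <linux/errno.h>
	#include <linux/namei.h>	/* MAX_NESTED_LINKS */

	int follow_once(void);	/* one hop; -EREMOTE means another link follows */

	static int follow_until_resolved(void)
	{
		int rc, hops = 0;

		do {
			rc = follow_once();
			if (!rc || rc != -EREMOTE)
				break;	/* resolved, or a real error */
		} while (rc = -ELOOP, ++hops < MAX_NESTED_LINKS);

		return rc;
	}
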
+
+/* Set up DFS referral paths for failover */
+static void setup_server_referral_paths(struct mount_ctx *mnt_ctx)
+{
+	struct TCP_Server_Info *server = mnt_ctx->server;
+
+	server->origin_fullpath = mnt_ctx->origin_fullpath;
+	server->leaf_fullpath = mnt_ctx->leaf_fullpath;
+	server->current_fullpath = mnt_ctx->leaf_fullpath;
+	mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL;
+}
+
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
+{
+	int rc;
+	struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
+	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+	bool isdfs;
+
+	rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl);
+	if (rc)
+		goto error;
+	if (!isdfs)
+		goto out;
+
+	uuid_gen(&mnt_ctx.mount_id);
+	rc = connect_dfs_root(&mnt_ctx, &tl);
+	dfs_cache_free_tgts(&tl);
+
+	if (rc)
+		goto error;
+
+	rc = is_path_remote(&mnt_ctx);
+	if (rc == -EREMOTE)
+		rc = follow_dfs_link(&mnt_ctx);
+	if (rc)
+		goto error;
+
+	setup_server_referral_paths(&mnt_ctx);
 	/*
-	 * After reconnecting to a different server, unique ids won't
-	 * match anymore, so we disable serverino. This prevents
-	 * dentry revalidation to think the dentry are stale (ESTALE).
+	 * After reconnecting to a different server, unique ids won't match anymore, so we disable
+	 * serverino. This prevents dentry revalidation from thinking the dentries are stale (ESTALE).
 	 */
 	cifs_autodisable_serverino(cifs_sb);
 	/*
-	 * Force the use of prefix path to support failover on DFS paths that
-	 * resolve to targets that have different prefix paths.
+	 * Force the use of prefix path to support failover on DFS paths that resolve to targets
+	 * that have different prefix paths.
 	 */
 	cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
 	kfree(cifs_sb->prepath);
 	cifs_sb->prepath = ctx->prepath;
 	ctx->prepath = NULL;
-	uuid_copy(&cifs_sb->dfs_mount_id, &mount_id);
+	uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
 
 out:
-	free_xid(xid);
-	cifs_try_adding_channels(cifs_sb, ses);
-	return mount_setup_tlink(cifs_sb, ses, tcon);
+	free_xid(mnt_ctx.xid);
+	cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
+	return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
 
 error:
-	kfree(ref_path);
-	kfree(full_path);
-	kfree(cifs_sb->origin_fullpath);
-	dfs_cache_put_refsrv_sessions(&mount_id);
-	mount_put_conns(cifs_sb, xid, server, ses, tcon);
+	dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
+	kfree(mnt_ctx.origin_fullpath);
+	kfree(mnt_ctx.leaf_fullpath);
+	mount_put_conns(&mnt_ctx);
 	return rc;
 }
 #else
 int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
 	int rc = 0;
-	unsigned int xid;
-	struct cifs_ses *ses;
-	struct cifs_tcon *tcon;
-	struct TCP_Server_Info *server;
+	struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
 
-	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+	rc = mount_get_conns(&mnt_ctx);
 	if (rc)
 		goto error;
 
-	if (tcon) {
-		rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+	if (mnt_ctx.tcon) {
+		rc = is_path_remote(&mnt_ctx);
 		if (rc == -EREMOTE)
 			rc = -EOPNOTSUPP;
 		if (rc)
 			goto error;
 	}
 
-	free_xid(xid);
-
-	return mount_setup_tlink(cifs_sb, ses, tcon);
+	free_xid(mnt_ctx.xid);
+	return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
 
 error:
-	mount_put_conns(cifs_sb, xid, server, ses, tcon);
+	mount_put_conns(&mnt_ctx);
 	return rc;
 }
 #endif
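
The cifs_mount() rework above replaces five out-parameters threaded through
every helper with a single mount_ctx that each step fills in or releases, so
cleanup only has to inspect one struct. The shape of the refactor, simplified
with invented names:

	struct srv;
	struct session;
	struct tcon;

	struct mnt_state {
		struct srv *server;
		struct session *ses;
		struct tcon *tcon;
		unsigned int xid;
	};

	int get_conns(struct mnt_state *m);	/* fills in whatever it acquires */
	void put_tcon(struct tcon *t);
	void put_session(struct session *s);
	void put_server(struct srv *s);

	/* release only the deepest object acquired; it owns the rest */
	static void put_conns(struct mnt_state *m)
	{
		if (m->tcon)
			put_tcon(m->tcon);
		else if (m->ses)
			put_session(m->ses);
		else if (m->server)
			put_server(m->server);
	}

	static int mount_like(void)
	{
		struct mnt_state m = {};
		int rc = get_conns(&m);

		if (rc)
			put_conns(&m);	/* safe: checks each field */
		return rc;
	}
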
@@ -3818,7 +3788,6 @@
 	kfree(cifs_sb->prepath);
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
-	kfree(cifs_sb->origin_fullpath);
 #endif
 	call_rcu(&cifs_sb->rcu, delayed_free);
 }
@@ -4145,104 +4114,234 @@
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
+/* Update dfs referral path of superblock */
+static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb,
+				  const char *target)
+{
+	int rc = 0;
+	size_t len = strlen(target);
+	char *refpath, *npath;
+
+	if (unlikely(len < 2 || *target != '\\'))
+		return -EINVAL;
+
+	if (target[1] == '\\') {
+		len += 1;
+		refpath = kmalloc(len, GFP_KERNEL);
+		if (!refpath)
+			return -ENOMEM;
+
+		scnprintf(refpath, len, "%s", target);
+	} else {
+		len += sizeof("\\");
+		refpath = kmalloc(len, GFP_KERNEL);
+		if (!refpath)
+			return -ENOMEM;
+
+		scnprintf(refpath, len, "\\%s", target);
+	}
+
+	npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb));
+	kfree(refpath);
+
+	if (IS_ERR(npath)) {
+		rc = PTR_ERR(npath);
+	} else {
+		mutex_lock(&server->refpath_lock);
+		kfree(server->leaf_fullpath);
+		server->leaf_fullpath = npath;
+		mutex_unlock(&server->refpath_lock);
+		server->current_fullpath = server->leaf_fullpath;
+	}
+	return rc;
+}
+
+static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host,
+				       size_t tcp_host_len, char *share, bool *target_match)
+{
+	int rc = 0;
+	const char *dfs_host;
+	size_t dfs_host_len;
+
+	*target_match = true;
+	extract_unc_hostname(share, &dfs_host, &dfs_host_len);
+
+	/* Check if hostnames or addresses match */
+	if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+		cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
+			 dfs_host, (int)tcp_host_len, tcp_host);
+		rc = match_target_ip(server, dfs_host, dfs_host_len, target_match);
+		if (rc)
+			cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
+	}
+	return rc;
+}
+
+static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
+				     struct cifs_sb_info *cifs_sb, char *tree, bool islink,
+				     struct dfs_cache_tgt_list *tl)
+{
+	int rc;
+	struct TCP_Server_Info *server = tcon->ses->server;
+	const struct smb_version_operations *ops = server->ops;
+	struct cifs_tcon *ipc = tcon->ses->tcon_ipc;
+	char *share = NULL, *prefix = NULL;
+	const char *tcp_host;
+	size_t tcp_host_len;
+	struct dfs_cache_tgt_iterator *tit;
+	bool target_match;
+
+	extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
+
+	tit = dfs_cache_get_tgt_iterator(tl);
+	if (!tit) {
+		rc = -ENOENT;
+		goto out;
+	}
+
+	/* Try to tree connect to all dfs targets */
+	for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
+		const char *target = dfs_cache_get_tgt_name(tit);
+		struct dfs_cache_tgt_list ntl = DFS_CACHE_TGT_LIST_INIT(ntl);
+
+		kfree(share);
+		kfree(prefix);
+		share = prefix = NULL;
+
+		/* Check if share matches with tcp ses */
+		rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix);
+		if (rc) {
+			cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc);
+			break;
+		}
+
+		rc = target_share_matches_server(server, tcp_host, tcp_host_len, share,
+						 &target_match);
+		if (rc)
+			break;
+		if (!target_match) {
+			rc = -EHOSTUNREACH;
+			continue;
+		}
+
+		if (ipc->need_reconnect) {
+			scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+			rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls);
+			if (rc)
+				break;
+		}
+
+		scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
+		if (!islink) {
+			rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
+			break;
+		}
+		/*
+		 * If no dfs referrals were returned from link target, then just do a TREE_CONNECT
+		 * to it.  Otherwise, cache the dfs referral and then mark current tcp ses for
+		 * reconnect so either the demultiplex thread or the echo worker will reconnect to
+		 * the newly resolved target.
+		 */
+		if (dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target,
+				   NULL, &ntl)) {
+			rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
+			if (rc)
+				continue;
+			rc = dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit);
+			if (!rc)
+				rc = cifs_update_super_prepath(cifs_sb, prefix);
+		} else {
+			/* Target is another dfs share */
+			rc = update_server_fullpath(server, cifs_sb, target);
+			dfs_cache_free_tgts(tl);
+
+			if (!rc) {
+				rc = -EREMOTE;
+				list_replace_init(&ntl.tl_list, &tl->tl_list);
+			} else
+				dfs_cache_free_tgts(&ntl);
+		}
+		break;
+	}
+
+out:
+	kfree(share);
+	kfree(prefix);
+
+	return rc;
+}
+
+static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
+				   struct cifs_sb_info *cifs_sb, char *tree, bool islink,
+				   struct dfs_cache_tgt_list *tl)
+{
+	int rc;
+	int num_links = 0;
+	struct TCP_Server_Info *server = tcon->ses->server;
+
+	do {
+		rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, islink, tl);
+		if (!rc || rc != -EREMOTE)
+			break;
+	} while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
+	/*
+	 * If we couldn't tree connect to any targets from the last referral path, then retry
+	 * from the original referral path.
+	 */
+	if (rc && server->current_fullpath != server->origin_fullpath) {
+		server->current_fullpath = server->origin_fullpath;
+		cifs_ses_mark_for_reconnect(tcon->ses);
+	}
+
+	dfs_cache_free_tgts(tl);
+	return rc;
+}
+
 int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
 {
 	int rc;
 	struct TCP_Server_Info *server = tcon->ses->server;
 	const struct smb_version_operations *ops = server->ops;
-	struct dfs_cache_tgt_list tl;
-	struct dfs_cache_tgt_iterator *it = NULL;
+	struct super_block *sb = NULL;
+	struct cifs_sb_info *cifs_sb;
+	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
 	char *tree;
-	const char *tcp_host;
-	size_t tcp_host_len;
-	const char *dfs_host;
-	size_t dfs_host_len;
-	char *share = NULL, *prefix = NULL;
 	struct dfs_info3_param ref = {0};
-	bool isroot;
 
 	tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
 	if (!tree)
 		return -ENOMEM;
 
-	/* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
-	if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) {
-		if (tcon->ipc) {
-			scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
-			rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
-		} else {
-			rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
-		}
+	if (tcon->ipc) {
+		scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+		rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
 		goto out;
 	}
 
-	isroot = ref.server_type == DFS_TYPE_ROOT;
+	sb = cifs_get_tcp_super(server);
+	if (IS_ERR(sb)) {
+		rc = PTR_ERR(sb);
+		cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc);
+		goto out;
+	}
+
+	cifs_sb = CIFS_SB(sb);
+
+	/* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
+	if (!server->current_fullpath ||
+	    dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) {
+		rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, cifs_sb->local_nls);
+		goto out;
+	}
+
+	rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK,
+				     &tl);
 	free_dfs_info_param(&ref);
 
-	extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
-
-	for (it = dfs_cache_get_tgt_iterator(&tl); it; it = dfs_cache_get_next_tgt(&tl, it)) {
-		bool target_match;
-
-		kfree(share);
-		kfree(prefix);
-		share = NULL;
-		prefix = NULL;
-
-		rc = dfs_cache_get_tgt_share(tcon->dfs_path + 1, it, &share, &prefix);
-		if (rc) {
-			cifs_dbg(VFS, "%s: failed to parse target share %d\n",
-				 __func__, rc);
-			continue;
-		}
-
-		extract_unc_hostname(share, &dfs_host, &dfs_host_len);
-
-		if (dfs_host_len != tcp_host_len
-		    || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
-			cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
-				 dfs_host, (int)tcp_host_len, tcp_host);
-
-			rc = match_target_ip(server, dfs_host, dfs_host_len, &target_match);
-			if (rc) {
-				cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
-				break;
-			}
-
-			if (!target_match) {
-				cifs_dbg(FYI, "%s: skipping target\n", __func__);
-				continue;
-			}
-		}
-
-		if (tcon->ipc) {
-			scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", share);
-			rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
-		} else {
-			scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
-			rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
-			/* Only handle prefix paths of DFS link targets */
-			if (!rc && !isroot) {
-				rc = update_super_prepath(tcon, prefix);
-				break;
-			}
-		}
-		if (rc == -EREMOTE)
-			break;
-	}
-
-	kfree(share);
-	kfree(prefix);
-
-	if (!rc) {
-		if (it)
-			rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, it);
-		else
-			rc = -ENOENT;
-	}
-	dfs_cache_free_tgts(&tl);
 out:
 	kfree(tree);
+	cifs_put_tcp_super(sb);
+
 	return rc;
 }
 #else
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 2837455..e9b0fa2 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -283,7 +283,7 @@
 			seq_printf(m,
 				   "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
 				   ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
-				   ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags,
+				   ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags,
 				   IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no",
 				   ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
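
The one-line fix above reorders the varargs to match the format string:
hdr_flags was being printed in the ref_flags slot and vice versa. Since both
are plain integers, the compiler cannot flag the swap, as this reduced
userspace example shows:

	#include <stdio.h>

	static void show(unsigned int hdr_flags, unsigned int ref_flags)
	{
		/* buggy: ref_flags lands under the hdr_flags label */
		printf("hdr_flags=0x%x,ref_flags=0x%x\n", ref_flags, hdr_flags);

		/* fixed: argument order matches the format string */
		printf("hdr_flags=0x%x,ref_flags=0x%x\n", hdr_flags, ref_flags);
	}
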
 
@@ -1355,18 +1355,13 @@
 	}
 
 	cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
-	for (i = 0; i < tcon->ses->chan_count; i++) {
-		spin_lock(&GlobalMid_Lock);
-		if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
-			tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
-		spin_unlock(&GlobalMid_Lock);
-	}
+	cifs_ses_mark_for_reconnect(tcon->ses);
 }
 
 /* Refresh dfs referral of tcon and mark it for reconnect if needed */
-static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon,
+			  bool force_refresh)
 {
-	const char *path = tcon->dfs_path + 1;
 	struct cifs_ses *ses;
 	struct cache_entry *ce;
 	struct dfs_info3_param *refs = NULL;
@@ -1422,6 +1417,20 @@
 	return rc;
 }
 
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+	struct TCP_Server_Info *server = tcon->ses->server;
+
+	mutex_lock(&server->refpath_lock);
+	if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
+		__refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+	mutex_unlock(&server->refpath_lock);
+
+	__refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
+
+	return 0;
+}
+
 /**
  * dfs_cache_remount_fs - remount a DFS share
  *
@@ -1435,6 +1444,7 @@
 int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
 {
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct mount_group *mg;
 	struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
 	int rc;
@@ -1443,13 +1453,15 @@
 		return -EINVAL;
 
 	tcon = cifs_sb_master_tcon(cifs_sb);
-	if (!tcon->dfs_path) {
-		cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+	server = tcon->ses->server;
+
+	if (!server->origin_fullpath) {
+		cifs_dbg(FYI, "%s: not a dfs mount\n", __func__);
 		return 0;
 	}
 
 	if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
-		cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+		cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1457,7 +1469,7 @@
 	mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
 	if (IS_ERR(mg)) {
 		mutex_unlock(&mount_group_list_lock);
-		cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+		cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__);
 		return PTR_ERR(mg);
 	}
 	kref_get(&mg->refcount);
@@ -1498,9 +1510,12 @@
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+		if (!server->is_dfs_conn)
+			continue;
+
 		list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
 			list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
-				if (tcon->dfs_path) {
+				if (!tcon->ipc && !tcon->need_reconnect) {
 					tcon->tc_count++;
 					list_add_tail(&tcon->ulist, &tcons);
 				}
@@ -1510,8 +1525,16 @@
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
+		struct TCP_Server_Info *server = tcon->ses->server;
+
 		list_del_init(&tcon->ulist);
-		refresh_tcon(sessions, tcon, false);
+
+		mutex_lock(&server->refpath_lock);
+		if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
+			__refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+		mutex_unlock(&server->refpath_lock);
+
+		__refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
 		cifs_put_tcon(tcon);
 	}
 }
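
Both refresh sites above follow the same two-path scheme: refresh the leaf
referral path under refpath_lock only when it differs from the origin path,
then always refresh the origin path. Reduced, with a hypothetical srv type and
refresh_path() helper:

	#include <linux/mutex.h>
	#include <linux/string.h>

	struct srv {
		struct mutex refpath_lock;
		char *leaf_fullpath;
		char *origin_fullpath;
	};

	void refresh_path(const char *path);	/* hypothetical refresh step */

	static void refresh_both_paths(struct srv *server)
	{
		mutex_lock(&server->refpath_lock);
		if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
			refresh_path(server->leaf_fullpath + 1);	/* skip leading '\' */
		mutex_unlock(&server->refpath_lock);

		refresh_path(server->origin_fullpath + 1);
	}
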
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1b855fc..9fee3af 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2692,12 +2692,23 @@
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
 		server = tcon->ses->server;
-		if (server->ops->flush)
-			rc = server->ops->flush(xid, tcon, &smbfile->fid);
-		else
+		if (server->ops->flush == NULL) {
 			rc = -ENOSYS;
+			goto strict_fsync_exit;
+		}
+
+		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
+			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
+			if (smbfile) {
+				rc = server->ops->flush(xid, tcon, &smbfile->fid);
+				cifsFileInfo_put(smbfile);
+			} else
+				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
+		} else
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
 	}
 
+strict_fsync_exit:
 	free_xid(xid);
 	return rc;
 }
@@ -2709,6 +2720,7 @@
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 	struct cifsFileInfo *smbfile = file->private_data;
+	struct inode *inode = file_inode(file);
 	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 
 	rc = file_write_and_wait_range(file, start, end);
@@ -2725,12 +2737,23 @@
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
 		server = tcon->ses->server;
-		if (server->ops->flush)
-			rc = server->ops->flush(xid, tcon, &smbfile->fid);
-		else
+		if (server->ops->flush == NULL) {
 			rc = -ENOSYS;
+			goto fsync_exit;
+		}
+
+		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
+			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
+			if (smbfile) {
+				rc = server->ops->flush(xid, tcon, &smbfile->fid);
+				cifsFileInfo_put(smbfile);
+			} else
+				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
+		} else
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
 	}
 
+fsync_exit:
 	free_xid(xid);
 	return rc;
 }
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 38d96a4..6a179ae 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -308,7 +308,9 @@
 	new_ctx->nodename = NULL;
 	new_ctx->username = NULL;
 	new_ctx->password = NULL;
+	new_ctx->server_hostname = NULL;
 	new_ctx->domainname = NULL;
+	new_ctx->workstation_name = NULL;
 	new_ctx->UNC = NULL;
 	new_ctx->source = NULL;
 	new_ctx->iocharset = NULL;
@@ -323,6 +325,7 @@
 	DUP_CTX_STR(UNC);
 	DUP_CTX_STR(source);
 	DUP_CTX_STR(domainname);
+	DUP_CTX_STR(workstation_name);
 	DUP_CTX_STR(nodename);
 	DUP_CTX_STR(iocharset);
 
@@ -459,6 +462,7 @@
 		return -EINVAL;
 
 	/* record the server hostname */
+	kfree(ctx->server_hostname);
 	ctx->server_hostname = kstrndup(devname + 2, pos - devname - 2, GFP_KERNEL);
 	if (!ctx->server_hostname)
 		return -ENOMEM;
@@ -720,6 +724,11 @@
 		cifs_errorf(fc, "can not change domainname during remount\n");
 		return -EINVAL;
 	}
+	if (new_ctx->workstation_name &&
+	    (!old_ctx->workstation_name || strcmp(new_ctx->workstation_name, old_ctx->workstation_name))) {
+		cifs_errorf(fc, "can not change workstation_name during remount\n");
+		return -EINVAL;
+	}
 	if (new_ctx->nodename &&
 	    (!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) {
 		cifs_errorf(fc, "can not change nodename during remount\n");
@@ -753,7 +762,8 @@
 		return rc;
 
 	/*
-	 * We can not change UNC/username/password/domainname/nodename/iocharset
+	 * We can not change UNC/username/password/domainname/
+	 * workstation_name/nodename/iocharset
 	 * during reconnect so ignore what we have in the new context and
 	 * just use what we already have in cifs_sb->ctx.
 	 */
@@ -762,6 +772,7 @@
 	STEAL_STRING(cifs_sb, ctx, username);
 	STEAL_STRING(cifs_sb, ctx, password);
 	STEAL_STRING(cifs_sb, ctx, domainname);
+	STEAL_STRING(cifs_sb, ctx, workstation_name);
 	STEAL_STRING(cifs_sb, ctx, nodename);
 	STEAL_STRING(cifs_sb, ctx, iocharset);
 
@@ -1414,13 +1425,22 @@
 
 int smb3_init_fs_context(struct fs_context *fc)
 {
+	int rc;
 	struct smb3_fs_context *ctx;
 	char *nodename = utsname()->nodename;
 	int i;
 
 	ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
-	if (unlikely(!ctx))
-		return -ENOMEM;
+	if (unlikely(!ctx)) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
+
+	ctx->workstation_name = kstrdup(nodename, GFP_KERNEL);
+	if (unlikely(!ctx->workstation_name)) {
+		rc = -ENOMEM;
+		goto err_exit;
+	}
 
 	/*
 	 * does not have to be perfect mapping since field is
@@ -1493,6 +1513,14 @@
 	fc->fs_private = ctx;
 	fc->ops = &smb3_fs_context_ops;
 	return 0;
+
+err_exit:
+	if (ctx) {
+		kfree(ctx->workstation_name);
+		kfree(ctx);
+	}
+
+	return rc;
 }
 
 void
@@ -1518,6 +1546,8 @@
 	ctx->source = NULL;
 	kfree(ctx->domainname);
 	ctx->domainname = NULL;
+	kfree(ctx->workstation_name);
+	ctx->workstation_name = NULL;
 	kfree(ctx->nodename);
 	ctx->nodename = NULL;
 	kfree(ctx->iocharset);
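
The smb3_init_fs_context() change above gives the context an early allocation (workstation_name) before later setup steps can fail, so errors now funnel through one exit label that frees in reverse order. A minimal compilable userspace sketch of the same goto-cleanup idiom; struct ctx and init_ctx are hypothetical stand-ins for the cifs types, not the driver code:

#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct ctx {
	char *workstation_name;
};

static int init_ctx(struct ctx **out, const char *nodename)
{
	int rc;
	struct ctx *ctx = calloc(1, sizeof(*ctx));

	if (!ctx) {
		rc = -ENOMEM;
		goto err_exit;
	}

	ctx->workstation_name = strdup(nodename);
	if (!ctx->workstation_name) {
		rc = -ENOMEM;
		goto err_exit;
	}

	*out = ctx;
	return 0;

err_exit:
	if (ctx) {			/* frees are safe on partial init */
		free(ctx->workstation_name);
		free(ctx);
	}
	return rc;
}
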
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index b2d22cf..e54090d 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -170,6 +170,7 @@
 	char *server_hostname;
 	char *UNC;
 	char *nodename;
+	char *workstation_name;
 	char *iocharset;  /* local code page for mapping to and from Unicode */
 	char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
 	char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 8eedd20..7e409a3 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -87,6 +87,14 @@
 	char *sharename;
 	struct cifs_fscache_super_auxdata auxdata;
 
+	/*
+	 * Check if the cookie was already initialized so we don't
+	 * reinitialize it. In the future, as we integrate with newer
+	 * fscache features, we may instead check whether it has changed.
+	 */
+	if (tcon->fscache == NULL)
+		return;
+
 	sharename = extract_sharename(tcon->treeName);
 	if (IS_ERR(sharename)) {
 		cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index ba2c3e8..5148d48 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -75,6 +75,7 @@
 		INIT_LIST_HEAD(&ret_buf->tcon_list);
 		mutex_init(&ret_buf->session_mutex);
 		spin_lock_init(&ret_buf->iface_lock);
+		spin_lock_init(&ret_buf->chan_lock);
 	}
 	return ret_buf;
 }
@@ -94,6 +95,7 @@
 	kfree_sensitive(buf_to_free->password);
 	kfree(buf_to_free->user_name);
 	kfree(buf_to_free->domainName);
+	kfree(buf_to_free->workstation_name);
 	kfree_sensitive(buf_to_free->auth_key.response);
 	kfree(buf_to_free->iface_list);
 	kfree_sensitive(buf_to_free);
@@ -138,9 +140,6 @@
 	kfree(buf_to_free->nativeFileSystem);
 	kfree_sensitive(buf_to_free->password);
 	kfree(buf_to_free->crfid.fid);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	kfree(buf_to_free->dfs_path);
-#endif
 	kfree(buf_to_free);
 }
 
@@ -1287,69 +1286,20 @@
 	return rc;
 }
 
-static void tcon_super_cb(struct super_block *sb, void *arg)
+int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
 {
-	struct super_cb_data *sd = arg;
-	struct cifs_tcon *tcon = sd->data;
-	struct cifs_sb_info *cifs_sb;
-
-	if (sd->sb)
-		return;
-
-	cifs_sb = CIFS_SB(sb);
-	if (tcon->dfs_path && cifs_sb->origin_fullpath &&
-	    !strcasecmp(tcon->dfs_path, cifs_sb->origin_fullpath))
-		sd->sb = sb;
-}
-
-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
-{
-	return __cifs_get_super(tcon_super_cb, tcon);
-}
-
-static inline void cifs_put_tcon_super(struct super_block *sb)
-{
-	__cifs_put_super(sb);
-}
-#else
-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void cifs_put_tcon_super(struct super_block *sb)
-{
-}
-#endif
-
-int update_super_prepath(struct cifs_tcon *tcon, char *prefix)
-{
-	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
-	int rc = 0;
-
-	sb = cifs_get_tcon_super(tcon);
-	if (IS_ERR(sb))
-		return PTR_ERR(sb);
-
-	cifs_sb = CIFS_SB(sb);
-
 	kfree(cifs_sb->prepath);
 
 	if (prefix && *prefix) {
 		cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC);
-		if (!cifs_sb->prepath) {
-			rc = -ENOMEM;
-			goto out;
-		}
+		if (!cifs_sb->prepath)
+			return -ENOMEM;
 
 		convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb));
 	} else
 		cifs_sb->prepath = NULL;
 
 	cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
-
-out:
-	cifs_put_tcon_super(sb);
-	return rc;
+	return 0;
 }
+#endif
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 25a2b8e..fe707f4 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -119,7 +119,9 @@
  */
 
 int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
-void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
+int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+				 struct cifs_ses *ses,
+				 const struct nls_table *nls_cp);
 int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
 			struct cifs_ses *ses,
 			const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 23e02db..af63548 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -54,42 +54,54 @@
 {
 	int i;
 
+	spin_lock(&ses->chan_lock);
 	for (i = 0; i < ses->chan_count; i++) {
-		if (is_server_using_iface(ses->chans[i].server, iface))
+		if (is_server_using_iface(ses->chans[i].server, iface)) {
+			spin_unlock(&ses->chan_lock);
 			return true;
+		}
 	}
+	spin_unlock(&ses->chan_lock);
 	return false;
 }
 
 /* returns number of channels added */
 int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 {
-	int old_chan_count = ses->chan_count;
-	int left = ses->chan_max - ses->chan_count;
+	int old_chan_count, new_chan_count;
+	int left;
 	int i = 0;
 	int rc = 0;
 	int tries = 0;
 	struct cifs_server_iface *ifaces = NULL;
 	size_t iface_count;
 
-	if (left <= 0) {
-		cifs_dbg(FYI,
-			 "ses already at max_channels (%zu), nothing to open\n",
-			 ses->chan_max);
-		return 0;
-	}
-
 	if (ses->server->dialect < SMB30_PROT_ID) {
 		cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
 		return 0;
 	}
 
-	if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
-		cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
-		ses->chan_max = 1;
+	spin_lock(&ses->chan_lock);
+
+	new_chan_count = old_chan_count = ses->chan_count;
+	left = ses->chan_max - ses->chan_count;
+
+	if (left <= 0) {
+		cifs_dbg(FYI,
+			 "ses already at max_channels (%zu), nothing to open\n",
+			 ses->chan_max);
+		spin_unlock(&ses->chan_lock);
 		return 0;
 	}
 
+	if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+		ses->chan_max = 1;
+		spin_unlock(&ses->chan_lock);
+		cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
+		return 0;
+	}
+	spin_unlock(&ses->chan_lock);
+
 	/*
 	 * Make a copy of the iface list at the time and use that
 	 * instead so as to not hold the iface spinlock for opening
@@ -142,10 +154,11 @@
 		cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
 			 i);
 		left--;
+		new_chan_count++;
 	}
 
 	kfree(ifaces);
-	return ses->chan_count - old_chan_count;
+	return new_chan_count - old_chan_count;
 }
 
 /*
@@ -157,10 +170,14 @@
 {
 	int i;
 
+	spin_lock(&ses->chan_lock);
 	for (i = 0; i < ses->chan_count; i++) {
-		if (ses->chans[i].server == server)
+		if (ses->chans[i].server == server) {
+			spin_unlock(&ses->chan_lock);
 			return &ses->chans[i];
+		}
 	}
+	spin_unlock(&ses->chan_lock);
 	return NULL;
 }
 
@@ -168,6 +185,7 @@
 cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 		     struct cifs_server_iface *iface)
 {
+	struct TCP_Server_Info *chan_server;
 	struct cifs_chan *chan;
 	struct smb3_fs_context ctx = {NULL};
 	static const char unc_fmt[] = "\\%s\\foo";
@@ -204,6 +222,7 @@
 	/* Auth */
 	ctx.domainauto = ses->domainAuto;
 	ctx.domainname = ses->domainName;
+	ctx.server_hostname = ses->server->hostname;
 	ctx.username = ses->user_name;
 	ctx.password = ses->password;
 	ctx.sectype = ses->sectype;
@@ -240,18 +259,19 @@
 	       SMB2_CLIENT_GUID_SIZE);
 	ctx.use_client_guid = true;
 
-	mutex_lock(&ses->session_mutex);
+	chan_server = cifs_get_tcp_session(&ctx, ses->server);
 
+	mutex_lock(&ses->session_mutex);
+	spin_lock(&ses->chan_lock);
 	chan = ses->binding_chan = &ses->chans[ses->chan_count];
-	chan->server = cifs_get_tcp_session(&ctx);
+	chan->server = chan_server;
 	if (IS_ERR(chan->server)) {
 		rc = PTR_ERR(chan->server);
 		chan->server = NULL;
+		spin_unlock(&ses->chan_lock);
 		goto out;
 	}
-	spin_lock(&cifs_tcp_ses_lock);
-	chan->server->is_channel = true;
-	spin_unlock(&cifs_tcp_ses_lock);
+	spin_unlock(&ses->chan_lock);
 
 	/*
 	 * We need to allocate the server crypto now as we will need
@@ -283,8 +303,11 @@
 	 * ses to the new server.
 	 */
 
+	spin_lock(&ses->chan_lock);
 	ses->chan_count++;
 	atomic_set(&ses->chan_seq, 0);
+	spin_unlock(&ses->chan_lock);
+
 out:
 	ses->binding = false;
 	ses->binding_chan = NULL;
@@ -296,6 +319,19 @@
 	return rc;
 }
 
+/* Mark all session channels for reconnect */
+void cifs_ses_mark_for_reconnect(struct cifs_ses *ses)
+{
+	int i;
+
+	for (i = 0; i < ses->chan_count; i++) {
+		spin_lock(&GlobalMid_Lock);
+		if (ses->chans[i].server->tcpStatus != CifsExiting)
+			ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+		spin_unlock(&GlobalMid_Lock);
+	}
+}
+
 static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 {
 	__u32 capabilities = 0;
@@ -599,18 +635,85 @@
 	return 0;
 }
 
+static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
+{
+	int sz = base_size + ses->auth_key.len
+		- CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+	if (ses->domainName)
+		sz += sizeof(__le16) * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+	else
+		sz += sizeof(__le16);
+
+	if (ses->user_name)
+		sz += sizeof(__le16) * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+	else
+		sz += sizeof(__le16);
+
+	sz += sizeof(__le16) * strnlen(ses->workstation_name, CIFS_MAX_WORKSTATION_LEN);
+
+	return sz;
+}
+
+static inline void cifs_security_buffer_from_str(SECURITY_BUFFER *pbuf,
+						 char *str_value,
+						 int str_length,
+						 unsigned char *pstart,
+						 unsigned char **pcur,
+						 const struct nls_table *nls_cp)
+{
+	unsigned char *tmp = pstart;
+	int len;
+
+	if (!pbuf)
+		return;
+
+	if (!pcur)
+		pcur = &tmp;
+
+	if (!str_value) {
+		pbuf->BufferOffset = cpu_to_le32(*pcur - pstart);
+		pbuf->Length = 0;
+		pbuf->MaximumLength = 0;
+		*pcur += sizeof(__le16);
+	} else {
+		len = cifs_strtoUTF16((__le16 *)*pcur,
+				      str_value,
+				      str_length,
+				      nls_cp);
+		len *= sizeof(__le16);
+		pbuf->BufferOffset = cpu_to_le32(*pcur - pstart);
+		pbuf->Length = cpu_to_le16(len);
+		pbuf->MaximumLength = cpu_to_le16(len);
+		*pcur += len;
+	}
+}
+
 /* BB Move to ntlmssp.c eventually */
 
-/* We do not malloc the blob, it is passed in pbuffer, because
-   it is fixed size, and small, making this approach cleaner */
-void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
-					 struct cifs_ses *ses)
+int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
+				 u16 *buflen,
+				 struct cifs_ses *ses,
+				 const struct nls_table *nls_cp)
 {
+	int rc = 0;
 	struct TCP_Server_Info *server = cifs_ses_server(ses);
-	NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
+	NEGOTIATE_MESSAGE *sec_blob;
 	__u32 flags;
+	unsigned char *tmp;
+	int len;
 
-	memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
+	len = size_of_ntlmssp_blob(ses, sizeof(NEGOTIATE_MESSAGE));
+	*pbuffer = kmalloc(len, GFP_KERNEL);
+	if (!*pbuffer) {
+		rc = -ENOMEM;
+		cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+		*buflen = 0;
+		goto setup_ntlm_neg_ret;
+	}
+	sec_blob = (NEGOTIATE_MESSAGE *)*pbuffer;
+
+	memset(*pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmNegotiate;
 
@@ -624,34 +727,25 @@
 	if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
 		flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
 
+	tmp = *pbuffer + sizeof(NEGOTIATE_MESSAGE);
 	sec_blob->NegotiateFlags = cpu_to_le32(flags);
 
-	sec_blob->WorkstationName.BufferOffset = 0;
-	sec_blob->WorkstationName.Length = 0;
-	sec_blob->WorkstationName.MaximumLength = 0;
+	/* these fields should be null in the negotiate phase per MS-NLMP 3.1.5.1.1 */
+	cifs_security_buffer_from_str(&sec_blob->DomainName,
+				      NULL,
+				      CIFS_MAX_DOMAINNAME_LEN,
+				      *pbuffer, &tmp,
+				      nls_cp);
 
-	/* Domain name is sent on the Challenge not Negotiate NTLMSSP request */
-	sec_blob->DomainName.BufferOffset = 0;
-	sec_blob->DomainName.Length = 0;
-	sec_blob->DomainName.MaximumLength = 0;
-}
+	cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+				      NULL,
+				      CIFS_MAX_WORKSTATION_LEN,
+				      *pbuffer, &tmp,
+				      nls_cp);
 
-static int size_of_ntlmssp_blob(struct cifs_ses *ses)
-{
-	int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
-		- CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
-
-	if (ses->domainName)
-		sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
-	else
-		sz += 2;
-
-	if (ses->user_name)
-		sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
-	else
-		sz += 2;
-
-	return sz;
+	*buflen = tmp - *pbuffer;
+setup_ntlm_neg_ret:
+	return rc;
 }
 
 int build_ntlmssp_auth_blob(unsigned char **pbuffer,
@@ -663,6 +757,7 @@
 	AUTHENTICATE_MESSAGE *sec_blob;
 	__u32 flags;
 	unsigned char *tmp;
+	int len;
 
 	rc = setup_ntlmv2_rsp(ses, nls_cp);
 	if (rc) {
@@ -670,7 +765,9 @@
 		*buflen = 0;
 		goto setup_ntlmv2_ret;
 	}
-	*pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+
+	len = size_of_ntlmssp_blob(ses, sizeof(AUTHENTICATE_MESSAGE));
+	*pbuffer = kmalloc(len, GFP_KERNEL);
 	if (!*pbuffer) {
 		rc = -ENOMEM;
 		cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
@@ -686,7 +783,7 @@
 		NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
 		NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
 		NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
-		NTLMSSP_NEGOTIATE_SEAL;
+		NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
 	if (ses->server->sign)
 		flags |= NTLMSSP_NEGOTIATE_SIGN;
 	if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
@@ -719,42 +816,23 @@
 		sec_blob->NtChallengeResponse.MaximumLength = 0;
 	}
 
-	if (ses->domainName == NULL) {
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-		sec_blob->DomainName.Length = 0;
-		sec_blob->DomainName.MaximumLength = 0;
-		tmp += 2;
-	} else {
-		int len;
-		len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
-				      CIFS_MAX_DOMAINNAME_LEN, nls_cp);
-		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-		sec_blob->DomainName.Length = cpu_to_le16(len);
-		sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
-		tmp += len;
-	}
+	cifs_security_buffer_from_str(&sec_blob->DomainName,
+				      ses->domainName,
+				      CIFS_MAX_DOMAINNAME_LEN,
+				      *pbuffer, &tmp,
+				      nls_cp);
 
-	if (ses->user_name == NULL) {
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-		sec_blob->UserName.Length = 0;
-		sec_blob->UserName.MaximumLength = 0;
-		tmp += 2;
-	} else {
-		int len;
-		len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
-				      CIFS_MAX_USERNAME_LEN, nls_cp);
-		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-		sec_blob->UserName.Length = cpu_to_le16(len);
-		sec_blob->UserName.MaximumLength = cpu_to_le16(len);
-		tmp += len;
-	}
+	cifs_security_buffer_from_str(&sec_blob->UserName,
+				      ses->user_name,
+				      CIFS_MAX_USERNAME_LEN,
+				      *pbuffer, &tmp,
+				      nls_cp);
 
-	sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-	sec_blob->WorkstationName.Length = 0;
-	sec_blob->WorkstationName.MaximumLength = 0;
-	tmp += 2;
+	cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+				      ses->workstation_name,
+				      CIFS_MAX_WORKSTATION_LEN,
+				      *pbuffer, &tmp,
+				      nls_cp);
 
 	if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
 		(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
@@ -1230,6 +1308,7 @@
 	struct cifs_ses *ses = sess_data->ses;
 	__u16 bytes_remaining;
 	char *bcc_ptr;
+	unsigned char *ntlmsspblob = NULL;
 	u16 blob_len;
 
 	cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n");
@@ -1253,10 +1332,15 @@
 	pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
 
 	/* Build security blob before we assemble the request */
-	build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses);
-	sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
-	sess_data->iov[1].iov_base = pSMB->req.SecurityBlob;
-	pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
+	rc = build_ntlmssp_negotiate_blob(&ntlmsspblob,
+				     &blob_len, ses,
+				     sess_data->nls_cp);
+	if (rc)
+		goto out;
+
+	sess_data->iov[1].iov_len = blob_len;
+	sess_data->iov[1].iov_base = ntlmsspblob;
+	pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
 
 	rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
 	if (rc)
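
The new cifs_security_buffer_from_str() helper above centralizes how each variable-length NTLMSSP string is packed: the string is appended at a running cursor and its SECURITY_BUFFER header records the offset and byte length, with a NULL string still consuming one empty UTF-16 code unit. A compilable sketch of that packing, simplified to a naive ASCII-to-UTF-16LE conversion; struct sec_buf and pack_str are illustrative stand-ins, not the SMB wire structs:

#include <stdint.h>

struct sec_buf {
	uint16_t len;
	uint16_t max_len;
	uint32_t offset;
};

static void pack_str(struct sec_buf *buf, const char *s,
		     unsigned char *start, unsigned char **cur)
{
	buf->offset = (uint32_t)(*cur - start);

	if (!s) {
		/* empty field: zero length, cursor still advances */
		buf->len = buf->max_len = 0;
		*cur += 2;
		return;
	}

	for (; *s; s++) {		/* naive ASCII -> UTF-16LE */
		*(*cur)++ = (unsigned char)*s;
		*(*cur)++ = 0;
	}

	buf->len = buf->max_len = (uint16_t)(*cur - start - buf->offset);
}
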
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 8297703..fe5bfa2 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -46,6 +46,10 @@
 	struct smb2_file_link_info link_info;
 };
 
+/*
+ * Note: if cfile is passed, its reference is dropped here,
+ * so do not reuse cfile after this function returns.
+ */
 static int
 smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
 		 struct cifs_sb_info *cifs_sb, const char *full_path,
@@ -536,10 +540,11 @@
 		create_options |= OPEN_REPARSE_POINT;
 
 		/* Failed on a symbolic link - query a reparse point info */
+		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				      FILE_READ_ATTRIBUTES, FILE_OPEN,
 				      create_options, ACL_NO_MODE,
-				      smb2_data, SMB2_OP_QUERY_INFO, NULL);
+				      smb2_data, SMB2_OP_QUERY_INFO, cfile);
 	}
 	if (rc)
 		goto out;
@@ -587,10 +592,11 @@
 		create_options |= OPEN_REPARSE_POINT;
 
 		/* Failed on a symbolic link - query a reparse point info */
+		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				      FILE_READ_ATTRIBUTES, FILE_OPEN,
 				      create_options, ACL_NO_MODE,
-				      smb2_data, SMB2_OP_POSIX_QUERY_INFO, NULL);
+				      smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile);
 	}
 	if (rc)
 		goto out;
@@ -707,10 +713,12 @@
 		   struct cifs_sb_info *cifs_sb, bool set_alloc)
 {
 	__le64 eof = cpu_to_le64(size);
+	struct cifsFileInfo *cfile;
 
+	cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
 	return smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE,
-				&eof, SMB2_OP_SET_EOF, NULL);
+				&eof, SMB2_OP_SET_EOF, cfile);
 }
 
 int
@@ -719,6 +727,8 @@
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink;
+	struct cifs_tcon *tcon;
+	struct cifsFileInfo *cfile;
 	int rc;
 
 	if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
@@ -729,10 +739,12 @@
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
+	tcon = tlink_tcon(tlink);
 
-	rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
+	cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
+	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 			      FILE_WRITE_ATTRIBUTES, FILE_OPEN,
-			      0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL);
+			      0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile);
 	cifs_put_tlink(tlink);
 	return rc;
 }
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 7acf71de..c5b1dea 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2844,6 +2844,7 @@
 	struct fsctl_get_dfs_referral_req *dfs_req = NULL;
 	struct get_dfs_referral_rsp *dfs_rsp = NULL;
 	u32 dfs_req_size = 0, dfs_rsp_size = 0;
+	int retry_count = 0;
 
 	cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name);
 
@@ -2895,11 +2896,14 @@
 				true /* is_fsctl */,
 				(char *)dfs_req, dfs_req_size, CIFSMaxBufSize,
 				(char **)&dfs_rsp, &dfs_rsp_size);
-	} while (rc == -EAGAIN);
+		if (!is_retryable_error(rc))
+			break;
+		usleep_range(512, 2048);
+	} while (++retry_count < 5);
 
 	if (rc) {
-		if ((rc != -ENOENT) && (rc != -EOPNOTSUPP))
-			cifs_tcon_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc);
+		if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
+			cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
 		goto out;
 	}
 
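
The referral ioctl above trades an unbounded while (rc == -EAGAIN) loop for at most five attempts with a short sleep between retryable errors. A self-contained sketch of that bounded-retry shape; do_ioctl() and is_retryable() are stand-ins (the kernel uses is_retryable_error() and usleep_range(512, 2048)):

#include <errno.h>
#include <stdbool.h>
#include <unistd.h>

static bool is_retryable(int rc)
{
	return rc == -EAGAIN || rc == -EINTR;
}

static int do_ioctl(void)
{
	static int attempts;

	return ++attempts < 3 ? -EAGAIN : 0;	/* fail twice, then succeed */
}

int main(void)
{
	int rc, retry_count = 0;

	do {
		rc = do_ioctl();
		if (!is_retryable(rc))
			break;
		usleep(1024);		/* kernel: usleep_range(512, 2048) */
	} while (++retry_count < 5);

	return rc ? 1 : 0;
}
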
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d2ecb2e..8b36703 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -142,7 +142,7 @@
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 	       struct TCP_Server_Info *server)
 {
-	int rc;
+	int rc = 0;
 	struct nls_table *nls_codepage;
 	struct cifs_ses *ses;
 	int retries;
@@ -155,7 +155,11 @@
 	if (tcon == NULL)
 		return 0;
 
-	if (smb2_command == SMB2_TREE_CONNECT)
+	/*
+	 * Need to also skip SMB2_IOCTL because it is used for checking nested dfs links in
+	 * cifs_tree_connect().
+	 */
+	if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
 		return 0;
 
 	if (tcon->tidStatus == CifsExiting) {
@@ -253,7 +257,7 @@
 	/*
 	 * If we are reconnecting an extra channel, bind
 	 */
-	if (server->is_channel) {
+	if (CIFS_SERVER_IS_CHAN(server)) {
 		ses->binding = true;
 		ses->binding_chan = cifs_ses_find_chan(ses, server);
 	}
@@ -1456,7 +1460,7 @@
 	int rc;
 	struct cifs_ses *ses = sess_data->ses;
 	struct smb2_sess_setup_rsp *rsp = NULL;
-	char *ntlmssp_blob = NULL;
+	unsigned char *ntlmssp_blob = NULL;
 	bool use_spnego = false; /* else use raw ntlmssp */
 	u16 blob_length = 0;
 
@@ -1475,22 +1479,17 @@
 	if (rc)
 		goto out_err;
 
-	ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
-			       GFP_KERNEL);
-	if (ntlmssp_blob == NULL) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob,
+					  &blob_length, ses,
+					  sess_data->nls_cp);
+	if (rc)
+		goto out_err;
 
-	build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
 	if (use_spnego) {
 		/* BB eventually need to add this */
 		cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
 		rc = -EOPNOTSUPP;
 		goto out;
-	} else {
-		blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
-		/* with raw NTLMSSP we don't encapsulate in SPNEGO */
 	}
 	sess_data->iov[1].iov_base = ntlmssp_blob;
 	sess_data->iov[1].iov_len = blob_length;
@@ -1841,7 +1840,7 @@
 	cifs_small_buf_release(req);
 	rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
 	trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
-	if (rc != 0) {
+	if ((rc != 0) || (rsp == NULL)) {
 		cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
 		tcon->need_reconnect = true;
 		goto tcon_error_exit;
@@ -2669,7 +2668,18 @@
 		goto err_free_rsp_buf;
 	}
 
+	/*
+	 * Although it should not be possible for rsp to be null while
+	 * rc is unset, the check below is slightly safer long term
+	 * (and quiets a Coverity warning).
+	 */
 	rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
+	if (rsp == NULL) {
+		rc = -EIO;
+		kfree(pc_buf);
+		goto err_free_req;
+	}
+
 	trace_smb3_posix_mkdir_done(xid, le64_to_cpu(rsp->PersistentFileId),
 				    tcon->tid,
 				    ses->Suid, CREATE_NOT_FILE,
@@ -2942,7 +2952,9 @@
 			tcon->need_reconnect = true;
 		}
 		goto creat_exit;
-	} else
+	} else if (rsp == NULL) /* unlikely to happen, but safer to check */
+		goto creat_exit;
+	else
 		trace_smb3_open_done(xid, le64_to_cpu(rsp->PersistentFileId),
 				     tcon->tid,
 				     ses->Suid, oparms->create_options,
@@ -3163,6 +3175,16 @@
 	if ((plen == NULL) || (out_data == NULL))
 		goto ioctl_exit;
 
+	/*
+	 * Although it should not be possible for rsp to be null while
+	 * rc is unset, the check below is slightly safer long term
+	 * (and quiets a Coverity warning).
+	 */
+	if (rsp == NULL) {
+		rc = -EIO;
+		goto ioctl_exit;
+	}
+
 	*plen = le32_to_cpu(rsp->OutputCount);
 
 	/* We check for obvious errors in the output buffer length and offset */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b737932..61ea3d3 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -1044,14 +1044,17 @@
 	if (!ses)
 		return NULL;
 
+	spin_lock(&ses->chan_lock);
 	if (!ses->binding) {
 		/* round robin */
 		if (ses->chan_count > 1) {
 			index = (uint)atomic_inc_return(&ses->chan_seq);
 			index %= ses->chan_count;
 		}
+		spin_unlock(&ses->chan_lock);
 		return ses->chans[index].server;
 	} else {
+		spin_unlock(&ses->chan_lock);
 		return cifs_ses_server(ses);
 	}
 }
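
cifs_pick_channel() above now takes ses->chan_lock around the round-robin pick, since chan_count can change while channels are being added. A compilable userspace model of the locked round-robin; a pthread mutex stands in for the spinlock and field names mirror the cifs ones, but this is an illustration, not the driver code:

#include <pthread.h>
#include <stdatomic.h>

#define MAX_CHANS 16

struct chan {
	int server;		/* stand-in for struct TCP_Server_Info * */
};

struct session {
	pthread_mutex_t chan_lock;
	atomic_uint chan_seq;
	int chan_count;
	struct chan chans[MAX_CHANS];
};

static int pick_channel(struct session *ses)
{
	unsigned int index = 0;
	int server;

	pthread_mutex_lock(&ses->chan_lock);
	if (ses->chan_count > 1)
		index = atomic_fetch_add(&ses->chan_seq, 1) % ses->chan_count;
	server = ses->chans[index].server;
	pthread_mutex_unlock(&ses->chan_lock);

	return server;
}
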
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 84da2c2..ec9a1d7 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -150,7 +150,7 @@
 	 * however in order to avoid some race conditions, add a
 	 * DBG_BUGON to observe this in advance.
 	 */
-	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
+	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
 
 	/* last refcount should be connected with its managed pslot.  */
 	erofs_workgroup_unfreeze(grp, 0);
@@ -165,15 +165,19 @@
 	unsigned int freed = 0;
 	unsigned long index;
 
+	xa_lock(&sbi->managed_pslots);
 	xa_for_each(&sbi->managed_pslots, index, grp) {
 		/* try to shrink each valid workgroup */
 		if (!erofs_try_to_release_workgroup(sbi, grp))
 			continue;
+		xa_unlock(&sbi->managed_pslots);
 
 		++freed;
 		if (!--nr_shrink)
-			break;
+			return freed;
+		xa_lock(&sbi->managed_pslots);
 	}
+	xa_unlock(&sbi->managed_pslots);
 	return freed;
 }
 
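
The shrinker walk above now holds the managed_pslots xarray lock across iteration and drops it only around the work that releases a group, retaking it before continuing. A compilable model of that drop/reacquire pattern over a plain array; the real code relies on the xarray's index-based iteration to resume safely after the unlock, a detail the array model below sidesteps:

#include <pthread.h>

#define NGROUPS 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int freeable[NGROUPS] = { 1, 0, 1, 1, 0, 0, 1, 0 };

/* release work that may sleep, so it must run unlocked */
static void release_group(int i)
{
	freeable[i] = 0;
}

static unsigned int shrink(unsigned long nr_shrink)
{
	unsigned int freed = 0;
	int i;

	pthread_mutex_lock(&lock);
	for (i = 0; i < NGROUPS; i++) {
		if (!freeable[i])
			continue;
		pthread_mutex_unlock(&lock);	/* drop before sleeping work */

		release_group(i);
		++freed;
		if (!--nr_shrink)
			return freed;		/* lock already dropped */
		pthread_mutex_lock(&lock);	/* retake before resuming */
	}
	pthread_mutex_unlock(&lock);
	return freed;
}
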
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index bcb1b91..9a249bf 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -96,16 +96,9 @@
 	DBG_BUGON(1);
 }
 
-/*
- * a compressed_pages[] placeholder in order to avoid
- * being filled with file pages for in-place decompression.
- */
-#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
-
 /* how to allocate cached pages for a pcluster */
 enum z_erofs_cache_alloctype {
 	DONTALLOC,	/* don't allocate any cached pages */
-	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
 	/*
 	 * try to use cached I/O if page allocation succeeds or fallback
 	 * to in-place I/O instead to avoid any direct reclaim.
@@ -267,10 +260,6 @@
 			/* I/O is needed, no possible to decompress directly */
 			standalone = false;
 			switch (type) {
-			case DELAYEDALLOC:
-				t = tagptr_init(compressed_page_t,
-						PAGE_UNALLOCATED);
-				break;
 			case TRYALLOC:
 				newpage = erofs_allocpage(pagepool, gfp);
 				if (!newpage)
@@ -371,8 +360,8 @@
 
 /* callers must be with collection lock held */
 static int z_erofs_attach_page(struct z_erofs_collector *clt,
-			       struct page *page,
-			       enum z_erofs_page_type type)
+			       struct page *page, enum z_erofs_page_type type,
+			       bool pvec_safereuse)
 {
 	int ret;
 
@@ -382,9 +371,9 @@
 	    z_erofs_try_inplace_io(clt, page))
 		return 0;
 
-	ret = z_erofs_pagevec_enqueue(&clt->vector, page, type);
+	ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
+				      pvec_safereuse);
 	clt->cl->vcnt += (unsigned int)ret;
-
 	return ret ? 0 : -EAGAIN;
 }
 
@@ -727,7 +716,8 @@
 		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
 
 retry:
-	err = z_erofs_attach_page(clt, page, page_type);
+	err = z_erofs_attach_page(clt, page, page_type,
+				  clt->mode >= COLLECT_PRIMARY_FOLLOWED);
 	/* should allocate an additional short-lived page for pagevec */
 	if (err == -EAGAIN) {
 		struct page *const newpage =
@@ -735,7 +725,7 @@
 
 		set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
 		err = z_erofs_attach_page(clt, newpage,
-					  Z_EROFS_PAGE_TYPE_EXCLUSIVE);
+					  Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
 		if (!err)
 			goto retry;
 	}
@@ -1089,15 +1079,6 @@
 	if (!page)
 		goto out_allocpage;
 
-	/*
-	 * the cached page has not been allocated and
-	 * an placeholder is out there, prepare it now.
-	 */
-	if (page == PAGE_UNALLOCATED) {
-		tocache = true;
-		goto out_allocpage;
-	}
-
 	/* process the target tagged pointer */
 	t = tagptr_init(compressed_page_t, page);
 	justfound = tagptr_unfold_tags(t);
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 879df53..4a69515 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -179,4 +179,3 @@
 #define Z_EROFS_VMAP_GLOBAL_PAGES	2048
 
 #endif
-
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
index dfd7fe0..b05464f 100644
--- a/fs/erofs/zpvec.h
+++ b/fs/erofs/zpvec.h
@@ -106,11 +106,18 @@
 
 static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
 					   struct page *page,
-					   enum z_erofs_page_type type)
+					   enum z_erofs_page_type type,
+					   bool pvec_safereuse)
 {
-	if (!ctor->next && type)
-		if (ctor->index + 1 == ctor->nr)
+	if (!ctor->next) {
+		/* some pages cannot safely be reused for the pvec without I/O */
+		if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
+			type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
+
+		if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
+		    ctor->index + 1 == ctor->nr)
 			return false;
+	}
 
 	if (ctor->index >= ctor->nr)
 		z_erofs_pagevec_ctor_pagedown(ctor, false);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 83e9bc0..f1693d4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -653,7 +653,7 @@
 		return PTR_ERR(inode);
 	}
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err) {
 		iput(inode);
 		goto err_out;
@@ -705,9 +705,6 @@
 	}
 
 #ifdef CONFIG_QUOTA
-	/* Needed for iput() to work correctly and not trash data */
-	sbi->sb->s_flags |= SB_ACTIVE;
-
 	/*
 	 * Turn on quotas which were not enabled for read-only mounts if
 	 * filesystem has quota feature, so that they are updated correctly.
@@ -1162,7 +1159,8 @@
 	if (!is_journalled_quota(sbi))
 		return false;
 
-	down_write(&sbi->quota_sem);
+	if (!down_write_trylock(&sbi->quota_sem))
+		return true;
 	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
 		ret = false;
 	} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 20a083d..49121a2 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -336,8 +336,8 @@
 
 static int zstd_init_compress_ctx(struct compress_ctx *cc)
 {
-	ZSTD_parameters params;
-	ZSTD_CStream *stream;
+	zstd_parameters params;
+	zstd_cstream *stream;
 	void *workspace;
 	unsigned int workspace_size;
 	unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
@@ -346,17 +346,17 @@
 	if (!level)
 		level = F2FS_ZSTD_DEFAULT_CLEVEL;
 
-	params = ZSTD_getParams(level, cc->rlen, 0);
-	workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams);
+	params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen);
+	workspace_size = zstd_cstream_workspace_bound(&params.cParams);
 
 	workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
 					workspace_size, GFP_NOFS);
 	if (!workspace)
 		return -ENOMEM;
 
-	stream = ZSTD_initCStream(params, 0, workspace, workspace_size);
+	stream = zstd_init_cstream(&params, 0, workspace, workspace_size);
 	if (!stream) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n",
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
 				__func__);
 		kvfree(workspace);
@@ -379,9 +379,9 @@
 
 static int zstd_compress_pages(struct compress_ctx *cc)
 {
-	ZSTD_CStream *stream = cc->private2;
-	ZSTD_inBuffer inbuf;
-	ZSTD_outBuffer outbuf;
+	zstd_cstream *stream = cc->private2;
+	zstd_in_buffer inbuf;
+	zstd_out_buffer outbuf;
 	int src_size = cc->rlen;
 	int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE;
 	int ret;
@@ -394,19 +394,19 @@
 	outbuf.dst = cc->cbuf->cdata;
 	outbuf.size = dst_size;
 
-	ret = ZSTD_compressStream(stream, &outbuf, &inbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+	ret = zstd_compress_stream(stream, &outbuf, &inbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
-	ret = ZSTD_endStream(stream, &outbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n",
+	ret = zstd_end_stream(stream, &outbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
@@ -423,22 +423,22 @@
 
 static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
 {
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	void *workspace;
 	unsigned int workspace_size;
 	unsigned int max_window_size =
 			MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
 
-	workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
+	workspace_size = zstd_dstream_workspace_bound(max_window_size);
 
 	workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
 					workspace_size, GFP_NOFS);
 	if (!workspace)
 		return -ENOMEM;
 
-	stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
+	stream = zstd_init_dstream(max_window_size, workspace, workspace_size);
 	if (!stream) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n",
 				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
 				__func__);
 		kvfree(workspace);
@@ -460,9 +460,9 @@
 
 static int zstd_decompress_pages(struct decompress_io_ctx *dic)
 {
-	ZSTD_DStream *stream = dic->private2;
-	ZSTD_inBuffer inbuf;
-	ZSTD_outBuffer outbuf;
+	zstd_dstream *stream = dic->private2;
+	zstd_in_buffer inbuf;
+	zstd_out_buffer outbuf;
 	int ret;
 
 	inbuf.pos = 0;
@@ -473,11 +473,11 @@
 	outbuf.dst = dic->rbuf;
 	outbuf.size = dic->rlen;
 
-	ret = ZSTD_decompressStream(stream, &outbuf, &inbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+	ret = zstd_decompress_stream(stream, &outbuf, &inbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n",
 				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
@@ -882,6 +882,25 @@
 	return is_page_in_cluster(cc, index);
 }
 
+bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
+				int index, int nr_pages)
+{
+	unsigned long pgidx;
+	int i;
+
+	if (nr_pages - index < cc->cluster_size)
+		return false;
+
+	pgidx = pvec->pages[index]->index;
+
+	for (i = 1; i < cc->cluster_size; i++) {
+		if (pvec->pages[index + i]->index != pgidx + i)
+			return false;
+	}
+
+	return true;
+}
+
 static bool cluster_has_invalid_data(struct compress_ctx *cc)
 {
 	loff_t i_size = i_size_read(cc->inode);
@@ -1531,6 +1550,7 @@
 	if (cluster_may_compress(cc)) {
 		err = f2fs_compress_pages(cc);
 		if (err == -EAGAIN) {
+			add_compr_block_stat(cc->inode, cc->cluster_size);
 			goto write;
 		} else if (err) {
 			f2fs_put_rpages_wbc(cc, wbc, true, 1);
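
f2fs_all_cluster_page_loaded(), added above, gates compressed-cluster writeback on the pagevec still holding a full run of pages with consecutive indices. The check reduces to the following standalone restatement over a plain index array; page_index stands in for pvec->pages[i]->index:

#include <stdbool.h>

static bool all_cluster_pages_loaded(const unsigned long *page_index,
				     int index, int nr_pages,
				     int cluster_size)
{
	unsigned long pgidx;
	int i;

	/* not enough pages left in the vector to cover the cluster */
	if (nr_pages - index < cluster_size)
		return false;

	pgidx = page_index[index];

	/* indices must be consecutive from the cluster's first page */
	for (i = 1; i < cluster_size; i++)
		if (page_index[index + i] != pgidx + i)
			return false;

	return true;
}
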
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f4fd6c2..9f754aa 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1465,10 +1465,15 @@
 	struct extent_info ei = {0, };
 	block_t blkaddr;
 	unsigned int start_pgofs;
+	int bidx = 0;
 
 	if (!maxblocks)
 		return 0;
 
+	map->m_bdev = inode->i_sb->s_bdev;
+	map->m_multidev_dio =
+		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
+
 	map->m_len = 0;
 	map->m_flags = 0;
 
@@ -1491,6 +1496,21 @@
 		if (flag == F2FS_GET_BLOCK_DIO)
 			f2fs_wait_on_block_writeback_range(inode,
 						map->m_pblk, map->m_len);
+
+		if (map->m_multidev_dio) {
+			block_t blk_addr = map->m_pblk;
+
+			bidx = f2fs_target_device_index(sbi, map->m_pblk);
+
+			map->m_bdev = FDEV(bidx).bdev;
+			map->m_pblk -= FDEV(bidx).start_blk;
+			map->m_len = min(map->m_len,
+				FDEV(bidx).end_blk + 1 - map->m_pblk);
+
+			if (map->m_may_create)
+				f2fs_update_device_state(sbi, inode->i_ino,
+							blk_addr, map->m_len);
+		}
 		goto out;
 	}
 
@@ -1609,6 +1629,9 @@
 	if (flag == F2FS_GET_BLOCK_PRE_AIO)
 		goto skip;
 
+	if (map->m_multidev_dio)
+		bidx = f2fs_target_device_index(sbi, blkaddr);
+
 	if (map->m_len == 0) {
 		/* preallocated unwritten block should be mapped for fiemap. */
 		if (blkaddr == NEW_ADDR)
@@ -1617,10 +1640,15 @@
 
 		map->m_pblk = blkaddr;
 		map->m_len = 1;
+
+		if (map->m_multidev_dio)
+			map->m_bdev = FDEV(bidx).bdev;
 	} else if ((map->m_pblk != NEW_ADDR &&
 			blkaddr == (map->m_pblk + ofs)) ||
 			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
 			flag == F2FS_GET_BLOCK_PRE_DIO) {
+		if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
+			goto sync_out;
 		ofs++;
 		map->m_len++;
 	} else {
@@ -1673,10 +1701,32 @@
 
 sync_out:
 
-	/* for hardware encryption, but to avoid potential issue in future */
-	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
+		/*
+		 * for hardware encryption, but to avoid potential issues
+		 * in the future
+		 */
 		f2fs_wait_on_block_writeback_range(inode,
 						map->m_pblk, map->m_len);
+		invalidate_mapping_pages(META_MAPPING(sbi),
+						map->m_pblk, map->m_pblk);
+
+		if (map->m_multidev_dio) {
+			block_t blk_addr = map->m_pblk;
+
+			bidx = f2fs_target_device_index(sbi, map->m_pblk);
+
+			map->m_bdev = FDEV(bidx).bdev;
+			map->m_pblk -= FDEV(bidx).start_blk;
+
+			if (map->m_may_create)
+				f2fs_update_device_state(sbi, inode->i_ino,
+							blk_addr, map->m_len);
+
+			f2fs_bug_on(sbi, blk_addr + map->m_len >
+						FDEV(bidx).end_blk + 1);
+		}
+	}
 
 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
 		if (map->m_flags & F2FS_MAP_MAPPED) {
@@ -1696,7 +1746,7 @@
 		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 out:
-	trace_f2fs_map_blocks(inode, map, err);
+	trace_f2fs_map_blocks(inode, map, create, flag, err);
 	return err;
 }
 
@@ -1755,6 +1805,9 @@
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
 		bh->b_size = blks_to_bytes(inode, map.m_len);
+
+		if (map.m_multidev_dio)
+			bh->b_bdev = map.m_bdev;
 	}
 	return err;
 }
@@ -2989,6 +3042,10 @@
 			need_readd = false;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 			if (f2fs_compressed_file(inode)) {
+				void *fsdata = NULL;
+				struct page *pagep;
+				int ret2;
+
 				ret = f2fs_init_compress_ctx(&cc);
 				if (ret) {
 					done = 1;
@@ -3007,27 +3064,23 @@
 				if (unlikely(f2fs_cp_error(sbi)))
 					goto lock_page;
 
-				if (f2fs_cluster_is_empty(&cc)) {
-					void *fsdata = NULL;
-					struct page *pagep;
-					int ret2;
+				if (!f2fs_cluster_is_empty(&cc))
+					goto lock_page;
 
-					ret2 = f2fs_prepare_compress_overwrite(
+				ret2 = f2fs_prepare_compress_overwrite(
 							inode, &pagep,
 							page->index, &fsdata);
-					if (ret2 < 0) {
-						ret = ret2;
-						done = 1;
-						break;
-					} else if (ret2 &&
-						!f2fs_compress_write_end(inode,
-								fsdata, page->index,
-								1)) {
-						retry = 1;
-						break;
-					}
-				} else {
-					goto lock_page;
+				if (ret2 < 0) {
+					ret = ret2;
+					done = 1;
+					break;
+				} else if (ret2 &&
+					(!f2fs_compress_write_end(inode,
+						fsdata, page->index, 1) ||
+					 !f2fs_all_cluster_page_loaded(&cc,
+						&pvec, i, nr_pages))) {
+					retry = 1;
+					break;
 				}
 			}
 #endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b339ae8..ce9fc9f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -55,6 +55,7 @@
 	FAULT_DISCARD,
 	FAULT_WRITE_IO,
 	FAULT_SLAB_ALLOC,
+	FAULT_DQUOT_INIT,
 	FAULT_MAX,
 };
 
@@ -561,6 +562,9 @@
 
 #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
 
+/* dirty segments threshold for triggering CP */
+#define DEFAULT_DIRTY_THRESHOLD		4
+
 /* for in-memory extent cache entry */
 #define F2FS_MIN_EXTENT_LEN	64	/* minimum extent length */
 
@@ -617,6 +621,7 @@
 				F2FS_MAP_UNWRITTEN)
 
 struct f2fs_map_blocks {
+	struct block_device *m_bdev;	/* for multi-device dio */
 	block_t m_pblk;
 	block_t m_lblk;
 	unsigned int m_len;
@@ -625,6 +630,7 @@
 	pgoff_t *m_next_extent;		/* point to next possible extent */
 	int m_seg_type;
 	bool m_may_create;		/* indicate it is from write path */
+	bool m_multidev_dio;		/* indicate it allows multi-device dio */
 };
 
 /* for flag in get_data_block */
@@ -1284,8 +1290,10 @@
 };
 
 enum {
-	FS_MODE_ADAPTIVE,	/* use both lfs/ssr allocation */
-	FS_MODE_LFS,		/* use lfs allocation only */
+	FS_MODE_ADAPTIVE,		/* use both lfs/ssr allocation */
+	FS_MODE_LFS,			/* use lfs allocation only */
+	FS_MODE_FRAGMENT_SEG,		/* segment fragmentation mode */
+	FS_MODE_FRAGMENT_BLK,		/* block fragmentation mode */
 };
 
 enum {
@@ -1728,12 +1736,15 @@
 
 	/* For shrinker support */
 	struct list_head s_list;
+	struct mutex umount_mutex;
+	unsigned int shrinker_run_no;
+
+	/* For multi devices */
 	int s_ndevs;				/* number of devices */
 	struct f2fs_dev_info *devs;		/* for device list */
 	unsigned int dirty_device;		/* for checkpoint data flush */
 	spinlock_t dev_lock;			/* protect dirty_device */
-	struct mutex umount_mutex;
-	unsigned int shrinker_run_no;
+	bool aligned_blksize;			/* all devices have the same logical blksize */
 
 	/* For write statistics */
 	u64 sectors_written_start;
@@ -1756,6 +1767,9 @@
 
 	unsigned long seq_file_ra_mul;		/* multiplier for ra_pages of seq. files in fadvise */
 
+	int max_fragment_chunk;			/* max chunk size for block fragmentation mode */
+	int max_fragment_hole;			/* max hole size for block fragmentation mode */
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	struct kmem_cache *page_array_slab;	/* page array entry */
 	unsigned int page_array_slab_size;	/* default page array slab size */
@@ -3363,6 +3377,7 @@
  */
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
+int f2fs_dquot_initialize(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
 int f2fs_quota_sync(struct super_block *sb, int type);
 loff_t max_file_blocks(struct inode *inode);
@@ -3492,6 +3507,8 @@
 			block_t old_blkaddr, block_t *new_blkaddr,
 			struct f2fs_summary *sum, int type,
 			struct f2fs_io_info *fio);
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+					block_t blkaddr, unsigned int blkcnt);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3516,6 +3533,16 @@
 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
 			unsigned int segno);
 
+#define DEF_FRAGMENT_SIZE	4
+#define MIN_FRAGMENT_SIZE	1
+#define MAX_FRAGMENT_SIZE	512
+
+static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
+{
+	return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG ||
+		F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK;
+}
+
 /*
  * checkpoint.c
  */
@@ -4027,6 +4054,8 @@
 							block_t blkaddr);
 bool f2fs_cluster_is_empty(struct compress_ctx *cc);
 bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
+bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
+				int index, int nr_pages);
 bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
 int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -4152,8 +4181,7 @@
 
 	if (!f2fs_compressed_file(inode))
 		return true;
-	if (S_ISREG(inode->i_mode) &&
-		(get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks)))
+	if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
 		return false;
 
 	fi->i_flags &= ~F2FS_COMPR_FL;
@@ -4302,6 +4330,16 @@
 	return align & blocksize_mask;
 }
 
+static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
+								int flag)
+{
+	if (!f2fs_is_multi_device(sbi))
+		return false;
+	if (flag != F2FS_GET_BLOCK_DIO)
+		return false;
+	return sbi->aligned_blksize;
+}
+
 static inline bool f2fs_force_buffered_io(struct inode *inode,
 				struct kiocb *iocb, struct iov_iter *iter)
 {
@@ -4310,7 +4348,9 @@
 
 	if (f2fs_post_read_required(inode))
 		return true;
-	if (f2fs_is_multi_device(sbi))
+
+	/* disallow direct IO if any of the devices has an unaligned blksize */
+	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
 		return true;
 	/*
 	 * for blkzoned device, fallback direct IO to buffered IO, so
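
f2fs.h above wires up the two fragmentation modes (FS_MODE_FRAGMENT_SEG/FS_MODE_FRAGMENT_BLK) plus the per-sb max_fragment_chunk and max_fragment_hole bounds; segment.c later in this series consumes them by allocating a random-sized chunk of blocks and then skipping a random-sized hole. A compilable userspace model of the block-mode advance; rand_range() stands in for prandom_u32() % max + 1:

#include <stdlib.h>

#define MAX_FRAGMENT_CHUNK 4	/* sbi->max_fragment_chunk stand-in */
#define MAX_FRAGMENT_HOLE  4	/* sbi->max_fragment_hole stand-in */

static unsigned int rand_range(unsigned int max)
{
	return (unsigned int)rand() % max + 1;	/* 1..max */
}

struct curseg {
	unsigned int next_blkoff;
	int fragment_remained_chunk;
};

static void refresh_next_blkoff(struct curseg *seg)
{
	seg->next_blkoff++;

	/* chunk exhausted: start a new random chunk after a random hole */
	if (--seg->fragment_remained_chunk <= 0) {
		seg->fragment_remained_chunk = rand_range(MAX_FRAGMENT_CHUNK);
		seg->next_blkoff += rand_range(MAX_FRAGMENT_HOLE);
	}
}
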
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index eb971e1..92ec269 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -786,7 +786,7 @@
 		return -EIO;
 	}
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		return err;
 
@@ -916,7 +916,7 @@
 		return err;
 
 	if (is_quota_modification(inode, attr)) {
-		err = dquot_initialize(inode);
+		err = f2fs_dquot_initialize(inode);
 		if (err)
 			return err;
 	}
@@ -3020,7 +3020,7 @@
 	}
 	f2fs_put_page(ipage, 1);
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		return err;
 
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 77391e3..a946ce0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/random.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -257,7 +258,9 @@
 		p->max_search = sbi->max_victim_search;
 
 	/* let's select beginning hot/small space first in no_heap mode*/
-	if (test_opt(sbi, NOHEAP) &&
+	if (f2fs_need_rand_seg(sbi))
+		p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
+	else if (test_opt(sbi, NOHEAP) &&
 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
 		p->offset = 0;
 	else
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 56a20d5..ea08f0d 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -192,7 +192,7 @@
 			f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
 		return 0;
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		return err;
 
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9141147..0f8b2df 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -527,7 +527,7 @@
 		inode->i_op = &f2fs_dir_inode_operations;
 		inode->i_fop = &f2fs_dir_operations;
 		inode->i_mapping->a_ops = &f2fs_dblock_aops;
-		inode_nohighmem(inode);
+		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 	} else if (S_ISLNK(inode->i_mode)) {
 		if (file_is_encrypt(inode))
 			inode->i_op = &f2fs_encrypted_symlink_inode_operations;
@@ -754,7 +754,7 @@
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err) {
 		err = 0;
 		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9c528e5..a728a0a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -74,7 +74,7 @@
 	if (err)
 		goto fail_drop;
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		goto fail_drop;
 
@@ -345,7 +345,7 @@
 	if (!f2fs_is_checkpoint_ready(sbi))
 		return -ENOSPC;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -404,7 +404,7 @@
 			F2FS_I(old_dentry->d_inode)->i_projid)))
 		return -EXDEV;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -460,7 +460,7 @@
 		return 0;
 	}
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -598,10 +598,10 @@
 		goto fail;
 	}
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		goto fail;
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		goto fail;
 
@@ -675,7 +675,7 @@
 	if (err)
 		return err;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -746,7 +746,7 @@
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -757,7 +757,7 @@
 	inode->i_op = &f2fs_dir_inode_operations;
 	inode->i_fop = &f2fs_dir_operations;
 	inode->i_mapping->a_ops = &f2fs_dblock_aops;
-	inode_nohighmem(inode);
+	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	set_inode_flag(inode, FI_INC_LINK);
 	f2fs_lock_op(sbi);
@@ -803,7 +803,7 @@
 	if (!f2fs_is_checkpoint_ready(sbi))
 		return -ENOSPC;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -841,7 +841,7 @@
 	struct inode *inode;
 	int err;
 
-	err = dquot_initialize(dir);
+	err = f2fs_dquot_initialize(dir);
 	if (err)
 		return err;
 
@@ -965,16 +965,16 @@
 			return err;
 	}
 
-	err = dquot_initialize(old_dir);
+	err = f2fs_dquot_initialize(old_dir);
 	if (err)
 		goto out;
 
-	err = dquot_initialize(new_dir);
+	err = f2fs_dquot_initialize(new_dir);
 	if (err)
 		goto out;
 
 	if (new_inode) {
-		err = dquot_initialize(new_inode);
+		err = f2fs_dquot_initialize(new_inode);
 		if (err)
 			goto out;
 	}
@@ -1138,11 +1138,11 @@
 			F2FS_I(new_dentry->d_inode)->i_projid)))
 		return -EXDEV;
 
-	err = dquot_initialize(old_dir);
+	err = f2fs_dquot_initialize(old_dir);
 	if (err)
 		goto out;
 
-	err = dquot_initialize(new_dir);
+	err = f2fs_dquot_initialize(new_dir);
 	if (err)
 		goto out;
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e863136..556fcd8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1443,6 +1443,7 @@
 			  nid, nid_of_node(page), ino_of_node(page),
 			  ofs_of_node(page), cpver_of_node(page),
 			  next_blkaddr_of_node(page));
+		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		err = -EINVAL;
 out_err:
 		ClearPageUptodate(page);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index ff14a6e..18b98cf 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -138,11 +138,6 @@
 	return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
 }
 
-static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
-{
-	return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
-}
-
 enum mem_type {
 	FREE_NIDS,	/* indicates the free nid list */
 	NAT_ENTRIES,	/* indicates the cached nat entry */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 0465551..6a1b466 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -81,7 +81,7 @@
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		goto err_out;
 
@@ -203,7 +203,7 @@
 			goto out_put;
 		}
 
-		err = dquot_initialize(einode);
+		err = f2fs_dquot_initialize(einode);
 		if (err) {
 			iput(einode);
 			goto out_put;
@@ -508,7 +508,7 @@
 		if (IS_ERR(inode))
 			return PTR_ERR(inode);
 
-		ret = dquot_initialize(inode);
+		ret = f2fs_dquot_initialize(inode);
 		if (ret) {
 			iput(inode);
 			return ret;
@@ -787,8 +787,6 @@
 	}
 
 #ifdef CONFIG_QUOTA
-	/* Needed for iput() to work correctly and not trash data */
-	sbi->sb->s_flags |= SB_ACTIVE;
 	/* Turn on quotas so that they are updated correctly */
 	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
 #endif
@@ -816,10 +814,8 @@
 	err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
 	if (!err)
 		f2fs_bug_on(sbi, !list_empty(&inode_list));
-	else {
-		/* restore s_flags to let iput() trash data */
-		sbi->sb->s_flags = s_flags;
-	}
+	else
+		f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
 skip:
 	fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a135d22..df9ed75 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -15,6 +15,7 @@
 #include <linux/timer.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/random.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -529,6 +530,25 @@
 	}
 }
 
+static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
+{
+	int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
+	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
+	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
+	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
+	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
+	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
+	unsigned int threshold = sbi->blocks_per_seg * factor *
+					DEFAULT_DIRTY_THRESHOLD;
+	unsigned int global_threshold = threshold * 3 / 2;
+
+	if (dents >= threshold || qdata >= threshold ||
+		nodes >= threshold || meta >= threshold ||
+		imeta >= threshold)
+		return true;
+	return dents + qdata + nodes + meta + imeta > global_threshold;
+}
+
 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 {
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -547,8 +567,8 @@
 	else
 		f2fs_build_free_nids(sbi, false, false);
 
-	if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
-		excess_prefree_segs(sbi))
+	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
+		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
 		goto do_sync;
 
 	/* there is background inflight IO or foreground operation recently */
@@ -561,7 +581,7 @@
 		goto do_sync;
 
 	/* checkpoint is the only way to shrink partial cached entries */
-	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
 		f2fs_available_free_memory(sbi, INO_ENTRIES))
 		return;
 
@@ -2630,6 +2650,8 @@
 	unsigned short seg_type = curseg->seg_type;
 
 	sanity_check_seg_type(sbi, seg_type);
+	if (f2fs_need_rand_seg(sbi))
+		return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
 
 	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
 	if (__is_large_section(sbi))
@@ -2681,6 +2703,9 @@
 	curseg->next_segno = segno;
 	reset_curseg(sbi, type, 1);
 	curseg->alloc_type = LFS;
+	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+		curseg->fragment_remained_chunk =
+				prandom_u32() % sbi->max_fragment_chunk + 1;
 }
 
 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2707,12 +2732,22 @@
 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
 				struct curseg_info *seg)
 {
-	if (seg->alloc_type == SSR)
+	if (seg->alloc_type == SSR) {
 		seg->next_blkoff =
 			__next_free_blkoff(sbi, seg->segno,
 						seg->next_blkoff + 1);
-	else
+	} else {
 		seg->next_blkoff++;
+		if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
+			/* To allocate block chunks in different sizes, use a random number */
+			if (--seg->fragment_remained_chunk <= 0) {
+				seg->fragment_remained_chunk =
+				   prandom_u32() % sbi->max_fragment_chunk + 1;
+				seg->next_blkoff +=
+				   prandom_u32() % sbi->max_fragment_hole + 1;
+			}
+		}
+	}
 }
 
 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
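
Note: the segment.c hunks above implement the new mode=fragment:block debug mode: in LFS allocation the write cursor normally advances one block at a time, but in fragment mode each chunk of 1..max_fragment_chunk blocks is followed by a hole of 1..max_fragment_hole blocks, both freshly randomized per chunk. A minimal userspace sketch of that policy, with rand() standing in for prandom_u32() and all names illustrative:

#include <stdio.h>
#include <stdlib.h>

struct curseg { int next_blkoff; int fragment_remained_chunk; };

static const int max_fragment_chunk = 4;	/* DEF_FRAGMENT_SIZE */
static const int max_fragment_hole = 4;

static void refresh_next_blkoff(struct curseg *seg)
{
	seg->next_blkoff++;
	/* chunk exhausted: pick a new chunk length and skip a hole */
	if (--seg->fragment_remained_chunk <= 0) {
		seg->fragment_remained_chunk =
			rand() % max_fragment_chunk + 1;
		seg->next_blkoff += rand() % max_fragment_hole + 1;
	}
}

int main(void)
{
	struct curseg seg = {
		.next_blkoff = 0,
		.fragment_remained_chunk = rand() % max_fragment_chunk + 1,
	};

	for (int i = 0; i < 16; i++) {
		refresh_next_blkoff(&seg);
		printf("write at blkoff %d\n", seg.next_blkoff);
	}
	return 0;
}
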
@@ -3485,24 +3520,30 @@
 	up_read(&SM_I(sbi)->curseg_lock);
 }
 
-static void update_device_state(struct f2fs_io_info *fio)
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+					block_t blkaddr, unsigned int blkcnt)
 {
-	struct f2fs_sb_info *sbi = fio->sbi;
-	unsigned int devidx;
-
 	if (!f2fs_is_multi_device(sbi))
 		return;
 
-	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+	while (1) {
+		unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
+		unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
 
-	/* update device state for fsync */
-	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+		/* update device state for fsync */
+		f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
 
-	/* update device state for checkpoint */
-	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
-		spin_lock(&sbi->dev_lock);
-		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
-		spin_unlock(&sbi->dev_lock);
+		/* update device state for checkpoint */
+		if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+			spin_lock(&sbi->dev_lock);
+			f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+			spin_unlock(&sbi->dev_lock);
+		}
+
+		if (blkcnt <= blks)
+			break;
+		blkcnt -= blks;
+		blkaddr += blks;
 	}
 }
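
Note: f2fs_update_device_state() now covers a whole extent rather than one block: the loop clips the range at each device's end_blk, marks that device dirty for fsync and checkpoint, and carries the remainder into the next device (it remains a no-op on single-device filesystems). A userspace sketch of the same range-splitting walk over an illustrative three-device table:

#include <stdio.h>

struct dev { unsigned int start_blk, end_blk; };
static const struct dev devs[] = { {0, 99}, {100, 199}, {200, 299} };
#define NR_DEVS (sizeof(devs) / sizeof(devs[0]))

static unsigned int target_device_index(unsigned int blkaddr)
{
	for (unsigned int i = 0; i < NR_DEVS; i++)
		if (blkaddr <= devs[i].end_blk)
			return i;
	return NR_DEVS - 1;
}

static void mark_dirty_range(unsigned int blkaddr, unsigned int blkcnt)
{
	while (1) {
		unsigned int idx = target_device_index(blkaddr);
		/* blocks of the range that live on this device */
		unsigned int blks = devs[idx].end_blk - blkaddr + 1;

		printf("device %u dirtied\n", idx);
		if (blkcnt <= blks)
			break;		/* range ends on this device */
		blkcnt -= blks;
		blkaddr += blks;	/* continue on the next device */
	}
}

int main(void)
{
	mark_dirty_range(90, 150);	/* touches devices 0, 1 and 2 */
	return 0;
}
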
 
@@ -3529,7 +3570,7 @@
 		goto reallocate;
 	}
 
-	update_device_state(fio);
+	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
 
 	if (keep_order)
 		up_read(&fio->sbi->io_order_lock);
@@ -3611,6 +3652,9 @@
 		goto drop_bio;
 	}
 
+	invalidate_mapping_pages(META_MAPPING(sbi),
+				fio->new_blkaddr, fio->new_blkaddr);
+
 	stat_inc_inplace_blocks(fio->sbi);
 
 	if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
@@ -3618,7 +3662,8 @@
 	else
 		err = f2fs_submit_page_bio(fio);
 	if (!err) {
-		update_device_state(fio);
+		f2fs_update_device_state(fio->sbi, fio->ino,
+						fio->new_blkaddr, 1);
 		f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
 	}
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 89fff25..46fde9f 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -314,6 +314,7 @@
 	unsigned short next_blkoff;		/* next block offset to write */
 	unsigned int zone;			/* current zone number */
 	unsigned int next_segno;		/* preallocated segment */
+	int fragment_remained_chunk;		/* remaining blocks in a chunk for block fragmentation mode */
 	bool inited;				/* indicate inmem log is inited */
 };
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index cf049a0..040b6d0 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -58,6 +58,7 @@
 	[FAULT_DISCARD]		= "discard error",
 	[FAULT_WRITE_IO]	= "write IO error",
 	[FAULT_SLAB_ALLOC]	= "slab alloc",
+	[FAULT_DQUOT_INIT]	= "dquot initialize",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -592,7 +593,7 @@
 	if (kstrtouint(str + 1, 10, &level))
 		return -EINVAL;
 
-	if (!level || level > ZSTD_maxCLevel()) {
+	if (!level || level > zstd_max_clevel()) {
 		f2fs_info(sbi, "invalid zstd compress level: %d", level);
 		return -EINVAL;
 	}
@@ -817,6 +818,10 @@
 				F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
 			} else if (!strcmp(name, "lfs")) {
 				F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
+			} else if (!strcmp(name, "fragment:segment")) {
+				F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
+			} else if (!strcmp(name, "fragment:block")) {
+				F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
 			} else {
 				kfree(name);
 				return -EINVAL;
@@ -1292,7 +1297,7 @@
 	/* Do not pass down write hints if the number of active logs is less
 	 * than NR_CURSEG_PERSIST_TYPE.
 	 */
-	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
+	if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
 		F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
 
 	if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
@@ -1896,6 +1901,10 @@
 		seq_puts(seq, "adaptive");
 	else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
 		seq_puts(seq, "lfs");
+	else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG)
+		seq_puts(seq, "fragment:segment");
+	else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+		seq_puts(seq, "fragment:block");
 	seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
 	if (test_opt(sbi, RESERVE_ROOT))
 		seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
@@ -2491,6 +2500,16 @@
 	return len - towrite;
 }
 
+int f2fs_dquot_initialize(struct inode *inode)
+{
+	if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) {
+		f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT);
+		return -ESRCH;
+	}
+
+	return dquot_initialize(inode);
+}
+
 static struct dquot **f2fs_get_dquots(struct inode *inode)
 {
 	return F2FS_I(inode)->i_dquot;
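
Note: f2fs_dquot_initialize() exists so the new FAULT_DQUOT_INIT injection point can force quota initialization to fail on demand; the dquot_initialize() call sites converted throughout this series then have their error paths exercised without a real quota failure. A userspace sketch of the wrapper pattern, where should_inject() and real_init() are stand-ins and -ESRCH mirrors the errno the f2fs wrapper returns:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int inject_rate;			/* 0 disables injection */

static int should_inject(void)
{
	return inject_rate && (rand() % inject_rate == 0);
}

static int real_init(void) { return 0; }	/* the wrapped operation */

static int checked_init(void)
{
	if (should_inject()) {
		fprintf(stderr, "injected failure\n");
		return -ESRCH;		/* same errno as the f2fs wrapper */
	}
	return real_init();
}

int main(void)
{
	inject_rate = 2;		/* fail roughly every other call */
	for (int i = 0; i < 4; i++)
		printf("checked_init() = %d\n", checked_init());
	return 0;
}
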
@@ -2875,6 +2894,11 @@
 	.get_nextdqblk	= dquot_get_next_dqblk,
 };
 #else
+int f2fs_dquot_initialize(struct inode *inode)
+{
+	return 0;
+}
+
 int f2fs_quota_sync(struct super_block *sb, int type)
 {
 	return 0;
@@ -3486,7 +3510,7 @@
 		NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
 		f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
 			  cp_payload, nat_bits_blocks);
-		return -EFSCORRUPTED;
+		return 1;
 	}
 
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -3522,6 +3546,8 @@
 	sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
 	sbi->migration_granularity = sbi->segs_per_sec;
 	sbi->seq_file_ra_mul = MIN_RA_MUL;
+	sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
+	sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
 
 	sbi->dir_level = DEF_DIR_LEVEL;
 	sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3746,6 +3772,7 @@
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	unsigned int max_devices = MAX_DEVICES;
+	unsigned int logical_blksize;
 	int i;
 
 	/* Initialize single device information */
@@ -3766,6 +3793,9 @@
 	if (!sbi->devs)
 		return -ENOMEM;
 
+	logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
+	sbi->aligned_blksize = true;
+
 	for (i = 0; i < max_devices; i++) {
 
 		if (i > 0 && !RDEV(i).path[0])
@@ -3802,6 +3832,9 @@
 		/* to release errored devices */
 		sbi->s_ndevs = i + 1;
 
+		if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
+			sbi->aligned_blksize = false;
+
 #ifdef CONFIG_BLK_DEV_ZONED
 		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
 				!f2fs_sb_has_blkzoned(sbi)) {
@@ -4351,6 +4384,8 @@
 free_stats:
 	f2fs_destroy_stats(sbi);
 free_nm:
+	/* stop discard thread before destroying node manager */
+	f2fs_stop_discard_thread(sbi);
 	f2fs_destroy_node_manager(sbi);
 free_sm:
 	f2fs_destroy_segment_manager(sbi);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index a32fe31..7d28924 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -196,7 +196,7 @@
 	struct super_block *sb = sbi->sb;
 
 	if (f2fs_sb_has_casefold(sbi))
-		return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
+		return sysfs_emit(buf, "%s (%d.%d.%d)\n",
 			sb->s_encoding->charset,
 			(sb->s_encoding->version >> 16) & 0xff,
 			(sb->s_encoding->version >> 8) & 0xff,
@@ -245,7 +245,7 @@
 static ssize_t main_blkaddr_show(struct f2fs_attr *a,
 				struct f2fs_sb_info *sbi, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 			(unsigned long long)MAIN_BLKADDR(sbi));
 }
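
Note: the two show() conversions above swap snprintf(buf, PAGE_SIZE, ...) for sysfs_emit(buf, ...), which hard-codes the PAGE_SIZE bound and checks that buf really is the page-aligned sysfs buffer, so individual attributes cannot get the length arithmetic wrong. A kernel-style fragment (a sketch, not a buildable module) showing the converted shape:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static unsigned long long example_value;	/* hypothetical state */

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	/* no PAGE_SIZE argument: sysfs_emit() enforces the bound itself
	 * and warns if buf is not the page-aligned sysfs buffer */
	return sysfs_emit(buf, "%llu\n", example_value);
}
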
 
@@ -551,6 +551,22 @@
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "max_fragment_chunk")) {
+		if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
+			sbi->max_fragment_chunk = t;
+		else
+			return -EINVAL;
+		return count;
+	}
+
+	if (!strcmp(a->attr.name, "max_fragment_hole")) {
+		if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
+			sbi->max_fragment_hole = t;
+		else
+			return -EINVAL;
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
@@ -781,6 +797,8 @@
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -859,6 +877,8 @@
 	ATTR_LIST(seq_file_ra_mul),
 	ATTR_LIST(gc_segment_mode),
 	ATTR_LIST(gc_reclaimed_segments),
+	ATTR_LIST(max_fragment_chunk),
+	ATTR_LIST(max_fragment_hole),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 03549b5..fe5acdc 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -136,7 +136,7 @@
 	 * here and not rely on ->open() doing it.  This must be done before
 	 * evicting the inline data.
 	 */
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		return err;
 
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 1d2d29d..e348f33 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -773,7 +773,7 @@
 	if (!f2fs_is_checkpoint_ready(sbi))
 		return -ENOSPC;
 
-	err = dquot_initialize(inode);
+	err = f2fs_dquot_initialize(inode);
 	if (err)
 		return err;
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 79f7eda..cd54a52 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -847,17 +847,17 @@
 
 	replace_page_cache_page(oldpage, newpage);
 
+	get_page(newpage);
+
+	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+		lru_cache_add(newpage);
+
 	/*
 	 * Release while we have extra ref on stolen page.  Otherwise
 	 * anon_pipe_buf_release() might think the page can be reused.
 	 */
 	pipe_buf_release(cs->pipe, buf);
 
-	get_page(newpage);
-
-	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-		lru_cache_add(newpage);
-
 	err = 0;
 	spin_lock(&cs->req->waitq.lock);
 	if (test_bit(FR_ABORTED, &cs->req->flags))
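
Note: the fuse change is purely an ordering fix: the extra get_page() on newpage (and the LRU add) must happen while the pipe buffer still holds its reference, otherwise anon_pipe_buf_release() can see the stolen page as unreferenced and recycle it even though it was just installed in the page cache. The underlying rule, take your own reference before dropping the borrowed one, as a userspace refcount sketch:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

struct page { atomic_int refcount; };

static void get_page(struct page *p)
{
	atomic_fetch_add(&p->refcount, 1);
}

static void put_page(struct page *p)
{
	if (atomic_fetch_sub(&p->refcount, 1) == 1)
		printf("page freed/recycled\n");
}

int main(void)
{
	struct page pg = { .refcount = 1 };	/* the pipe buffer's reference */

	get_page(&pg);	/* pin the page for ourselves first ... */
	put_page(&pg);	/* ... only then drop the borrowed reference */

	assert(atomic_load(&pg.refcount) == 1);	/* page still alive for us */
	put_page(&pg);
	return 0;
}
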
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7235d53..d671084 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -940,7 +940,7 @@
 		else if (height == ip->i_height)
 			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
-			iomap->length = size - pos;
+			iomap->length = size - iomap->offset;
 	} else if (flags & IOMAP_WRITE) {
 		u64 alloc_size;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index adafaaf..3e718cf 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -773,8 +773,8 @@
 					 size_t *prev_count,
 					 size_t *window_size)
 {
-	char __user *p = i->iov[0].iov_base + i->iov_offset;
 	size_t count = iov_iter_count(i);
+	char __user *p;
 	int pages = 1;
 
 	if (likely(!count))
@@ -787,14 +787,14 @@
 	if (*prev_count != count || !*window_size) {
 		int pages, nr_dirtied;
 
-		pages = min_t(int, BIO_MAX_VECS,
-			      DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
+		pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE));
 		nr_dirtied = max(current->nr_dirtied_pause -
 				 current->nr_dirtied, 1);
 		pages = min(pages, nr_dirtied);
 	}
 
 	*prev_count = count;
+	p = i->iov[0].iov_base + i->iov_offset;
 	*window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
 	return true;
 }
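
Note: in should_fault_in_pages(), computing the user address is deferred until after the zero-count early return: for an empty iterator i->iov[0] is not guaranteed to be valid, so the old code dereferenced it for nothing. A sketch of the check-before-dereference ordering, with illustrative types:

#include <stdint.h>
#include <stdio.h>

struct iov { char *base; size_t len; };
struct iter { struct iov *iov; size_t off; size_t count; };

static int window_for(const struct iter *i, size_t *window)
{
	char *p;

	if (i->count == 0)
		return 0;		/* nothing queued: never touch iov[0] */

	p = i->iov[0].base + i->off;	/* safe: iterator is non-empty */
	*window = 4096 - ((uintptr_t)p & 4095);
	return 1;
}

int main(void)
{
	struct iter empty = { .iov = NULL, .off = 0, .count = 0 };
	size_t w;

	printf("empty iterator handled: %d\n", window_for(&empty, &w));
	return 0;
}
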
@@ -1013,6 +1013,7 @@
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_holder *statfs_gh = NULL;
 	size_t prev_count = 0, window_size = 0;
+	size_t orig_count = iov_iter_count(from);
 	size_t read = 0;
 	ssize_t ret;
 
@@ -1057,6 +1058,7 @@
 	if (inode == sdp->sd_rindex)
 		gfs2_glock_dq_uninit(statfs_gh);
 
+	from->count = orig_count - read;
 	if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
 		size_t leftover;
 
@@ -1064,6 +1066,7 @@
 		leftover = fault_in_iov_iter_readable(from, window_size);
 		gfs2_holder_disallow_demote(gh);
 		if (leftover != window_size) {
+			from->count = min(from->count, window_size - leftover);
 			if (!gfs2_holder_queued(gh)) {
 				if (read)
 					goto out_uninit;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 19f38ae..8dbd6fe 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -411,14 +411,14 @@
 static void demote_incompat_holders(struct gfs2_glock *gl,
 				    struct gfs2_holder *new_gh)
 {
-	struct gfs2_holder *gh;
+	struct gfs2_holder *gh, *tmp;
 
 	/*
 	 * Demote incompatible holders before we make ourselves eligible.
 	 * (This holder may or may not allow auto-demoting, but we don't want
 	 * to demote the new holder before it's even granted.)
 	 */
-	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
 		/*
 		 * Since holders are at the front of the list, we stop when we
 		 * find the first non-holder.
@@ -496,7 +496,7 @@
 	 * Since we unlock the lockref lock, we set a flag to indicate
 	 * instantiate is in progress.
 	 */
-	if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
+	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
 		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
 			    TASK_UNINTERRUPTIBLE);
 		/*
@@ -509,14 +509,10 @@
 		goto again;
 	}
 
-	set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
-
 	ret = glops->go_instantiate(gh);
 	if (!ret)
 		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
-	clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
-	smp_mb__after_atomic();
-	wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG);
+	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
 	return ret;
 }
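
Note: two glock races are closed above: demote_incompat_holders() moves to list_for_each_entry_safe() because demotion can unlink the entry being walked, and gfs2_instantiate() folds the separate test_bit() + set_bit() into one test_and_set_bit(), since two tasks could both observe the flag clear and both proceed (clear_and_wake_up_bit() is the matching clear-with-barrier-and-wake helper). The claim step, sketched with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag in_progress = ATOMIC_FLAG_INIT;

/* like test_and_set_bit(): atomically read the old state and set it */
static bool try_claim(void)
{
	return !atomic_flag_test_and_set(&in_progress);
}

int main(void)
{
	printf("first claim:  %s\n", try_claim() ? "won" : "lost");
	printf("second claim: %s\n", try_claim() ? "won" : "lost");
	atomic_flag_clear(&in_progress);	/* analogue of clear_and_wake_up_bit() */
	return 0;
}
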
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 5b12137..0f93e8b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1402,13 +1402,6 @@
 	gfs2_ordered_del_inode(ip);
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
-	if (ip->i_gl) {
-		glock_clear_object(ip->i_gl, ip);
-		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
-		gfs2_glock_add_to_lru(ip->i_gl);
-		gfs2_glock_put_eventually(ip->i_gl);
-		ip->i_gl = NULL;
-	}
 	if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 
@@ -1421,6 +1414,13 @@
 		gfs2_holder_uninit(&ip->i_iopen_gh);
 		gfs2_glock_put_eventually(gl);
 	}
+	if (ip->i_gl) {
+		glock_clear_object(ip->i_gl, ip);
+		wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+		gfs2_glock_add_to_lru(ip->i_gl);
+		gfs2_glock_put_eventually(ip->i_gl);
+		ip->i_gl = NULL;
+	}
 }
 
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
diff --git a/fs/inode.c b/fs/inode.c
index 3eba094..6b80a51 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -180,8 +180,6 @@
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
-	if (sb->s_type->fs_flags & FS_THP_SUPPORT)
-		__set_bit(AS_THP_SUPPORT, &mapping->flags);
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
diff --git a/fs/io-wq.c b/fs/io-wq.c
index afd955d..88202de 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -423,9 +423,10 @@
 	return work->flags >> IO_WQ_HASH_SHIFT;
 }
 
-static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 {
 	struct io_wq *wq = wqe->wq;
+	bool ret = false;
 
 	spin_lock_irq(&wq->hash->wait.lock);
 	if (list_empty(&wqe->wait.entry)) {
@@ -433,9 +434,11 @@
 		if (!test_bit(hash, &wq->hash->map)) {
 			__set_current_state(TASK_RUNNING);
 			list_del_init(&wqe->wait.entry);
+			ret = true;
 		}
 	}
 	spin_unlock_irq(&wq->hash->wait.lock);
+	return ret;
 }
 
 static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
@@ -475,14 +478,21 @@
 	}
 
 	if (stall_hash != -1U) {
+		bool unstalled;
+
 		/*
 		 * Set this before dropping the lock to avoid racing with new
 		 * work being added and clearing the stalled bit.
 		 */
 		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 		raw_spin_unlock(&wqe->lock);
-		io_wait_on_hash(wqe, stall_hash);
+		unstalled = io_wait_on_hash(wqe, stall_hash);
 		raw_spin_lock(&wqe->lock);
+		if (unstalled) {
+			clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+			if (wq_has_sleeper(&wqe->wq->hash->wait))
+				wake_up(&wqe->wq->hash->wait);
+		}
 	}
 
 	return NULL;
@@ -564,8 +574,11 @@
 				io_wqe_enqueue(wqe, linked);
 
 			if (hash != -1U && !next_hashed) {
+				/* serialize hash clear with wake_up() */
+				spin_lock_irq(&wq->hash->wait.lock);
 				clear_bit(hash, &wq->hash->map);
 				clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+				spin_unlock_irq(&wq->hash->wait.lock);
 				if (wq_has_sleeper(&wq->hash->wait))
 					wake_up(&wq->hash->wait);
 				raw_spin_lock(&wqe->lock);
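
Note: the io-wq hunks cure a lost wakeup: the hash bit is now cleared under wq->hash->wait.lock, the same lock io_wait_on_hash() checks it under, so a waiter can no longer slip in between the clear and the wake_up() and sleep forever; io_wait_on_hash() also reports when the hash was already free so the caller can drop its stalled state at once. The canonical shape of such a fix, as a pthread sketch (build with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int busy = 1;

static void *waiter(void *arg)
{
	pthread_mutex_lock(&lock);
	while (busy)			/* re-check the state under the lock */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	puts("waiter resumed");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);

	pthread_mutex_lock(&lock);	/* serialize the clear with the check */
	busy = 0;
	pthread_cond_signal(&cond);	/* wake while still holding the lock */
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	return 0;
}
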
diff --git a/fs/io_uring.c b/fs/io_uring.c
index b07196b..c4f2176 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1278,6 +1278,7 @@
 
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 			  bool cancel_all)
+	__must_hold(&req->ctx->timeout_lock)
 {
 	struct io_kiocb *req;
 
@@ -1293,6 +1294,44 @@
 	return false;
 }
 
+static bool io_match_linked(struct io_kiocb *head)
+{
+	struct io_kiocb *req;
+
+	io_for_each_link(req, head) {
+		if (req->flags & REQ_F_INFLIGHT)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * As io_match_task() but protected against racing with linked timeouts.
+ * User must not hold timeout_lock.
+ */
+static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+			       bool cancel_all)
+{
+	bool matched;
+
+	if (task && head->task != task)
+		return false;
+	if (cancel_all)
+		return true;
+
+	if (head->flags & REQ_F_LINK_TIMEOUT) {
+		struct io_ring_ctx *ctx = head->ctx;
+
+		/* protect against races with linked timeouts */
+		spin_lock_irq(&ctx->timeout_lock);
+		matched = io_match_linked(head);
+		spin_unlock_irq(&ctx->timeout_lock);
+	} else {
+		matched = io_match_linked(head);
+	}
+	return matched;
+}
+
 static inline bool req_has_async_data(struct io_kiocb *req)
 {
 	return req->flags & REQ_F_ASYNC_DATA;
@@ -1502,10 +1541,10 @@
 	if (req->flags & REQ_F_LINK_TIMEOUT) {
 		struct io_ring_ctx *ctx = req->ctx;
 
-		spin_lock(&ctx->completion_lock);
+		spin_lock_irq(&ctx->timeout_lock);
 		io_for_each_link(cur, req)
 			io_prep_async_work(cur);
-		spin_unlock(&ctx->completion_lock);
+		spin_unlock_irq(&ctx->timeout_lock);
 	} else {
 		io_for_each_link(cur, req)
 			io_prep_async_work(cur);
@@ -4327,6 +4366,7 @@
 		kfree(nxt);
 		if (++i == nbufs)
 			return i;
+		cond_resched();
 	}
 	i++;
 	kfree(buf);
@@ -5704,7 +5744,7 @@
 
 		list = &ctx->cancel_hash[i];
 		hlist_for_each_entry_safe(req, tmp, list, hash_node) {
-			if (io_match_task(req, tsk, cancel_all))
+			if (io_match_task_safe(req, tsk, cancel_all))
 				posted += io_poll_remove_one(req);
 		}
 	}
@@ -6156,6 +6196,9 @@
 	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
 		return -EFAULT;
 
+	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
+		return -EINVAL;
+
 	data->mode = io_translate_timeout_mode(flags);
 	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
 
@@ -6880,10 +6923,11 @@
 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 {
 	struct io_kiocb *prev = req->timeout.prev;
-	int ret;
+	int ret = -ENOENT;
 
 	if (prev) {
-		ret = io_try_cancel_userdata(req, prev->user_data);
+		if (!(req->task->flags & PF_EXITING))
+			ret = io_try_cancel_userdata(req, prev->user_data);
 		io_req_complete_post(req, ret ?: -ETIME, 0);
 		io_put_req(prev);
 	} else {
@@ -9255,10 +9299,8 @@
 	struct io_buffer *buf;
 	unsigned long index;
 
-	xa_for_each(&ctx->io_buffers, index, buf) {
+	xa_for_each(&ctx->io_buffers, index, buf)
 		__io_remove_buffers(ctx, buf, index, -1U);
-		cond_resched();
-	}
 }
 
 static void io_req_caches_free(struct io_ring_ctx *ctx)
@@ -9562,19 +9604,8 @@
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct io_task_cancel *cancel = data;
-	bool ret;
 
-	if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
-		struct io_ring_ctx *ctx = req->ctx;
-
-		/* protect against races with linked timeouts */
-		spin_lock(&ctx->completion_lock);
-		ret = io_match_task(req, cancel->task, cancel->all);
-		spin_unlock(&ctx->completion_lock);
-	} else {
-		ret = io_match_task(req, cancel->task, cancel->all);
-	}
-	return ret;
+	return io_match_task_safe(req, cancel->task, cancel->all);
 }
 
 static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
@@ -9586,7 +9617,7 @@
 
 	spin_lock(&ctx->completion_lock);
 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-		if (io_match_task(de->req, task, cancel_all)) {
+		if (io_match_task_safe(de->req, task, cancel_all)) {
 			list_cut_position(&list, &ctx->defer_list, &de->list);
 			break;
 		}
@@ -9764,7 +9795,7 @@
 	}
 	if (wq) {
 		/*
-		 * Must be after io_uring_del_task_file() (removes nodes under
+		 * Must be after io_uring_del_tctx_node() (removes nodes under
 		 * uring_lock) to avoid race with io_uring_try_cancel_iowq().
 		 */
 		io_wq_put_and_exit(wq);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 1753c26..71a36ae 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,7 +205,16 @@
 	struct readahead_control *rac;
 };
 
-static loff_t iomap_read_inline_data(const struct iomap_iter *iter,
+/**
+ * iomap_read_inline_data - copy inline data into the page cache
+ * @iter: iteration structure
+ * @page: page to copy to
+ *
+ * Copy the inline data in @iter into @page and zero out the rest of the page.
+ * Only a single IOMAP_INLINE extent is allowed at the end of each file.
+ * Returns zero for success to complete the read, or the usual negative errno.
+ */
+static int iomap_read_inline_data(const struct iomap_iter *iter,
 		struct page *page)
 {
 	const struct iomap *iomap = iomap_iter_srcmap(iter);
@@ -214,7 +223,7 @@
 	void *addr;
 
 	if (PageUptodate(page))
-		return PAGE_SIZE - poff;
+		return 0;
 
 	if (WARN_ON_ONCE(size > PAGE_SIZE - poff))
 		return -EIO;
@@ -231,7 +240,7 @@
 	memset(addr + size, 0, PAGE_SIZE - poff - size);
 	kunmap_local(addr);
 	iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
-	return PAGE_SIZE - poff;
+	return 0;
 }
 
 static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
@@ -257,7 +266,7 @@
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE)
-		return min(iomap_read_inline_data(iter, page), length);
+		return iomap_read_inline_data(iter, page);
 
 	/* zero post-eof blocks as the page may be mapped */
 	iop = iomap_page_create(iter->inode, page);
@@ -370,6 +379,8 @@
 			ctx->cur_page_in_bio = false;
 		}
 		ret = iomap_readpage_iter(iter, ctx, done);
+		if (ret <= 0)
+			return ret;
 	}
 
 	return done;
@@ -580,15 +591,10 @@
 static int iomap_write_begin_inline(const struct iomap_iter *iter,
 		struct page *page)
 {
-	int ret;
-
 	/* needs more work for the tailpacking case; disable for now */
 	if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0))
 		return -EIO;
-	ret = iomap_read_inline_data(iter, page);
-	if (ret < 0)
-		return ret;
-	return 0;
+	return iomap_read_inline_data(iter, page);
 }
 
 static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
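
Note: iomap_read_inline_data() used to return the byte count it handled (PAGE_SIZE - poff), which callers then had to clamp against the iteration length; it now returns 0 or a negative errno, inline extents are treated as fully consumed, and the readahead loop gains an explicit ret <= 0 bail-out. A sketch of the simplified status-only contract:

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* copy inline data into a page buffer, zeroing the tail */
static int read_inline_data(char *page, const char *inline_buf, size_t size)
{
	if (size > PAGE_SIZE)
		return -EIO;		/* corrupt inline extent */
	memcpy(page, inline_buf, size);
	memset(page + size, 0, PAGE_SIZE - size);
	return 0;			/* success: page fully populated */
}

int main(void)
{
	static char page[PAGE_SIZE];
	int ret = read_inline_data(page, "hello", 5);

	if (ret < 0)
		fprintf(stderr, "read failed: %d\n", ret);
	return 0;
}
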
diff --git a/fs/ksmbd/Kconfig b/fs/ksmbd/Kconfig
index b83cbd7..e1fe177 100644
--- a/fs/ksmbd/Kconfig
+++ b/fs/ksmbd/Kconfig
@@ -6,7 +6,6 @@
 	select NLS
 	select NLS_UTF8
 	select CRYPTO
-	select CRYPTO_MD4
 	select CRYPTO_MD5
 	select CRYPTO_HMAC
 	select CRYPTO_ECB
@@ -19,6 +18,7 @@
 	select CRYPTO_GCM
 	select ASN1
 	select OID_REGISTRY
+	select CRC32
 	default n
 	help
 	  Choose Y here if you want to allow SMB3 compliant clients
diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c
index 30a92dd..3503b1c 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/ksmbd/auth.c
@@ -873,9 +873,9 @@
 				     __u8 *pi_hash)
 {
 	int rc;
-	struct smb2_hdr *rcv_hdr = (struct smb2_hdr *)buf;
+	struct smb2_hdr *rcv_hdr = smb2_get_msg(buf);
 	char *all_bytes_msg = (char *)&rcv_hdr->ProtocolId;
-	int msg_size = be32_to_cpu(rcv_hdr->smb2_buf_length);
+	int msg_size = get_rfc1002_len(buf);
 	struct ksmbd_crypto_ctx *ctx = NULL;
 
 	if (conn->preauth_info->Preauth_HashId !=
@@ -983,7 +983,7 @@
 					 u8 *sign)
 {
 	struct scatterlist *sg;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
 	int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0;
 
 	if (!nvec)
@@ -1047,9 +1047,8 @@
 int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
 			unsigned int nvec, int enc)
 {
-	struct smb2_transform_hdr *tr_hdr =
-		(struct smb2_transform_hdr *)iov[0].iov_base;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+	struct smb2_transform_hdr *tr_hdr = smb2_get_msg(iov[0].iov_base);
+	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
 	int rc;
 	struct scatterlist *sg;
 	u8 sign[SMB2_SIGNATURE_SIZE] = {};
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c
index b57a0d8..83a94d0 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/ksmbd/connection.c
@@ -158,26 +158,25 @@
 int ksmbd_conn_write(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb_hdr *rsp_hdr = work->response_buf;
 	size_t len = 0;
 	int sent;
 	struct kvec iov[3];
 	int iov_idx = 0;
 
 	ksmbd_conn_try_dequeue_request(work);
-	if (!rsp_hdr) {
+	if (!work->response_buf) {
 		pr_err("NULL response header\n");
 		return -EINVAL;
 	}
 
 	if (work->tr_buf) {
 		iov[iov_idx] = (struct kvec) { work->tr_buf,
-				sizeof(struct smb2_transform_hdr) };
+				sizeof(struct smb2_transform_hdr) + 4 };
 		len += iov[iov_idx++].iov_len;
 	}
 
 	if (work->aux_payload_sz) {
-		iov[iov_idx] = (struct kvec) { rsp_hdr, work->resp_hdr_sz };
+		iov[iov_idx] = (struct kvec) { work->response_buf, work->resp_hdr_sz };
 		len += iov[iov_idx++].iov_len;
 		iov[iov_idx] = (struct kvec) { work->aux_payload_buf, work->aux_payload_sz };
 		len += iov[iov_idx++].iov_len;
@@ -185,8 +184,8 @@
 		if (work->tr_buf)
 			iov[iov_idx].iov_len = work->resp_hdr_sz;
 		else
-			iov[iov_idx].iov_len = get_rfc1002_len(rsp_hdr) + 4;
-		iov[iov_idx].iov_base = rsp_hdr;
+			iov[iov_idx].iov_len = get_rfc1002_len(work->response_buf) + 4;
+		iov[iov_idx].iov_base = work->response_buf;
 		len += iov[iov_idx++].iov_len;
 	}
 
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/ksmbd/ksmbd_work.c
index fd58eb4..14b9cae 100644
--- a/fs/ksmbd/ksmbd_work.c
+++ b/fs/ksmbd/ksmbd_work.c
@@ -69,7 +69,6 @@
 
 void ksmbd_workqueue_destroy(void)
 {
-	flush_workqueue(ksmbd_wq);
 	destroy_workqueue(ksmbd_wq);
 	ksmbd_wq = NULL;
 }
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/ksmbd/ksmbd_work.h
index f7156bc..5ece58e 100644
--- a/fs/ksmbd/ksmbd_work.h
+++ b/fs/ksmbd/ksmbd_work.h
@@ -92,7 +92,7 @@
  */
 static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
 {
-	return work->response_buf + work->next_smb2_rsp_hdr_off;
+	return work->response_buf + work->next_smb2_rsp_hdr_off + 4;
 }
 
 /**
@@ -101,7 +101,7 @@
  */
 static inline void *ksmbd_req_buf_next(struct ksmbd_work *work)
 {
-	return work->request_buf + work->next_smb2_rcv_hdr_off;
+	return work->request_buf + work->next_smb2_rcv_hdr_off + 4;
 }
 
 struct ksmbd_work *ksmbd_alloc_work_struct(void);
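
Note: the recurring "+ 4" in this ksmbd series falls out of dropping the private smb2_buf_length field: the receive buffer starts with the 4-byte RFC1002 length prefix, the shared struct smb2_hdr begins immediately after it, and every walker must now skip those 4 bytes explicitly (the kernel's smb2_get_msg()/get_rfc1002_len() helpers are essentially the accessors below). A userspace sketch of the assumed framing:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 4-byte NetBIOS session header: length of everything that follows */
static uint32_t get_rfc1002_len(const void *buf)
{
	uint32_t be;

	memcpy(&be, buf, 4);
	return ntohl(be) & 0x00ffffff;	/* low 24 bits carry the length */
}

/* the SMB2 message proper starts right after the prefix */
static void *smb2_get_msg(void *buf)
{
	return (char *)buf + 4;
}

int main(void)
{
	unsigned char pkt[8] = { 0x00, 0x00, 0x00, 0x04, 0xfe, 'S', 'M', 'B' };

	printf("payload len %u, first byte 0x%02x\n",
	       get_rfc1002_len(pkt), *(unsigned char *)smb2_get_msg(pkt));
	return 0;
}
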
diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c
index f9dae6e..077b8761 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/ksmbd/oplock.c
@@ -629,10 +629,10 @@
 		return;
 	}
 
-	rsp_hdr = work->response_buf;
+	rsp_hdr = smb2_get_msg(work->response_buf);
 	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-	rsp_hdr->smb2_buf_length =
-		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	*(__be32 *)work->response_buf =
+		cpu_to_be32(conn->vals->header_size);
 	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
 	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
 	rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -645,7 +645,7 @@
 	rsp_hdr->SessionId = 0;
 	memset(rsp_hdr->Signature, 0, 16);
 
-	rsp = work->response_buf;
+	rsp = smb2_get_msg(work->response_buf);
 
 	rsp->StructureSize = cpu_to_le16(24);
 	if (!br_info->open_trunc &&
@@ -659,7 +659,7 @@
 	rsp->PersistentFid = cpu_to_le64(fp->persistent_id);
 	rsp->VolatileFid = cpu_to_le64(fp->volatile_id);
 
-	inc_rfc1001_len(rsp, 24);
+	inc_rfc1001_len(work->response_buf, 24);
 
 	ksmbd_debug(OPLOCK,
 		    "sending oplock break v_id %llu p_id = %llu lock level = %d\n",
@@ -736,10 +736,10 @@
 		return;
 	}
 
-	rsp_hdr = work->response_buf;
+	rsp_hdr = smb2_get_msg(work->response_buf);
 	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-	rsp_hdr->smb2_buf_length =
-		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	*(__be32 *)work->response_buf =
+		cpu_to_be32(conn->vals->header_size);
 	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
 	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
 	rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -752,7 +752,7 @@
 	rsp_hdr->SessionId = 0;
 	memset(rsp_hdr->Signature, 0, 16);
 
-	rsp = work->response_buf;
+	rsp = smb2_get_msg(work->response_buf);
 	rsp->StructureSize = cpu_to_le16(44);
 	rsp->Epoch = br_info->epoch;
 	rsp->Flags = 0;
@@ -768,7 +768,7 @@
 	rsp->AccessMaskHint = 0;
 	rsp->ShareMaskHint = 0;
 
-	inc_rfc1001_len(rsp, 44);
+	inc_rfc1001_len(work->response_buf, 44);
 
 	ksmbd_conn_write(work);
 	ksmbd_free_work_struct(work);
@@ -1335,19 +1335,16 @@
  */
 void create_lease_buf(u8 *rbuf, struct lease *lease)
 {
-	char *LeaseKey = (char *)&lease->lease_key;
-
 	if (lease->version == 2) {
 		struct create_lease_v2 *buf = (struct create_lease_v2 *)rbuf;
-		char *ParentLeaseKey = (char *)&lease->parent_lease_key;
 
 		memset(buf, 0, sizeof(struct create_lease_v2));
-		buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
-		buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+		memcpy(buf->lcontext.LeaseKey, lease->lease_key,
+		       SMB2_LEASE_KEY_SIZE);
 		buf->lcontext.LeaseFlags = lease->flags;
 		buf->lcontext.LeaseState = lease->state;
-		buf->lcontext.ParentLeaseKeyLow = *((__le64 *)ParentLeaseKey);
-		buf->lcontext.ParentLeaseKeyHigh = *((__le64 *)(ParentLeaseKey + 8));
+		memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key,
+		       SMB2_LEASE_KEY_SIZE);
 		buf->ccontext.DataOffset = cpu_to_le16(offsetof
 				(struct create_lease_v2, lcontext));
 		buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
@@ -1362,8 +1359,7 @@
 		struct create_lease *buf = (struct create_lease *)rbuf;
 
 		memset(buf, 0, sizeof(struct create_lease));
-		buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
-		buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+		memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE);
 		buf->lcontext.LeaseFlags = lease->flags;
 		buf->lcontext.LeaseState = lease->state;
 		buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -1398,7 +1394,7 @@
 	if (!lreq)
 		return NULL;
 
-	data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
+	data_offset = (char *)req + le32_to_cpu(req->CreateContextsOffset);
 	cc = (struct create_context *)data_offset;
 	do {
 		cc = (struct create_context *)((char *)cc + next);
@@ -1416,19 +1412,17 @@
 		if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
 			struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
 
-			*((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
-			*((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+			memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
 			lreq->req_state = lc->lcontext.LeaseState;
 			lreq->flags = lc->lcontext.LeaseFlags;
 			lreq->duration = lc->lcontext.LeaseDuration;
-			*((__le64 *)lreq->parent_lease_key) = lc->lcontext.ParentLeaseKeyLow;
-			*((__le64 *)(lreq->parent_lease_key + 8)) = lc->lcontext.ParentLeaseKeyHigh;
+			memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
+			       SMB2_LEASE_KEY_SIZE);
 			lreq->version = 2;
 		} else {
 			struct create_lease *lc = (struct create_lease *)cc;
 
-			*((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
-			*((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+			memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
 			lreq->req_state = lc->lcontext.LeaseState;
 			lreq->flags = lc->lcontext.LeaseFlags;
 			lreq->duration = lc->lcontext.LeaseDuration;
@@ -1462,7 +1456,7 @@
 	 * CreateContextsOffset and CreateContextsLength are guaranteed to
 	 * be valid because of ksmbd_smb2_check_message().
 	 */
-	cc = (struct create_context *)((char *)req + 4 +
+	cc = (struct create_context *)((char *)req +
 				       le32_to_cpu(req->CreateContextsOffset));
 	remain_len = le32_to_cpu(req->CreateContextsLength);
 	do {
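
Note: the lease-key conversions replace pairs of __le64 loads and stores through casted pointers with one memcpy() over SMB2_LEASE_KEY_SIZE (16) bytes: inside packed wire structures such casts produce unaligned, strict-aliasing-hostile accesses, and the key is opaque bytes anyway. A small sketch of why the memcpy() form is the safe one:

#include <stdio.h>
#include <string.h>

#define LEASE_KEY_SIZE 16	/* SMB2_LEASE_KEY_SIZE */

struct wire_ctx {
	unsigned char pad;	/* forces the key to be misaligned */
	unsigned char key[LEASE_KEY_SIZE];
} __attribute__((packed));

int main(void)
{
	unsigned char lease_key[LEASE_KEY_SIZE] = "0123456789abcdef";
	struct wire_ctx ctx;

	/* safe at any alignment; *(uint64_t *)ctx.key would not be */
	memcpy(ctx.key, lease_key, LEASE_KEY_SIZE);
	printf("%.16s\n", (char *)ctx.key);
	return 0;
}
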
diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h
index 119b804..0cf7a2b 100644
--- a/fs/ksmbd/oplock.h
+++ b/fs/ksmbd/oplock.h
@@ -28,8 +28,6 @@
 #define OPLOCK_WRITE_TO_NONE		0x04
 #define OPLOCK_READ_TO_NONE		0x08
 
-#define SMB2_LEASE_KEY_SIZE		16
-
 struct lease_ctx_info {
 	__u8			lease_key[SMB2_LEASE_KEY_SIZE];
 	__le32			req_state;
diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index 2a2b213..2e12f6d 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -622,7 +622,6 @@
 MODULE_LICENSE("GPL");
 MODULE_SOFTDEP("pre: ecb");
 MODULE_SOFTDEP("pre: hmac");
-MODULE_SOFTDEP("pre: md4");
 MODULE_SOFTDEP("pre: md5");
 MODULE_SOFTDEP("pre: nls");
 MODULE_SOFTDEP("pre: aes");
@@ -632,5 +631,6 @@
 MODULE_SOFTDEP("pre: aead2");
 MODULE_SOFTDEP("pre: ccm");
 MODULE_SOFTDEP("pre: gcm");
+MODULE_SOFTDEP("pre: crc32");
 module_init(ksmbd_server_init)
 module_exit(ksmbd_server_exit)
diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
index 030ca57..50d0b10 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/ksmbd/smb2misc.c
@@ -6,7 +6,6 @@
 
 #include "glob.h"
 #include "nterr.h"
-#include "smb2pdu.h"
 #include "smb_common.h"
 #include "smbstatus.h"
 #include "mgmt/user_session.h"
@@ -347,23 +346,16 @@
 
 int ksmbd_smb2_check_message(struct ksmbd_work *work)
 {
-	struct smb2_pdu *pdu = work->request_buf;
+	struct smb2_pdu *pdu = ksmbd_req_buf_next(work);
 	struct smb2_hdr *hdr = &pdu->hdr;
 	int command;
 	__u32 clc_len;  /* calculated length */
-	__u32 len = get_rfc1002_len(pdu);
+	__u32 len = get_rfc1002_len(work->request_buf);
 
-	if (work->next_smb2_rcv_hdr_off) {
-		pdu = ksmbd_req_buf_next(work);
-		hdr = &pdu->hdr;
-	}
-
-	if (le32_to_cpu(hdr->NextCommand) > 0) {
+	if (le32_to_cpu(hdr->NextCommand) > 0)
 		len = le32_to_cpu(hdr->NextCommand);
-	} else if (work->next_smb2_rcv_hdr_off) {
+	else if (work->next_smb2_rcv_hdr_off)
 		len -= work->next_smb2_rcv_hdr_off;
-		len = round_up(len, 8);
-	}
 
 	if (check_smb2_hdr(hdr))
 		return 1;
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index fb6a65d..0a5d845 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -6,7 +6,6 @@
 
 #include <linux/slab.h>
 #include "glob.h"
-#include "smb2pdu.h"
 
 #include "auth.h"
 #include "connection.h"
@@ -199,7 +198,7 @@
 	conn->cmds = smb2_0_server_cmds;
 	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
 	conn->max_credits = SMB2_MAX_CREDITS;
-	conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256;
+	conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
 
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -217,7 +216,7 @@
 	conn->cmds = smb2_0_server_cmds;
 	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
 	conn->max_credits = SMB2_MAX_CREDITS;
-	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -242,7 +241,7 @@
 	conn->cmds = smb2_0_server_cmds;
 	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
 	conn->max_credits = SMB2_MAX_CREDITS;
-	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -267,7 +266,7 @@
 	conn->cmds = smb2_0_server_cmds;
 	conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
 	conn->max_credits = SMB2_MAX_CREDITS;
-	conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+	conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
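
Note: the smb2ops fixes are endianness corrections: conn->signing_algorithm holds a little-endian wire value, so it must be assigned and compared using the cpu_to_le16()-typed _LE constants or the logic silently breaks on big-endian hosts (sparse's __le16 annotations exist to flag exactly this). A userspace sketch of the distinction, using the AES-CMAC value (1, per MS-SMB2) illustratively:

#define _DEFAULT_SOURCE			/* for htole16() on glibc */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define SIGNING_ALG_AES_CMAC	1		/* host-order value */
#define SIGNING_ALG_AES_CMAC_LE	htole16(1)	/* wire-order constant */

int main(void)
{
	uint16_t wire_field = htole16(1);	/* as carried in the PDU */

	/* correct on little- and big-endian hosts alike */
	printf("LE compare:   %s\n",
	       wire_field == SIGNING_ALG_AES_CMAC_LE ? "match" : "mismatch");

	/* only accidentally right on little-endian hosts */
	printf("host compare: %s\n",
	       wire_field == SIGNING_ALG_AES_CMAC ? "match" : "mismatch");
	return 0;
}
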
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 7e448df..49c9da3 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -13,7 +13,6 @@
 #include <linux/falloc.h>
 
 #include "glob.h"
-#include "smb2pdu.h"
 #include "smbfsctl.h"
 #include "oplock.h"
 #include "smbacl.h"
@@ -44,8 +43,8 @@
 		*req = ksmbd_req_buf_next(work);
 		*rsp = ksmbd_resp_buf_next(work);
 	} else {
-		*req = work->request_buf;
-		*rsp = work->response_buf;
+		*req = smb2_get_msg(work->request_buf);
+		*rsp = smb2_get_msg(work->response_buf);
 	}
 }
 
@@ -93,13 +92,14 @@
  */
 int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
 {
-	struct smb2_hdr *req_hdr = work->request_buf;
+	struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+	unsigned int cmd = le16_to_cpu(req_hdr->Command);
 	int tree_id;
 
 	work->tcon = NULL;
-	if (work->conn->ops->get_cmd_val(work) == SMB2_TREE_CONNECT_HE ||
-	    work->conn->ops->get_cmd_val(work) ==  SMB2_CANCEL_HE ||
-	    work->conn->ops->get_cmd_val(work) ==  SMB2_LOGOFF_HE) {
+	if (cmd == SMB2_TREE_CONNECT_HE ||
+	    cmd == SMB2_CANCEL_HE ||
+	    cmd == SMB2_LOGOFF_HE) {
 		ksmbd_debug(SMB, "skip to check tree connect request\n");
 		return 0;
 	}
@@ -130,7 +130,7 @@
 	if (work->next_smb2_rcv_hdr_off)
 		err_rsp = ksmbd_resp_buf_next(work);
 	else
-		err_rsp = work->response_buf;
+		err_rsp = smb2_get_msg(work->response_buf);
 
 	if (err_rsp->hdr.Status != STATUS_STOPPED_ON_SYMLINK) {
 		err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE;
@@ -150,7 +150,7 @@
  */
 bool is_smb2_neg_cmd(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr = work->request_buf;
+	struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
 
 	/* is it an SMB2 header? */
 	if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -174,7 +174,7 @@
  */
 bool is_smb2_rsp(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr = work->response_buf;
+	struct smb2_hdr *hdr = smb2_get_msg(work->response_buf);
 
 	/* is it an SMB2 header? */
 	if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -200,7 +200,7 @@
 	if (work->next_smb2_rcv_hdr_off)
 		rcv_hdr = ksmbd_req_buf_next(work);
 	else
-		rcv_hdr = work->request_buf;
+		rcv_hdr = smb2_get_msg(work->request_buf);
 	return le16_to_cpu(rcv_hdr->Command);
 }
 
@@ -216,7 +216,7 @@
 	if (work->next_smb2_rcv_hdr_off)
 		rsp_hdr = ksmbd_resp_buf_next(work);
 	else
-		rsp_hdr = work->response_buf;
+		rsp_hdr = smb2_get_msg(work->response_buf);
 	rsp_hdr->Status = err;
 	smb2_set_err_rsp(work);
 }
@@ -237,13 +237,11 @@
 	if (conn->need_neg == false)
 		return -EINVAL;
 
-	rsp_hdr = work->response_buf;
+	*(__be32 *)work->response_buf =
+		cpu_to_be32(conn->vals->header_size);
 
+	rsp_hdr = smb2_get_msg(work->response_buf);
 	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-
-	rsp_hdr->smb2_buf_length =
-		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
-
 	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
 	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
 	rsp_hdr->CreditRequest = cpu_to_le16(2);
@@ -256,7 +254,7 @@
 	rsp_hdr->SessionId = 0;
 	memset(rsp_hdr->Signature, 0, 16);
 
-	rsp = work->response_buf;
+	rsp = smb2_get_msg(work->response_buf);
 
 	WARN_ON(ksmbd_conn_good(work));
 
@@ -277,12 +275,12 @@
 
 	rsp->SecurityBufferOffset = cpu_to_le16(128);
 	rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
-	ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
-		sizeof(rsp->hdr.smb2_buf_length)) +
+	ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
 		le16_to_cpu(rsp->SecurityBufferOffset));
-	inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
-		sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
-		AUTH_GSS_LENGTH);
+	inc_rfc1001_len(work->response_buf,
+			sizeof(struct smb2_negotiate_rsp) -
+			sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+			AUTH_GSS_LENGTH);
 	rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
 	if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY)
 		rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
@@ -387,8 +385,8 @@
 	next_hdr_offset = le32_to_cpu(req->NextCommand);
 
 	new_len = ALIGN(len, 8);
-	inc_rfc1001_len(work->response_buf, ((sizeof(struct smb2_hdr) - 4)
-			+ new_len - len));
+	inc_rfc1001_len(work->response_buf,
+			sizeof(struct smb2_hdr) + new_len - len);
 	rsp->NextCommand = cpu_to_le32(new_len);
 
 	work->next_smb2_rcv_hdr_off += next_hdr_offset;
@@ -406,7 +404,7 @@
 		work->compound_fid = KSMBD_NO_FID;
 		work->compound_pfid = KSMBD_NO_FID;
 	}
-	memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2);
+	memset((char *)rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
 	rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
 	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
 	rsp_hdr->Command = rcv_hdr->Command;
@@ -432,7 +430,7 @@
  */
 bool is_chained_smb2_message(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr = work->request_buf;
+	struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
 	unsigned int len, next_cmd;
 
 	if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -483,13 +481,13 @@
  */
 int init_smb2_rsp_hdr(struct ksmbd_work *work)
 {
-	struct smb2_hdr *rsp_hdr = work->response_buf;
-	struct smb2_hdr *rcv_hdr = work->request_buf;
+	struct smb2_hdr *rsp_hdr = smb2_get_msg(work->response_buf);
+	struct smb2_hdr *rcv_hdr = smb2_get_msg(work->request_buf);
 	struct ksmbd_conn *conn = work->conn;
 
 	memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-	rsp_hdr->smb2_buf_length =
-		cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+	*(__be32 *)work->response_buf =
+		cpu_to_be32(conn->vals->header_size);
 	rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
 	rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
 	rsp_hdr->Command = rcv_hdr->Command;
@@ -522,7 +520,7 @@
  */
 int smb2_allocate_rsp_buf(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr = work->request_buf;
+	struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
 	size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
 	size_t large_sz = small_sz + work->conn->vals->max_trans_size;
 	size_t sz = small_sz;
@@ -534,7 +532,7 @@
 	if (cmd == SMB2_QUERY_INFO_HE) {
 		struct smb2_query_info_req *req;
 
-		req = work->request_buf;
+		req = smb2_get_msg(work->request_buf);
 		if (req->InfoType == SMB2_O_INFO_FILE &&
 		    (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
 		     req->FileInfoClass == FILE_ALL_INFORMATION))
@@ -561,7 +559,7 @@
  */
 int smb2_check_user_session(struct ksmbd_work *work)
 {
-	struct smb2_hdr *req_hdr = work->request_buf;
+	struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
 	struct ksmbd_conn *conn = work->conn;
 	unsigned int cmd = conn->ops->get_cmd_val(work);
 	unsigned long long sess_id;
@@ -642,7 +640,7 @@
 	struct ksmbd_conn *conn = work->conn;
 	int id;
 
-	rsp_hdr = work->response_buf;
+	rsp_hdr = smb2_get_msg(work->response_buf);
 	rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
 
 	id = ksmbd_acquire_async_msg_id(&conn->async_ida);
@@ -674,7 +672,7 @@
 {
 	struct smb2_hdr *rsp_hdr;
 
-	rsp_hdr = work->response_buf;
+	rsp_hdr = smb2_get_msg(work->response_buf);
 	smb2_set_err_rsp(work);
 	rsp_hdr->Status = status;
 
@@ -715,17 +713,17 @@
 	int attr = 0;
 
 	if (S_ISDIR(stat->mode)) {
-		attr = ATTR_DIRECTORY |
-			(attribute & (ATTR_HIDDEN | ATTR_SYSTEM));
+		attr = FILE_ATTRIBUTE_DIRECTORY |
+			(attribute & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM));
 	} else {
-		attr = (attribute & 0x00005137) | ATTR_ARCHIVE;
-		attr &= ~(ATTR_DIRECTORY);
+		attr = (attribute & 0x00005137) | FILE_ATTRIBUTE_ARCHIVE;
+		attr &= ~(FILE_ATTRIBUTE_DIRECTORY);
 		if (S_ISREG(stat->mode) && (server_conf.share_fake_fscaps &
 				FILE_SUPPORTS_SPARSE_FILES))
-			attr |= ATTR_SPARSE;
+			attr |= FILE_ATTRIBUTE_SPARSE_FILE;
 
 		if (smb2_get_reparse_tag_special_file(stat->mode))
-			attr |= ATTR_REPARSE;
+			attr |= FILE_ATTRIBUTE_REPARSE_POINT;
 	}
 
 	return attr;
@@ -753,16 +751,16 @@
 	pneg_ctxt->Ciphers[0] = cipher_type;
 }
 
-static void build_compression_ctxt(struct smb2_compression_ctx *pneg_ctxt,
+static void build_compression_ctxt(struct smb2_compression_capabilities_context *pneg_ctxt,
 				   __le16 comp_algo)
 {
 	pneg_ctxt->ContextType = SMB2_COMPRESSION_CAPABILITIES;
 	pneg_ctxt->DataLength =
-		cpu_to_le16(sizeof(struct smb2_compression_ctx)
+		cpu_to_le16(sizeof(struct smb2_compression_capabilities_context)
 			- sizeof(struct smb2_neg_context));
 	pneg_ctxt->Reserved = cpu_to_le32(0);
 	pneg_ctxt->CompressionAlgorithmCount = cpu_to_le16(1);
-	pneg_ctxt->Reserved1 = cpu_to_le32(0);
+	pneg_ctxt->Flags = cpu_to_le32(0);
 	pneg_ctxt->CompressionAlgorithms[0] = comp_algo;
 }
 
@@ -802,11 +800,11 @@
 }
 
 static void assemble_neg_contexts(struct ksmbd_conn *conn,
-				  struct smb2_negotiate_rsp *rsp)
+				  struct smb2_negotiate_rsp *rsp,
+				  void *smb2_buf_len)
 {
-	/* +4 is to account for the RFC1001 len field */
 	char *pneg_ctxt = (char *)rsp +
-			le32_to_cpu(rsp->NegotiateContextOffset) + 4;
+			le32_to_cpu(rsp->NegotiateContextOffset);
 	int neg_ctxt_cnt = 1;
 	int ctxt_size;
 
@@ -815,7 +813,7 @@
 	build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt,
 			   conn->preauth_info->Preauth_HashId);
 	rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
-	inc_rfc1001_len(rsp, AUTH_GSS_PADDING);
+	inc_rfc1001_len(smb2_buf_len, AUTH_GSS_PADDING);
 	ctxt_size = sizeof(struct smb2_preauth_neg_context);
 	/* Round to 8 byte boundary */
 	pneg_ctxt += round_up(sizeof(struct smb2_preauth_neg_context), 8);
@@ -839,12 +837,12 @@
 		ksmbd_debug(SMB,
 			    "assemble SMB2_COMPRESSION_CAPABILITIES context\n");
 		/* Temporarily set to SMB3_COMPRESS_NONE */
-		build_compression_ctxt((struct smb2_compression_ctx *)pneg_ctxt,
+		build_compression_ctxt((struct smb2_compression_capabilities_context *)pneg_ctxt,
 				       conn->compress_algorithm);
 		rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
-		ctxt_size += sizeof(struct smb2_compression_ctx) + 2;
+		ctxt_size += sizeof(struct smb2_compression_capabilities_context) + 2;
 		/* Round to 8 byte boundary */
-		pneg_ctxt += round_up(sizeof(struct smb2_compression_ctx) + 2,
+		pneg_ctxt += round_up(sizeof(struct smb2_compression_capabilities_context) + 2,
 				      8);
 	}
 
@@ -869,7 +867,7 @@
 		ctxt_size += sizeof(struct smb2_signing_capabilities) + 2;
 	}
 
-	inc_rfc1001_len(rsp, ctxt_size);
+	inc_rfc1001_len(smb2_buf_len, ctxt_size);
 }
 
 static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
@@ -918,7 +916,7 @@
 }
 
 static void decode_compress_ctxt(struct ksmbd_conn *conn,
-				 struct smb2_compression_ctx *pneg_ctxt)
+				 struct smb2_compression_capabilities_context *pneg_ctxt)
 {
 	conn->compress_algorithm = SMB3_COMPRESS_NONE;
 }
@@ -939,8 +937,8 @@
 	}
 
 	for (i = 0; i < sign_algo_cnt; i++) {
-		if (pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_HMAC_SHA256 ||
-		    pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_AES_CMAC) {
+		if (pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_HMAC_SHA256_LE ||
+		    pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_AES_CMAC_LE) {
 			ksmbd_debug(SMB, "Signing Algorithm ID = 0x%x\n",
 				    pneg_ctxt->SigningAlgorithms[i]);
 			conn->signing_negotiated = true;
@@ -952,14 +950,14 @@
 }
 
 static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
-				      struct smb2_negotiate_req *req)
+				      struct smb2_negotiate_req *req,
+				      int len_of_smb)
 {
 	/* +4 is to account for the RFC1001 len field */
-	struct smb2_neg_context *pctx = (struct smb2_neg_context *)((char *)req + 4);
+	struct smb2_neg_context *pctx = (struct smb2_neg_context *)req;
 	int i = 0, len_of_ctxts;
 	int offset = le32_to_cpu(req->NegotiateContextOffset);
 	int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
-	int len_of_smb = be32_to_cpu(req->hdr.smb2_buf_length);
 	__le32 status = STATUS_INVALID_PARAMETER;
 
 	ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
@@ -1011,7 +1009,7 @@
 				break;
 
 			decode_compress_ctxt(conn,
-					     (struct smb2_compression_ctx *)pctx);
+					     (struct smb2_compression_capabilities_context *)pctx);
 		} else if (pctx->ContextType == SMB2_NETNAME_NEGOTIATE_CONTEXT_ID) {
 			ksmbd_debug(SMB,
 				    "deassemble SMB2_NETNAME_NEGOTIATE_CONTEXT_ID context\n");
@@ -1044,8 +1042,8 @@
 int smb2_handle_negotiate(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_negotiate_req *req = work->request_buf;
-	struct smb2_negotiate_rsp *rsp = work->response_buf;
+	struct smb2_negotiate_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_negotiate_rsp *rsp = smb2_get_msg(work->response_buf);
 	int rc = 0;
 	unsigned int smb2_buf_len, smb2_neg_size;
 	__le32 status;
@@ -1066,7 +1064,7 @@
 	}
 
 	smb2_buf_len = get_rfc1002_len(work->request_buf);
-	smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects) - 4;
+	smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
 	if (smb2_neg_size > smb2_buf_len) {
 		rsp->hdr.Status = STATUS_INVALID_PARAMETER;
 		rc = -EINVAL;
@@ -1115,7 +1113,8 @@
 			goto err_out;
 		}
 
-		status = deassemble_neg_contexts(conn, req);
+		status = deassemble_neg_contexts(conn, req,
+						 get_rfc1002_len(work->request_buf));
 		if (status != STATUS_SUCCESS) {
 			pr_err("deassemble_neg_contexts error(0x%x)\n",
 			       status);
@@ -1135,7 +1134,7 @@
 						 conn->preauth_info->Preauth_HashValue);
 		rsp->NegotiateContextOffset =
 				cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
-		assemble_neg_contexts(conn, rsp);
+		assemble_neg_contexts(conn, rsp, work->response_buf);
 		break;
 	case SMB302_PROT_ID:
 		init_smb3_02_server(conn);
@@ -1183,10 +1182,9 @@
 
 	rsp->SecurityBufferOffset = cpu_to_le16(128);
 	rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
-	ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
-				  sizeof(rsp->hdr.smb2_buf_length)) +
-				   le16_to_cpu(rsp->SecurityBufferOffset));
-	inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+	ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
+				  le16_to_cpu(rsp->SecurityBufferOffset));
+	inc_rfc1001_len(work->response_buf, sizeof(struct smb2_negotiate_rsp) -
 			sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
 			 AUTH_GSS_LENGTH);
 	rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
@@ -1278,7 +1276,7 @@
 			  struct negotiate_message *negblob,
 			  size_t negblob_len)
 {
-	struct smb2_sess_setup_rsp *rsp = work->response_buf;
+	struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct challenge_message *chgblob;
 	unsigned char *spnego_blob = NULL;
 	u16 spnego_blob_len;
@@ -1386,8 +1384,8 @@
 
 static int ntlm_authenticate(struct ksmbd_work *work)
 {
-	struct smb2_sess_setup_req *req = work->request_buf;
-	struct smb2_sess_setup_rsp *rsp = work->response_buf;
+	struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_conn *conn = work->conn;
 	struct ksmbd_session *sess = work->sess;
 	struct channel *chann = NULL;
@@ -1410,7 +1408,7 @@
 		memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
 		rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
 		kfree(spnego_blob);
-		inc_rfc1001_len(rsp, spnego_blob_len - 1);
+		inc_rfc1001_len(work->response_buf, spnego_blob_len - 1);
 	}
 
 	user = session_user(conn, req);
@@ -1522,8 +1520,8 @@
 #ifdef CONFIG_SMB_SERVER_KERBEROS5
 static int krb5_authenticate(struct ksmbd_work *work)
 {
-	struct smb2_sess_setup_req *req = work->request_buf;
-	struct smb2_sess_setup_rsp *rsp = work->response_buf;
+	struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_conn *conn = work->conn;
 	struct ksmbd_session *sess = work->sess;
 	char *in_blob, *out_blob;
@@ -1538,8 +1536,7 @@
 	out_blob = (char *)&rsp->hdr.ProtocolId +
 		le16_to_cpu(rsp->SecurityBufferOffset);
 	out_len = work->response_sz -
-		offsetof(struct smb2_hdr, smb2_buf_length) -
-		le16_to_cpu(rsp->SecurityBufferOffset);
+		(le16_to_cpu(rsp->SecurityBufferOffset) + 4);
 
 	/* Check previous session */
 	prev_sess_id = le64_to_cpu(req->PreviousSessionId);
@@ -1556,7 +1553,7 @@
 		return -EINVAL;
 	}
 	rsp->SecurityBufferLength = cpu_to_le16(out_len);
-	inc_rfc1001_len(rsp, out_len - 1);
+	inc_rfc1001_len(work->response_buf, out_len - 1);
 
 	if ((conn->sign || server_conf.enforced_signing) ||
 	    (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
@@ -1612,8 +1609,8 @@
 int smb2_sess_setup(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_sess_setup_req *req = work->request_buf;
-	struct smb2_sess_setup_rsp *rsp = work->response_buf;
+	struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_session *sess;
 	struct negotiate_message *negblob;
 	unsigned int negblob_len, negblob_off;
@@ -1625,7 +1622,7 @@
 	rsp->SessionFlags = 0;
 	rsp->SecurityBufferOffset = cpu_to_le16(72);
 	rsp->SecurityBufferLength = 0;
-	inc_rfc1001_len(rsp, 9);
+	inc_rfc1001_len(work->response_buf, 9);
 
 	if (!req->hdr.SessionId) {
 		sess = ksmbd_smb2_session_create();
@@ -1699,9 +1696,11 @@
 
 	negblob_off = le16_to_cpu(req->SecurityBufferOffset);
 	negblob_len = le16_to_cpu(req->SecurityBufferLength);
-	if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) ||
-	    negblob_len < offsetof(struct negotiate_message, NegotiateFlags))
-		return -EINVAL;
+	if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
+	    negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
+		rc = -EINVAL;
+		goto out_err;
+	}
 
 	negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
 			negblob_off);
@@ -1739,7 +1738,8 @@
 				 * Note: total size - 1 is used here as an
 				 * adjustment for a zero-size blob
 				 */
-				inc_rfc1001_len(rsp, le16_to_cpu(rsp->SecurityBufferLength) - 1);
+				inc_rfc1001_len(work->response_buf,
+						le16_to_cpu(rsp->SecurityBufferLength) - 1);
 
 			} else if (negblob->MessageType == NtLmAuthenticate) {
 				rc = ntlm_authenticate(work);
@@ -1828,8 +1828,8 @@
 int smb2_tree_connect(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_tree_connect_req *req = work->request_buf;
-	struct smb2_tree_connect_rsp *rsp = work->response_buf;
+	struct smb2_tree_connect_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_tree_connect_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_session *sess = work->sess;
 	char *treename = NULL, *name = NULL;
 	struct ksmbd_tree_conn_status status;
@@ -1894,7 +1894,7 @@
 	rsp->Reserved = 0;
 	/* default manual caching */
 	rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
-	inc_rfc1001_len(rsp, 16);
+	inc_rfc1001_len(work->response_buf, 16);
 
 	if (!IS_ERR(treename))
 		kfree(treename);
@@ -1999,17 +1999,18 @@
  */
 int smb2_tree_disconnect(struct ksmbd_work *work)
 {
-	struct smb2_tree_disconnect_rsp *rsp = work->response_buf;
+	struct smb2_tree_disconnect_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_session *sess = work->sess;
 	struct ksmbd_tree_connect *tcon = work->tcon;
 
 	rsp->StructureSize = cpu_to_le16(4);
-	inc_rfc1001_len(rsp, 4);
+	inc_rfc1001_len(work->response_buf, 4);
 
 	ksmbd_debug(SMB, "request\n");
 
 	if (!tcon) {
-		struct smb2_tree_disconnect_req *req = work->request_buf;
+		struct smb2_tree_disconnect_req *req =
+			smb2_get_msg(work->request_buf);
 
 		ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
 		rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
@@ -2031,11 +2032,11 @@
 int smb2_session_logoff(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_logoff_rsp *rsp = work->response_buf;
+	struct smb2_logoff_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_session *sess = work->sess;
 
 	rsp->StructureSize = cpu_to_le16(4);
-	inc_rfc1001_len(rsp, 4);
+	inc_rfc1001_len(work->response_buf, 4);
 
 	ksmbd_debug(SMB, "request\n");
 
@@ -2048,7 +2049,7 @@
 	ksmbd_conn_wait_idle(conn);
 
 	if (ksmbd_tree_conn_session_logoff(sess)) {
-		struct smb2_logoff_req *req = work->request_buf;
+		struct smb2_logoff_req *req = smb2_get_msg(work->request_buf);
 
 		ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
 		rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
@@ -2075,8 +2076,8 @@
  */
 static noinline int create_smb2_pipe(struct ksmbd_work *work)
 {
-	struct smb2_create_rsp *rsp = work->response_buf;
-	struct smb2_create_req *req = work->request_buf;
+	struct smb2_create_rsp *rsp = smb2_get_msg(work->response_buf);
+	struct smb2_create_req *req = smb2_get_msg(work->request_buf);
 	int id;
 	int err;
 	char *name;
@@ -2099,7 +2100,7 @@
 	rsp->hdr.Status = STATUS_SUCCESS;
 	rsp->StructureSize = cpu_to_le16(89);
 	rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
-	rsp->Reserved = 0;
+	rsp->Flags = 0;
 	rsp->CreateAction = cpu_to_le32(FILE_OPENED);
 
 	rsp->CreationTime = cpu_to_le64(0);
@@ -2107,14 +2108,14 @@
 	rsp->ChangeTime = cpu_to_le64(0);
 	rsp->AllocationSize = cpu_to_le64(0);
 	rsp->EndofFile = cpu_to_le64(0);
-	rsp->FileAttributes = ATTR_NORMAL_LE;
+	rsp->FileAttributes = FILE_ATTRIBUTE_NORMAL_LE;
 	rsp->Reserved2 = 0;
 	rsp->VolatileFileId = cpu_to_le64(id);
 	rsp->PersistentFileId = 0;
 	rsp->CreateContextsOffset = 0;
 	rsp->CreateContextsLength = 0;
 
-	inc_rfc1001_len(rsp, 88); /* StructureSize - 1*/
+	inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/
 	kfree(name);
 	return 0;
 
@@ -2353,7 +2354,7 @@
 	struct xattr_dos_attrib da;
 	int rc;
 
-	fp->f_ci->m_fattr &= ~(ATTR_HIDDEN_LE | ATTR_SYSTEM_LE);
+	fp->f_ci->m_fattr &= ~(FILE_ATTRIBUTE_HIDDEN_LE | FILE_ATTRIBUTE_SYSTEM_LE);
 
 	/* get FileAttributes from XATTR_NAME_DOS_ATTRIBUTE */
 	if (!test_share_config_flag(tcon->share_conf,
@@ -2463,7 +2464,7 @@
 	struct ksmbd_session *sess = work->sess;
 	struct ksmbd_tree_connect *tcon = work->tcon;
 	struct smb2_create_req *req;
-	struct smb2_create_rsp *rsp, *rsp_org;
+	struct smb2_create_rsp *rsp;
 	struct path path;
 	struct ksmbd_share_config *share = tcon->share_conf;
 	struct ksmbd_file *fp = NULL;
@@ -2489,7 +2490,6 @@
 	umode_t posix_mode = 0;
 	__le32 daccess, maximal_access = 0;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	if (req->hdr.NextCommand && !work->next_smb2_rcv_hdr_off &&
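
The rsp_org local removed from each handler only ever existed to reach the leading length field, which is now always addressed as work->response_buf. WORK_BUFFERS() resolves req/rsp for both single and compound PDUs; a rough mock of the pattern with stubbed types (the real helpers are ksmbd_req_buf_next()/ksmbd_resp_buf_next()):

    struct ksmbd_work_stub {
            void *request_buf;      /* transport buffer, 4-byte length first */
            void *response_buf;
            unsigned int next_smb2_rcv_hdr_off; /* nonzero inside a compound */
            unsigned int next_smb2_rsp_hdr_off;
    };

    static void work_buffers(struct ksmbd_work_stub *w, void **req, void **rsp)
    {
            if (w->next_smb2_rcv_hdr_off) {
                    /* step past the previously handled chained PDU */
                    *req = (char *)w->request_buf + 4 + w->next_smb2_rcv_hdr_off;
                    *rsp = (char *)w->response_buf + 4 + w->next_smb2_rsp_hdr_off;
            } else {
                    *req = (char *)w->request_buf + 4;
                    *rsp = (char *)w->response_buf + 4;
            }
    }
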
@@ -2559,7 +2559,7 @@
 	if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
 		lc = parse_lease_state(req);
 
-	if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE_LE)) {
+	if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) {
 		pr_err("Invalid impersonationlevel : 0x%x\n",
 		       le32_to_cpu(req->ImpersonationLevel));
 		rc = -EIO;
@@ -2567,7 +2567,7 @@
 		goto err_out1;
 	}
 
-	if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK)) {
+	if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) {
 		pr_err("Invalid create options : 0x%x\n",
 		       le32_to_cpu(req->CreateOptions));
 		rc = -EINVAL;
@@ -2609,7 +2609,7 @@
 		goto err_out1;
 	}
 
-	if (req->FileAttributes && !(req->FileAttributes & ATTR_MASK_LE)) {
+	if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) {
 		pr_err("Invalid file attribute : 0x%x\n",
 		       le32_to_cpu(req->FileAttributes));
 		rc = -EINVAL;
@@ -2740,7 +2740,7 @@
 		}
 
 		if (req->CreateOptions & FILE_DIRECTORY_FILE_LE &&
-		    req->FileAttributes & ATTR_NORMAL_LE) {
+		    req->FileAttributes & FILE_ATTRIBUTE_NORMAL_LE) {
 			rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
 			rc = -EIO;
 		}
@@ -3119,7 +3119,7 @@
 	opinfo = rcu_dereference(fp->f_opinfo);
 	rsp->OplockLevel = opinfo != NULL ? opinfo->level : 0;
 	rcu_read_unlock();
-	rsp->Reserved = 0;
+	rsp->Flags = 0;
 	rsp->CreateAction = cpu_to_le32(file_info);
 	rsp->CreationTime = cpu_to_le64(fp->create_time);
 	time = ksmbd_UnixTimeToNT(stat.atime);
@@ -3140,7 +3140,7 @@
 
 	rsp->CreateContextsOffset = 0;
 	rsp->CreateContextsLength = 0;
-	inc_rfc1001_len(rsp_org, 88); /* StructureSize - 1*/
+	inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/
 
 	/* If lease is request send lease context response */
 	if (opinfo && opinfo->is_lease) {
@@ -3155,7 +3155,8 @@
 		create_lease_buf(rsp->Buffer, opinfo->o_lease);
 		le32_add_cpu(&rsp->CreateContextsLength,
 			     conn->vals->create_lease_size);
-		inc_rfc1001_len(rsp_org, conn->vals->create_lease_size);
+		inc_rfc1001_len(work->response_buf,
+				conn->vals->create_lease_size);
 		next_ptr = &lease_ccontext->Next;
 		next_off = conn->vals->create_lease_size;
 	}
@@ -3175,7 +3176,8 @@
 				le32_to_cpu(maximal_access));
 		le32_add_cpu(&rsp->CreateContextsLength,
 			     conn->vals->create_mxac_size);
-		inc_rfc1001_len(rsp_org, conn->vals->create_mxac_size);
+		inc_rfc1001_len(work->response_buf,
+				conn->vals->create_mxac_size);
 		if (next_ptr)
 			*next_ptr = cpu_to_le32(next_off);
 		next_ptr = &mxac_ccontext->Next;
@@ -3193,7 +3195,8 @@
 				stat.ino, tcon->id);
 		le32_add_cpu(&rsp->CreateContextsLength,
 			     conn->vals->create_disk_id_size);
-		inc_rfc1001_len(rsp_org, conn->vals->create_disk_id_size);
+		inc_rfc1001_len(work->response_buf,
+				conn->vals->create_disk_id_size);
 		if (next_ptr)
 			*next_ptr = cpu_to_le32(next_off);
 		next_ptr = &disk_id_ccontext->Next;
@@ -3207,15 +3210,15 @@
 				fp);
 		le32_add_cpu(&rsp->CreateContextsLength,
 			     conn->vals->create_posix_size);
-		inc_rfc1001_len(rsp_org, conn->vals->create_posix_size);
+		inc_rfc1001_len(work->response_buf,
+				conn->vals->create_posix_size);
 		if (next_ptr)
 			*next_ptr = cpu_to_le32(next_off);
 	}
 
 	if (contxt_cnt > 0) {
 		rsp->CreateContextsOffset =
-			cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer)
-			- 4);
+			cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer));
 	}
 
 err_out:
@@ -3422,9 +3425,9 @@
 		ffdinfo->EaSize =
 			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
 		if (ffdinfo->EaSize)
-			ffdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+			ffdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			ffdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+			ffdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		memcpy(ffdinfo->FileName, conv_name, conv_len);
 		ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
 		break;
@@ -3438,11 +3441,11 @@
 		fbdinfo->EaSize =
 			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
 		if (fbdinfo->EaSize)
-			fbdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+			fbdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
 		fbdinfo->ShortNameLength = 0;
 		fbdinfo->Reserved = 0;
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			fbdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+			fbdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		memcpy(fbdinfo->FileName, conv_name, conv_len);
 		fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
 		break;
@@ -3454,7 +3457,7 @@
 		fdinfo = (struct file_directory_info *)kstat;
 		fdinfo->FileNameLength = cpu_to_le32(conv_len);
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			fdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+			fdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		memcpy(fdinfo->FileName, conv_name, conv_len);
 		fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
 		break;
@@ -3478,11 +3481,11 @@
 		dinfo->EaSize =
 			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
 		if (dinfo->EaSize)
-			dinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+			dinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
 		dinfo->Reserved = 0;
 		dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			dinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+			dinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		memcpy(dinfo->FileName, conv_name, conv_len);
 		dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
 		break;
@@ -3496,13 +3499,13 @@
 		fibdinfo->EaSize =
 			smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
 		if (fibdinfo->EaSize)
-			fibdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+			fibdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
 		fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
 		fibdinfo->ShortNameLength = 0;
 		fibdinfo->Reserved = 0;
 		fibdinfo->Reserved2 = cpu_to_le16(0);
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			fibdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+			fibdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		memcpy(fibdinfo->FileName, conv_name, conv_len);
 		fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
 		break;
@@ -3528,9 +3531,10 @@
 		posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
 		posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
 		posix_info->DosAttributes =
-			S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
+			S_ISDIR(ksmbd_kstat->kstat->mode) ?
+				FILE_ATTRIBUTE_DIRECTORY_LE : FILE_ATTRIBUTE_ARCHIVE_LE;
 		if (d_info->hide_dot_file && d_info->name[0] == '.')
-			posix_info->DosAttributes |= ATTR_HIDDEN_LE;
+			posix_info->DosAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
 		id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
 			  SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
 		id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
@@ -3816,7 +3820,7 @@
 {
 	struct ksmbd_conn *conn = work->conn;
 	struct smb2_query_directory_req *req;
-	struct smb2_query_directory_rsp *rsp, *rsp_org;
+	struct smb2_query_directory_rsp *rsp;
 	struct ksmbd_share_config *share = work->tcon->share_conf;
 	struct ksmbd_file *dir_fp = NULL;
 	struct ksmbd_dir_info d_info;
@@ -3826,7 +3830,6 @@
 	int buffer_sz;
 	struct smb2_query_dir_private query_dir_private = {NULL, };
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	if (ksmbd_override_fsids(work)) {
@@ -3947,7 +3950,7 @@
 		rsp->OutputBufferOffset = cpu_to_le16(0);
 		rsp->OutputBufferLength = cpu_to_le32(0);
 		rsp->Buffer[0] = 0;
-		inc_rfc1001_len(rsp_org, 9);
+		inc_rfc1001_len(work->response_buf, 9);
 	} else {
 		((struct file_directory_info *)
 		((char *)rsp->Buffer + d_info.last_entry_offset))
@@ -3956,7 +3959,7 @@
 		rsp->StructureSize = cpu_to_le16(9);
 		rsp->OutputBufferOffset = cpu_to_le16(72);
 		rsp->OutputBufferLength = cpu_to_le32(d_info.data_count);
-		inc_rfc1001_len(rsp_org, 8 + d_info.data_count);
+		inc_rfc1001_len(work->response_buf, 8 + d_info.data_count);
 	}
 
 	kfree(srch_ptr);
@@ -3999,26 +4002,28 @@
  * Return:	0 on success, otherwise error
  */
 static int buffer_check_err(int reqOutputBufferLength,
-			    struct smb2_query_info_rsp *rsp, int infoclass_size)
+			    struct smb2_query_info_rsp *rsp,
+			    void *rsp_org, int infoclass_size)
 {
 	if (reqOutputBufferLength < le32_to_cpu(rsp->OutputBufferLength)) {
 		if (reqOutputBufferLength < infoclass_size) {
 			pr_err("Invalid Buffer Size Requested\n");
 			rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
-			rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4);
+			*(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr));
 			return -EINVAL;
 		}
 
 		ksmbd_debug(SMB, "Buffer Overflow\n");
 		rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
-		rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4 +
+		*(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr) +
 				reqOutputBufferLength);
 		rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength);
 	}
 	return 0;
 }
 
-static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
+static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp,
+				   void *rsp_org)
 {
 	struct smb2_file_standard_info *sinfo;
 
@@ -4031,10 +4036,11 @@
 	sinfo->Directory = 0;
 	rsp->OutputBufferLength =
 		cpu_to_le32(sizeof(struct smb2_file_standard_info));
-	inc_rfc1001_len(rsp, sizeof(struct smb2_file_standard_info));
+	inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_standard_info));
 }
 
-static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
+static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num,
+				   void *rsp_org)
 {
 	struct smb2_file_internal_info *file_info;
 
@@ -4044,12 +4050,13 @@
 	file_info->IndexNumber = cpu_to_le64(num | (1ULL << 63));
 	rsp->OutputBufferLength =
 		cpu_to_le32(sizeof(struct smb2_file_internal_info));
-	inc_rfc1001_len(rsp, sizeof(struct smb2_file_internal_info));
+	inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info));
 }
 
 static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
 				   struct smb2_query_info_req *req,
-				   struct smb2_query_info_rsp *rsp)
+				   struct smb2_query_info_rsp *rsp,
+				   void *rsp_org)
 {
 	u64 id;
 	int rc;
@@ -4067,14 +4074,16 @@
 
 	switch (req->FileInfoClass) {
 	case FILE_STANDARD_INFORMATION:
-		get_standard_info_pipe(rsp);
+		get_standard_info_pipe(rsp, rsp_org);
 		rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-				      rsp, FILE_STANDARD_INFORMATION_SIZE);
+				      rsp, rsp_org,
+				      FILE_STANDARD_INFORMATION_SIZE);
 		break;
 	case FILE_INTERNAL_INFORMATION:
-		get_internal_info_pipe(rsp, id);
+		get_internal_info_pipe(rsp, id, rsp_org);
 		rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-				      rsp, FILE_INTERNAL_INFORMATION_SIZE);
+				      rsp, rsp_org,
+				      FILE_INTERNAL_INFORMATION_SIZE);
 		break;
 	default:
 		ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n",
@@ -4450,6 +4459,12 @@
 			 &stat);
 	file_info = (struct smb2_file_stream_info *)rsp->Buffer;
 
+	buf_free_len =
+		smb2_calc_max_out_buf_len(work, 8,
+					  le32_to_cpu(req->OutputBufferLength));
+	if (buf_free_len < 0)
+		goto out;
+
 	xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
 	if (xattr_list_len < 0) {
 		goto out;
@@ -4458,12 +4473,6 @@
 		goto out;
 	}
 
-	buf_free_len =
-		smb2_calc_max_out_buf_len(work, 8,
-					  le32_to_cpu(req->OutputBufferLength));
-	if (buf_free_len < 0)
-		goto out;
-
 	while (idx < xattr_list_len) {
 		stream_name = xattr_list + idx;
 		streamlen = strlen(stream_name);
@@ -4489,8 +4498,10 @@
 				     ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
 
 		next = sizeof(struct smb2_file_stream_info) + streamlen * 2;
-		if (next > buf_free_len)
+		if (next > buf_free_len) {
+			kfree(stream_buf);
 			break;
+		}
 
 		file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
 		streamlen  = smbConvertToUTF16((__le16 *)file_info->StreamName,
@@ -4507,6 +4518,7 @@
 		file_info->NextEntryOffset = cpu_to_le32(next);
 	}
 
+out:
 	if (!S_ISDIR(stat.mode) &&
 	    buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) {
 		file_info = (struct smb2_file_stream_info *)
@@ -4515,14 +4527,13 @@
 					      "::$DATA", 7, conn->local_nls, 0);
 		streamlen *= 2;
 		file_info->StreamNameLength = cpu_to_le32(streamlen);
-		file_info->StreamSize = 0;
-		file_info->StreamAllocationSize = 0;
+		file_info->StreamSize = cpu_to_le64(stat.size);
+		file_info->StreamAllocationSize = cpu_to_le64(stat.blocks << 9);
 		nbytes += sizeof(struct smb2_file_stream_info) + streamlen;
 	}
 
 	/* last entry offset should be 0 */
 	file_info->NextEntryOffset = 0;
-out:
 	kvfree(xattr_list);
 
 	rsp->OutputBufferLength = cpu_to_le32(nbytes);
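
A cluster of fixes lands in the stream-info path above: the output budget is computed before the xattr walk so an invalid budget bails out early, stream_buf is freed when an entry no longer fits (previously leaked on that break), the out: label moves so early exits still emit the terminating entry, and the synthesized ::$DATA stream now reports the real size and allocation instead of zeros. stat-style block counts are 512-byte sectors, hence the "<< 9"; a userspace illustration (the path is arbitrary):

    #include <stdio.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat st;

            if (stat("/etc/hostname", &st) == 0)   /* any existing file */
                    printf("StreamSize=%lld StreamAllocationSize=%lld\n",
                           (long long)st.st_size,
                           (long long)st.st_blocks << 9); /* sectors -> bytes */
            return 0;
    }
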
@@ -4688,7 +4699,7 @@
 
 static int smb2_get_info_file(struct ksmbd_work *work,
 			      struct smb2_query_info_req *req,
-			      struct smb2_query_info_rsp *rsp, void *rsp_org)
+			      struct smb2_query_info_rsp *rsp)
 {
 	struct ksmbd_file *fp;
 	int fileinfoclass = 0;
@@ -4699,7 +4710,8 @@
 	if (test_share_config_flag(work->tcon->share_conf,
 				   KSMBD_SHARE_FLAG_PIPE)) {
 		/* smb2 info file called for pipe */
-		return smb2_get_info_file_pipe(work->sess, req, rsp);
+		return smb2_get_info_file_pipe(work->sess, req, rsp,
+					       work->response_buf);
 	}
 
 	if (work->next_smb2_rcv_hdr_off) {
@@ -4724,77 +4736,77 @@
 
 	switch (fileinfoclass) {
 	case FILE_ACCESS_INFORMATION:
-		get_file_access_info(rsp, fp, rsp_org);
+		get_file_access_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE;
 		break;
 
 	case FILE_BASIC_INFORMATION:
-		rc = get_file_basic_info(rsp, fp, rsp_org);
+		rc = get_file_basic_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_BASIC_INFORMATION_SIZE;
 		break;
 
 	case FILE_STANDARD_INFORMATION:
-		get_file_standard_info(rsp, fp, rsp_org);
+		get_file_standard_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
 		break;
 
 	case FILE_ALIGNMENT_INFORMATION:
-		get_file_alignment_info(rsp, rsp_org);
+		get_file_alignment_info(rsp, work->response_buf);
 		file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE;
 		break;
 
 	case FILE_ALL_INFORMATION:
-		rc = get_file_all_info(work, rsp, fp, rsp_org);
+		rc = get_file_all_info(work, rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_ALL_INFORMATION_SIZE;
 		break;
 
 	case FILE_ALTERNATE_NAME_INFORMATION:
-		get_file_alternate_info(work, rsp, fp, rsp_org);
+		get_file_alternate_info(work, rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE;
 		break;
 
 	case FILE_STREAM_INFORMATION:
-		get_file_stream_info(work, rsp, fp, rsp_org);
+		get_file_stream_info(work, rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_STREAM_INFORMATION_SIZE;
 		break;
 
 	case FILE_INTERNAL_INFORMATION:
-		get_file_internal_info(rsp, fp, rsp_org);
+		get_file_internal_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
 		break;
 
 	case FILE_NETWORK_OPEN_INFORMATION:
-		rc = get_file_network_open_info(rsp, fp, rsp_org);
+		rc = get_file_network_open_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE;
 		break;
 
 	case FILE_EA_INFORMATION:
-		get_file_ea_info(rsp, rsp_org);
+		get_file_ea_info(rsp, work->response_buf);
 		file_infoclass_size = FILE_EA_INFORMATION_SIZE;
 		break;
 
 	case FILE_FULL_EA_INFORMATION:
-		rc = smb2_get_ea(work, fp, req, rsp, rsp_org);
+		rc = smb2_get_ea(work, fp, req, rsp, work->response_buf);
 		file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE;
 		break;
 
 	case FILE_POSITION_INFORMATION:
-		get_file_position_info(rsp, fp, rsp_org);
+		get_file_position_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_POSITION_INFORMATION_SIZE;
 		break;
 
 	case FILE_MODE_INFORMATION:
-		get_file_mode_info(rsp, fp, rsp_org);
+		get_file_mode_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_MODE_INFORMATION_SIZE;
 		break;
 
 	case FILE_COMPRESSION_INFORMATION:
-		get_file_compression_info(rsp, fp, rsp_org);
+		get_file_compression_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE;
 		break;
 
 	case FILE_ATTRIBUTE_TAG_INFORMATION:
-		rc = get_file_attribute_tag_info(rsp, fp, rsp_org);
+		rc = get_file_attribute_tag_info(rsp, fp, work->response_buf);
 		file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE;
 		break;
 	case SMB_FIND_FILE_POSIX_INFO:
@@ -4802,7 +4814,7 @@
 			pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
 			rc = -EOPNOTSUPP;
 		} else {
-			rc = find_file_posix_info(rsp, fp, rsp_org);
+			rc = find_file_posix_info(rsp, fp, work->response_buf);
 			file_infoclass_size = sizeof(struct smb311_posix_qinfo);
 		}
 		break;
@@ -4813,7 +4825,7 @@
 	}
 	if (!rc)
 		rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-				      rsp,
+				      rsp, work->response_buf,
 				      file_infoclass_size);
 	ksmbd_fd_put(work, fp);
 	return rc;
@@ -4821,7 +4833,7 @@
 
 static int smb2_get_info_filesystem(struct ksmbd_work *work,
 				    struct smb2_query_info_req *req,
-				    struct smb2_query_info_rsp *rsp, void *rsp_org)
+				    struct smb2_query_info_rsp *rsp)
 {
 	struct ksmbd_session *sess = work->sess;
 	struct ksmbd_conn *conn = sess->conn;
@@ -4857,7 +4869,7 @@
 		info->DeviceType = cpu_to_le32(stfs.f_type);
 		info->DeviceCharacteristics = cpu_to_le32(0x00000020);
 		rsp->OutputBufferLength = cpu_to_le32(8);
-		inc_rfc1001_len(rsp_org, 8);
+		inc_rfc1001_len(work->response_buf, 8);
 		fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE;
 		break;
 	}
@@ -4883,7 +4895,7 @@
 		info->FileSystemNameLen = cpu_to_le32(len);
 		sz = sizeof(struct filesystem_attribute_info) - 2 + len;
 		rsp->OutputBufferLength = cpu_to_le32(sz);
-		inc_rfc1001_len(rsp_org, sz);
+		inc_rfc1001_len(work->response_buf, sz);
 		fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE;
 		break;
 	}
@@ -4891,11 +4903,18 @@
 	{
 		struct filesystem_vol_info *info;
 		size_t sz;
+		unsigned int serial_crc = 0;
 
 		info = (struct filesystem_vol_info *)(rsp->Buffer);
 		info->VolumeCreationTime = 0;
+		serial_crc = crc32_le(serial_crc, share->name,
+				      strlen(share->name));
+		serial_crc = crc32_le(serial_crc, share->path,
+				      strlen(share->path));
+		serial_crc = crc32_le(serial_crc, ksmbd_netbios_name(),
+				      strlen(ksmbd_netbios_name()));
 		/* Taking dummy value of serial number*/
-		info->SerialNumber = cpu_to_le32(0xbc3ac512);
+		info->SerialNumber = cpu_to_le32(serial_crc);
 		len = smbConvertToUTF16((__le16 *)info->VolumeLabel,
 					share->name, PATH_MAX,
 					conn->local_nls, 0);
@@ -4904,7 +4923,7 @@
 		info->Reserved = 0;
 		sz = sizeof(struct filesystem_vol_info) - 2 + len;
 		rsp->OutputBufferLength = cpu_to_le32(sz);
-		inc_rfc1001_len(rsp_org, sz);
+		inc_rfc1001_len(work->response_buf, sz);
 		fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE;
 		break;
 	}
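
Instead of the hard-coded 0xbc3ac512, the volume serial is now a CRC chained over the share name, the share path, and the server's NetBIOS name, so different shares stop presenting identical volume identities to clients that cache them. A hedged userspace equivalent, with zlib's crc32() standing in for the kernel's crc32_le() (the bit conventions differ slightly; the chaining is the point) and made-up share parameters; build with -lz:

    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    int main(void)
    {
            /* hypothetical share parameters, for illustration only */
            const char *name = "myshare", *path = "/srv/myshare", *nb = "KSMBD";
            unsigned long crc = 0;

            crc = crc32(crc, (const unsigned char *)name, strlen(name));
            crc = crc32(crc, (const unsigned char *)path, strlen(path));
            crc = crc32(crc, (const unsigned char *)nb, strlen(nb));
            printf("SerialNumber=0x%08lx\n", crc);
            return 0;
    }
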
@@ -4918,7 +4937,7 @@
 		info->SectorsPerAllocationUnit = cpu_to_le32(1);
 		info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
 		rsp->OutputBufferLength = cpu_to_le32(24);
-		inc_rfc1001_len(rsp_org, 24);
+		inc_rfc1001_len(work->response_buf, 24);
 		fs_infoclass_size = FS_SIZE_INFORMATION_SIZE;
 		break;
 	}
@@ -4935,7 +4954,7 @@
 		info->SectorsPerAllocationUnit = cpu_to_le32(1);
 		info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
 		rsp->OutputBufferLength = cpu_to_le32(32);
-		inc_rfc1001_len(rsp_org, 32);
+		inc_rfc1001_len(work->response_buf, 32);
 		fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE;
 		break;
 	}
@@ -4956,7 +4975,7 @@
 		info->extended_info.rel_date = 0;
 		memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0"));
 		rsp->OutputBufferLength = cpu_to_le32(64);
-		inc_rfc1001_len(rsp_org, 64);
+		inc_rfc1001_len(work->response_buf, 64);
 		fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE;
 		break;
 	}
@@ -4977,7 +4996,7 @@
 		info->ByteOffsetForSectorAlignment = 0;
 		info->ByteOffsetForPartitionAlignment = 0;
 		rsp->OutputBufferLength = cpu_to_le32(28);
-		inc_rfc1001_len(rsp_org, 28);
+		inc_rfc1001_len(work->response_buf, 28);
 		fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE;
 		break;
 	}
@@ -4999,7 +5018,7 @@
 		info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID);
 		info->Padding = 0;
 		rsp->OutputBufferLength = cpu_to_le32(48);
-		inc_rfc1001_len(rsp_org, 48);
+		inc_rfc1001_len(work->response_buf, 48);
 		fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE;
 		break;
 	}
@@ -5020,7 +5039,7 @@
 			info->TotalFileNodes = cpu_to_le64(stfs.f_files);
 			info->FreeFileNodes = cpu_to_le64(stfs.f_ffree);
 			rsp->OutputBufferLength = cpu_to_le32(56);
-			inc_rfc1001_len(rsp_org, 56);
+			inc_rfc1001_len(work->response_buf, 56);
 			fs_infoclass_size = FS_POSIX_INFORMATION_SIZE;
 		}
 		break;
@@ -5030,7 +5049,7 @@
 		return -EOPNOTSUPP;
 	}
 	rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-			      rsp,
+			      rsp, work->response_buf,
 			      fs_infoclass_size);
 	path_put(&path);
 	return rc;
@@ -5038,7 +5057,7 @@
 
 static int smb2_get_info_sec(struct ksmbd_work *work,
 			     struct smb2_query_info_req *req,
-			     struct smb2_query_info_rsp *rsp, void *rsp_org)
+			     struct smb2_query_info_rsp *rsp)
 {
 	struct ksmbd_file *fp;
 	struct user_namespace *user_ns;
@@ -5053,7 +5072,7 @@
 	if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
 			      PROTECTED_DACL_SECINFO |
 			      UNPROTECTED_DACL_SECINFO)) {
-		pr_err("Unsupported addition info: 0x%x)\n",
+		ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n",
 		       addition_info);
 
 		pntsd->revision = cpu_to_le16(1);
@@ -5065,7 +5084,7 @@
 
 		secdesclen = sizeof(struct smb_ntsd);
 		rsp->OutputBufferLength = cpu_to_le32(secdesclen);
-		inc_rfc1001_len(rsp_org, secdesclen);
+		inc_rfc1001_len(work->response_buf, secdesclen);
 
 		return 0;
 	}
@@ -5107,7 +5126,7 @@
 		return rc;
 
 	rsp->OutputBufferLength = cpu_to_le32(secdesclen);
-	inc_rfc1001_len(rsp_org, secdesclen);
+	inc_rfc1001_len(work->response_buf, secdesclen);
 	return 0;
 }
 
@@ -5120,10 +5139,9 @@
 int smb2_query_info(struct ksmbd_work *work)
 {
 	struct smb2_query_info_req *req;
-	struct smb2_query_info_rsp *rsp, *rsp_org;
+	struct smb2_query_info_rsp *rsp;
 	int rc = 0;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	ksmbd_debug(SMB, "GOT query info request\n");
@@ -5131,15 +5149,15 @@
 	switch (req->InfoType) {
 	case SMB2_O_INFO_FILE:
 		ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
-		rc = smb2_get_info_file(work, req, rsp, (void *)rsp_org);
+		rc = smb2_get_info_file(work, req, rsp);
 		break;
 	case SMB2_O_INFO_FILESYSTEM:
 		ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILESYSTEM\n");
-		rc = smb2_get_info_filesystem(work, req, rsp, (void *)rsp_org);
+		rc = smb2_get_info_filesystem(work, req, rsp);
 		break;
 	case SMB2_O_INFO_SECURITY:
 		ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
-		rc = smb2_get_info_sec(work, req, rsp, (void *)rsp_org);
+		rc = smb2_get_info_sec(work, req, rsp);
 		break;
 	default:
 		ksmbd_debug(SMB, "InfoType %d not supported yet\n",
@@ -5164,7 +5182,7 @@
 	}
 	rsp->StructureSize = cpu_to_le16(9);
 	rsp->OutputBufferOffset = cpu_to_le16(72);
-	inc_rfc1001_len(rsp_org, 8);
+	inc_rfc1001_len(work->response_buf, 8);
 	return 0;
 }
 
@@ -5177,8 +5195,8 @@
 static noinline int smb2_close_pipe(struct ksmbd_work *work)
 {
 	u64 id;
-	struct smb2_close_req *req = work->request_buf;
-	struct smb2_close_rsp *rsp = work->response_buf;
+	struct smb2_close_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_close_rsp *rsp = smb2_get_msg(work->response_buf);
 
 	id = le64_to_cpu(req->VolatileFileId);
 	ksmbd_session_rpc_close(work->sess, id);
@@ -5193,7 +5211,7 @@
 	rsp->AllocationSize = 0;
 	rsp->EndOfFile = 0;
 	rsp->Attributes = 0;
-	inc_rfc1001_len(rsp, 60);
+	inc_rfc1001_len(work->response_buf, 60);
 	return 0;
 }
 
@@ -5209,14 +5227,12 @@
 	u64 sess_id;
 	struct smb2_close_req *req;
 	struct smb2_close_rsp *rsp;
-	struct smb2_close_rsp *rsp_org;
 	struct ksmbd_conn *conn = work->conn;
 	struct ksmbd_file *fp;
 	struct inode *inode;
 	u64 time;
 	int err = 0;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	if (test_share_config_flag(work->tcon->share_conf,
@@ -5306,7 +5322,7 @@
 			rsp->hdr.Status = STATUS_FILE_CLOSED;
 		smb2_set_err_rsp(work);
 	} else {
-		inc_rfc1001_len(rsp_org, 60);
+		inc_rfc1001_len(work->response_buf, 60);
 	}
 
 	return 0;
@@ -5320,11 +5336,11 @@
  */
 int smb2_echo(struct ksmbd_work *work)
 {
-	struct smb2_echo_rsp *rsp = work->response_buf;
+	struct smb2_echo_rsp *rsp = smb2_get_msg(work->response_buf);
 
 	rsp->StructureSize = cpu_to_le16(4);
 	rsp->Reserved = 0;
-	inc_rfc1001_len(rsp, 4);
+	inc_rfc1001_len(work->response_buf, 4);
 	return 0;
 }
 
@@ -5566,14 +5582,14 @@
 
 	if (file_info->Attributes) {
 		if (!S_ISDIR(inode->i_mode) &&
-		    file_info->Attributes & ATTR_DIRECTORY_LE) {
+		    file_info->Attributes & FILE_ATTRIBUTE_DIRECTORY_LE) {
 			pr_err("can't change a file to a directory\n");
 			return -EINVAL;
 		}
 
-		if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == ATTR_NORMAL_LE))
+		if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == FILE_ATTRIBUTE_NORMAL_LE))
 			fp->f_ci->m_fattr = file_info->Attributes |
-				(fp->f_ci->m_fattr & ATTR_DIRECTORY_LE);
+				(fp->f_ci->m_fattr & FILE_ATTRIBUTE_DIRECTORY_LE);
 	}
 
 	if (test_share_config_flag(share, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS) &&
@@ -5794,9 +5810,7 @@
 
 	mode = file_info->Mode;
 
-	if ((mode & ~FILE_MODE_INFO_MASK) ||
-	    (mode & FILE_SYNCHRONOUS_IO_ALERT_LE &&
-	     mode & FILE_SYNCHRONOUS_IO_NONALERT_LE)) {
+	if ((mode & ~FILE_MODE_INFO_MASK)) {
 		pr_err("Mode is not valid : 0x%x\n", le32_to_cpu(mode));
 		return -EINVAL;
 	}
@@ -5943,14 +5957,13 @@
 int smb2_set_info(struct ksmbd_work *work)
 {
 	struct smb2_set_info_req *req;
-	struct smb2_set_info_rsp *rsp, *rsp_org;
+	struct smb2_set_info_rsp *rsp;
 	struct ksmbd_file *fp;
 	int rc = 0;
 	unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
 
 	ksmbd_debug(SMB, "Received set info request\n");
 
-	rsp_org = work->response_buf;
 	if (work->next_smb2_rcv_hdr_off) {
 		req = ksmbd_req_buf_next(work);
 		rsp = ksmbd_resp_buf_next(work);
@@ -5961,8 +5974,8 @@
 			pid = work->compound_pfid;
 		}
 	} else {
-		req = work->request_buf;
-		rsp = work->response_buf;
+		req = smb2_get_msg(work->request_buf);
+		rsp = smb2_get_msg(work->response_buf);
 	}
 
 	if (!has_file_id(id)) {
@@ -6002,7 +6015,7 @@
 		goto err_out;
 
 	rsp->StructureSize = cpu_to_le16(2);
-	inc_rfc1001_len(rsp_org, 2);
+	inc_rfc1001_len(work->response_buf, 2);
 	ksmbd_fd_put(work, fp);
 	return 0;
 
@@ -6042,12 +6055,12 @@
 	int nbytes = 0, err;
 	u64 id;
 	struct ksmbd_rpc_command *rpc_resp;
-	struct smb2_read_req *req = work->request_buf;
-	struct smb2_read_rsp *rsp = work->response_buf;
+	struct smb2_read_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_read_rsp *rsp = smb2_get_msg(work->response_buf);
 
 	id = le64_to_cpu(req->VolatileFileId);
 
-	inc_rfc1001_len(rsp, 16);
+	inc_rfc1001_len(work->response_buf, 16);
 	rpc_resp = ksmbd_rpc_read(work->sess, id);
 	if (rpc_resp) {
 		if (rpc_resp->flags != KSMBD_RPC_OK) {
@@ -6066,7 +6079,7 @@
 		       rpc_resp->payload_sz);
 
 		nbytes = rpc_resp->payload_sz;
-		work->resp_hdr_sz = get_rfc1002_len(rsp) + 4;
+		work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4;
 		work->aux_payload_sz = nbytes;
 		kvfree(rpc_resp);
 	}
@@ -6076,8 +6089,8 @@
 	rsp->Reserved = 0;
 	rsp->DataLength = cpu_to_le32(nbytes);
 	rsp->DataRemaining = 0;
-	rsp->Reserved2 = 0;
-	inc_rfc1001_len(rsp, nbytes);
+	rsp->Flags = 0;
+	inc_rfc1001_len(work->response_buf, nbytes);
 	return 0;
 
 out:
@@ -6127,14 +6140,13 @@
 {
 	struct ksmbd_conn *conn = work->conn;
 	struct smb2_read_req *req;
-	struct smb2_read_rsp *rsp, *rsp_org;
+	struct smb2_read_rsp *rsp;
 	struct ksmbd_file *fp;
 	loff_t offset;
 	size_t length, mincount;
 	ssize_t nbytes = 0, remain_bytes = 0;
 	int err = 0;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	if (test_share_config_flag(work->tcon->share_conf,
@@ -6215,11 +6227,11 @@
 	rsp->Reserved = 0;
 	rsp->DataLength = cpu_to_le32(nbytes);
 	rsp->DataRemaining = cpu_to_le32(remain_bytes);
-	rsp->Reserved2 = 0;
-	inc_rfc1001_len(rsp_org, 16);
-	work->resp_hdr_sz = get_rfc1002_len(rsp_org) + 4;
+	rsp->Flags = 0;
+	inc_rfc1001_len(work->response_buf, 16);
+	work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4;
 	work->aux_payload_sz = nbytes;
-	inc_rfc1001_len(rsp_org, nbytes);
+	inc_rfc1001_len(work->response_buf, nbytes);
 	ksmbd_fd_put(work, fp);
 	return 0;
 
@@ -6254,8 +6266,8 @@
  */
 static noinline int smb2_write_pipe(struct ksmbd_work *work)
 {
-	struct smb2_write_req *req = work->request_buf;
-	struct smb2_write_rsp *rsp = work->response_buf;
+	struct smb2_write_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_write_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_rpc_command *rpc_resp;
 	u64 id = 0;
 	int err = 0, ret = 0;
@@ -6266,13 +6278,14 @@
 	id = le64_to_cpu(req->VolatileFileId);
 
 	if (le16_to_cpu(req->DataOffset) ==
-	    (offsetof(struct smb2_write_req, Buffer) - 4)) {
+	    offsetof(struct smb2_write_req, Buffer)) {
 		data_buf = (char *)&req->Buffer[0];
 	} else {
-		if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+		if ((u64)le16_to_cpu(req->DataOffset) + length >
+		    get_rfc1002_len(work->request_buf)) {
 			pr_err("invalid write data offset %u, smb_len %u\n",
 			       le16_to_cpu(req->DataOffset),
-			       get_rfc1002_len(req));
+			       get_rfc1002_len(work->request_buf));
 			err = -EINVAL;
 			goto out;
 		}
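
Both pipe and regular writes get the same two adjustments: the fast-path DataOffset comparison loses its "- 4" (the offset is measured from ProtocolId, where the request struct now begins), and the slow-path bound is taken from the transport length of the raw request_buf. The guard itself, stated on its own (hedged); the 64-bit cast keeps the 16-bit offset plus 32-bit length from wrapping:

    #include <stdint.h>

    /* payload must end inside the PDU declared by the transport length */
    static int write_payload_in_bounds(uint16_t data_off, uint32_t length,
                                       uint32_t pdu_len)
    {
            return (uint64_t)data_off + length <= pdu_len;
    }
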
@@ -6304,7 +6317,7 @@
 	rsp->DataLength = cpu_to_le32(length);
 	rsp->DataRemaining = 0;
 	rsp->Reserved2 = 0;
-	inc_rfc1001_len(rsp, 16);
+	inc_rfc1001_len(work->response_buf, 16);
 	return 0;
 out:
 	if (err) {
@@ -6372,7 +6385,7 @@
 int smb2_write(struct ksmbd_work *work)
 {
 	struct smb2_write_req *req;
-	struct smb2_write_rsp *rsp, *rsp_org;
+	struct smb2_write_rsp *rsp;
 	struct ksmbd_file *fp = NULL;
 	loff_t offset;
 	size_t length;
@@ -6381,7 +6394,6 @@
 	bool writethrough = false;
 	int err = 0;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) {
@@ -6424,13 +6436,14 @@
 	if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
 	    req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
 		if (le16_to_cpu(req->DataOffset) ==
-		    (offsetof(struct smb2_write_req, Buffer) - 4)) {
+		    offsetof(struct smb2_write_req, Buffer)) {
 			data_buf = (char *)&req->Buffer[0];
 		} else {
-			if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+			if ((u64)le16_to_cpu(req->DataOffset) + length >
+			    get_rfc1002_len(work->request_buf)) {
 				pr_err("invalid write data offset %u, smb_len %u\n",
 				       le16_to_cpu(req->DataOffset),
-				       get_rfc1002_len(req));
+				       get_rfc1002_len(work->request_buf));
 				err = -EINVAL;
 				goto out;
 			}
@@ -6468,7 +6481,7 @@
 	rsp->DataLength = cpu_to_le32(nbytes);
 	rsp->DataRemaining = 0;
 	rsp->Reserved2 = 0;
-	inc_rfc1001_len(rsp_org, 16);
+	inc_rfc1001_len(work->response_buf, 16);
 	ksmbd_fd_put(work, fp);
 	return 0;
 
@@ -6502,10 +6515,9 @@
 int smb2_flush(struct ksmbd_work *work)
 {
 	struct smb2_flush_req *req;
-	struct smb2_flush_rsp *rsp, *rsp_org;
+	struct smb2_flush_rsp *rsp;
 	int err;
 
-	rsp_org = work->response_buf;
 	WORK_BUFFERS(work, req, rsp);
 
 	ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n",
@@ -6519,7 +6531,7 @@
 
 	rsp->StructureSize = cpu_to_le16(4);
 	rsp->Reserved = 0;
-	inc_rfc1001_len(rsp_org, 4);
+	inc_rfc1001_len(work->response_buf, 4);
 	return 0;
 
 out:
@@ -6540,7 +6552,7 @@
 int smb2_cancel(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_hdr *hdr = work->request_buf;
+	struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
 	struct smb2_hdr *chdr;
 	struct ksmbd_work *cancel_work = NULL;
 	int canceled = 0;
@@ -6555,7 +6567,7 @@
 		spin_lock(&conn->request_lock);
 		list_for_each_entry(cancel_work, command_list,
 				    async_request_entry) {
-			chdr = cancel_work->request_buf;
+			chdr = smb2_get_msg(cancel_work->request_buf);
 
 			if (cancel_work->async_id !=
 			    le64_to_cpu(hdr->Id.AsyncId))
@@ -6574,7 +6586,7 @@
 
 		spin_lock(&conn->request_lock);
 		list_for_each_entry(cancel_work, command_list, request_entry) {
-			chdr = cancel_work->request_buf;
+			chdr = smb2_get_msg(cancel_work->request_buf);
 
 			if (chdr->MessageId != hdr->MessageId ||
 			    cancel_work == work)
@@ -6709,8 +6721,8 @@
  */
 int smb2_lock(struct ksmbd_work *work)
 {
-	struct smb2_lock_req *req = work->request_buf;
-	struct smb2_lock_rsp *rsp = work->response_buf;
+	struct smb2_lock_req *req = smb2_get_msg(work->request_buf);
+	struct smb2_lock_rsp *rsp = smb2_get_msg(work->response_buf);
 	struct smb2_lock_element *lock_ele;
 	struct ksmbd_file *fp = NULL;
 	struct file_lock *flock = NULL;
@@ -7017,7 +7029,7 @@
 	ksmbd_debug(SMB, "successful in taking lock\n");
 	rsp->hdr.Status = STATUS_SUCCESS;
 	rsp->Reserved = 0;
-	inc_rfc1001_len(rsp, 4);
+	inc_rfc1001_len(work->response_buf, 4);
 	ksmbd_fd_put(work, fp);
 	return 0;
 
@@ -7312,7 +7324,7 @@
 	int ret = 0;
 	int dialect;
 
-	if (in_buf_len < sizeof(struct validate_negotiate_info_req) +
+	if (in_buf_len < offsetof(struct validate_negotiate_info_req, Dialects) +
 			le16_to_cpu(neg_req->DialectCount) * sizeof(__le16))
 		return -EINVAL;
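
The FSCTL_VALIDATE_NEGOTIATE_INFO length check switches from sizeof() to offsetof() because the request ends in a variable-length dialect array declared with a single placeholder element: sizeof() counts that placeholder, over-requiring the input by one __le16 and rejecting a request sized exactly to its DialectCount. A hedged mirror of the corrected check:

    #include <stddef.h>
    #include <stdint.h>

    /* illustrative mirror of the request layout, not the kernel struct */
    struct vneg_req {
            uint32_t Capabilities;
            uint8_t  Guid[16];
            uint16_t SecurityMode;
            uint16_t DialectCount;
            uint16_t Dialects[];    /* DialectCount entries follow */
    } __attribute__((packed));

    static int vneg_len_ok(unsigned int in_buf_len, uint16_t dialect_count)
    {
            /* 24 fixed bytes plus 2 per dialect */
            return in_buf_len >= offsetof(struct vneg_req, Dialects) +
                                 dialect_count * sizeof(uint16_t);
    }
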
 
@@ -7435,9 +7447,9 @@
 
 	old_fattr = fp->f_ci->m_fattr;
 	if (sparse->SetSparse)
-		fp->f_ci->m_fattr |= ATTR_SPARSE_FILE_LE;
+		fp->f_ci->m_fattr |= FILE_ATTRIBUTE_SPARSE_FILE_LE;
 	else
-		fp->f_ci->m_fattr &= ~ATTR_SPARSE_FILE_LE;
+		fp->f_ci->m_fattr &= ~FILE_ATTRIBUTE_SPARSE_FILE_LE;
 
 	if (fp->f_ci->m_fattr != old_fattr &&
 	    test_share_config_flag(work->tcon->share_conf,
@@ -7490,13 +7502,12 @@
 int smb2_ioctl(struct ksmbd_work *work)
 {
 	struct smb2_ioctl_req *req;
-	struct smb2_ioctl_rsp *rsp, *rsp_org;
+	struct smb2_ioctl_rsp *rsp;
 	unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len;
 	u64 id = KSMBD_NO_FID;
 	struct ksmbd_conn *conn = work->conn;
 	int ret = 0;
 
-	rsp_org = work->response_buf;
 	if (work->next_smb2_rcv_hdr_off) {
 		req = ksmbd_req_buf_next(work);
 		rsp = ksmbd_resp_buf_next(work);
@@ -7506,8 +7517,8 @@
 			id = work->compound_fid;
 		}
 	} else {
-		req = work->request_buf;
-		rsp = work->response_buf;
+		req = smb2_get_msg(work->request_buf);
+		rsp = smb2_get_msg(work->response_buf);
 	}
 
 	if (!has_file_id(id))
@@ -7787,7 +7798,7 @@
 	rsp->Reserved = cpu_to_le16(0);
 	rsp->Flags = cpu_to_le32(0);
 	rsp->Reserved2 = cpu_to_le32(0);
-	inc_rfc1001_len(rsp_org, 48 + nbytes);
+	inc_rfc1001_len(work->response_buf, 48 + nbytes);
 
 	return 0;
 
@@ -7814,8 +7825,8 @@
  */
 static void smb20_oplock_break_ack(struct ksmbd_work *work)
 {
-	struct smb2_oplock_break *req = work->request_buf;
-	struct smb2_oplock_break *rsp = work->response_buf;
+	struct smb2_oplock_break *req = smb2_get_msg(work->request_buf);
+	struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf);
 	struct ksmbd_file *fp;
 	struct oplock_info *opinfo = NULL;
 	__le32 err = 0;
@@ -7922,7 +7933,7 @@
 	rsp->Reserved2 = 0;
 	rsp->VolatileFid = cpu_to_le64(volatile_id);
 	rsp->PersistentFid = cpu_to_le64(persistent_id);
-	inc_rfc1001_len(rsp, 24);
+	inc_rfc1001_len(work->response_buf, 24);
 	return;
 
 err_out:
@@ -7958,8 +7969,8 @@
 static void smb21_lease_break_ack(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_lease_ack *req = work->request_buf;
-	struct smb2_lease_ack *rsp = work->response_buf;
+	struct smb2_lease_ack *req = smb2_get_msg(work->request_buf);
+	struct smb2_lease_ack *rsp = smb2_get_msg(work->response_buf);
 	struct oplock_info *opinfo;
 	__le32 err = 0;
 	int ret = 0;
@@ -8071,7 +8082,7 @@
 	memcpy(rsp->LeaseKey, req->LeaseKey, 16);
 	rsp->LeaseState = lease_state;
 	rsp->LeaseDuration = 0;
-	inc_rfc1001_len(rsp, 36);
+	inc_rfc1001_len(work->response_buf, 36);
 	return;
 
 err_out:
@@ -8092,8 +8103,8 @@
  */
 int smb2_oplock_break(struct ksmbd_work *work)
 {
-	struct smb2_oplock_break *req = work->request_buf;
-	struct smb2_oplock_break *rsp = work->response_buf;
+	struct smb2_oplock_break *req = smb2_get_msg(work->request_buf);
+	struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf);
 
 	switch (le16_to_cpu(req->StructureSize)) {
 	case OP_BREAK_STRUCT_SIZE_20:
@@ -8120,8 +8131,8 @@
  */
 int smb2_notify(struct ksmbd_work *work)
 {
-	struct smb2_notify_req *req;
-	struct smb2_notify_rsp *rsp;
+	struct smb2_change_notify_req *req;
+	struct smb2_change_notify_rsp *rsp;
 
 	WORK_BUFFERS(work, req, rsp);
 
@@ -8145,7 +8156,7 @@
  */
 bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
 {
-	struct smb2_hdr *rcv_hdr2 = work->request_buf;
+	struct smb2_hdr *rcv_hdr2 = smb2_get_msg(work->request_buf);
 
 	if ((rcv_hdr2->Flags & SMB2_FLAGS_SIGNED) &&
 	    command != SMB2_NEGOTIATE_HE &&
@@ -8164,22 +8175,22 @@
  */
 int smb2_check_sign_req(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr, *hdr_org;
+	struct smb2_hdr *hdr;
 	char signature_req[SMB2_SIGNATURE_SIZE];
 	char signature[SMB2_HMACSHA256_SIZE];
 	struct kvec iov[1];
 	size_t len;
 
-	hdr_org = hdr = work->request_buf;
+	hdr = smb2_get_msg(work->request_buf);
 	if (work->next_smb2_rcv_hdr_off)
 		hdr = ksmbd_req_buf_next(work);
 
 	if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
-		len = be32_to_cpu(hdr_org->smb2_buf_length);
+		len = get_rfc1002_len(work->request_buf);
 	else if (hdr->NextCommand)
 		len = le32_to_cpu(hdr->NextCommand);
 	else
-		len = be32_to_cpu(hdr_org->smb2_buf_length) -
+		len = get_rfc1002_len(work->request_buf) -
 			work->next_smb2_rcv_hdr_off;
 
 	memcpy(signature_req, hdr->Signature, SMB2_SIGNATURE_SIZE);
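
With hdr_org gone, the signing code reads the total PDU length from the raw buffer; which bytes get signed does not change. The three cases restated (hedged):

    #include <stdint.h>

    static uint32_t signed_span(uint32_t transport_len, uint32_t next_command,
                                uint32_t rcv_hdr_off)
    {
            if (!next_command && !rcv_hdr_off)
                    return transport_len;           /* lone PDU: sign it all */
            if (next_command)
                    return next_command;            /* up to the next chained PDU */
            return transport_len - rcv_hdr_off;     /* last PDU of a compound */
    }
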
@@ -8207,25 +8218,26 @@
  */
 void smb2_set_sign_rsp(struct ksmbd_work *work)
 {
-	struct smb2_hdr *hdr, *hdr_org;
+	struct smb2_hdr *hdr;
 	struct smb2_hdr *req_hdr;
 	char signature[SMB2_HMACSHA256_SIZE];
 	struct kvec iov[2];
 	size_t len;
 	int n_vec = 1;
 
-	hdr_org = hdr = work->response_buf;
+	hdr = smb2_get_msg(work->response_buf);
 	if (work->next_smb2_rsp_hdr_off)
 		hdr = ksmbd_resp_buf_next(work);
 
 	req_hdr = ksmbd_req_buf_next(work);
 
 	if (!work->next_smb2_rsp_hdr_off) {
-		len = get_rfc1002_len(hdr_org);
+		len = get_rfc1002_len(work->response_buf);
 		if (req_hdr->NextCommand)
 			len = ALIGN(len, 8);
 	} else {
-		len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+		len = get_rfc1002_len(work->response_buf) -
+			work->next_smb2_rsp_hdr_off;
 		len = ALIGN(len, 8);
 	}
 
@@ -8261,23 +8273,23 @@
 {
 	struct ksmbd_conn *conn = work->conn;
 	char *signing_key;
-	struct smb2_hdr *hdr, *hdr_org;
+	struct smb2_hdr *hdr;
 	struct channel *chann;
 	char signature_req[SMB2_SIGNATURE_SIZE];
 	char signature[SMB2_CMACAES_SIZE];
 	struct kvec iov[1];
 	size_t len;
 
-	hdr_org = hdr = work->request_buf;
+	hdr = smb2_get_msg(work->request_buf);
 	if (work->next_smb2_rcv_hdr_off)
 		hdr = ksmbd_req_buf_next(work);
 
 	if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
-		len = be32_to_cpu(hdr_org->smb2_buf_length);
+		len = get_rfc1002_len(work->request_buf);
 	else if (hdr->NextCommand)
 		len = le32_to_cpu(hdr->NextCommand);
 	else
-		len = be32_to_cpu(hdr_org->smb2_buf_length) -
+		len = get_rfc1002_len(work->request_buf) -
 			work->next_smb2_rcv_hdr_off;
 
 	if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
@@ -8318,8 +8330,7 @@
 void smb3_set_sign_rsp(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_hdr *req_hdr;
-	struct smb2_hdr *hdr, *hdr_org;
+	struct smb2_hdr *req_hdr, *hdr;
 	struct channel *chann;
 	char signature[SMB2_CMACAES_SIZE];
 	struct kvec iov[2];
@@ -8327,18 +8338,19 @@
 	size_t len;
 	char *signing_key;
 
-	hdr_org = hdr = work->response_buf;
+	hdr = smb2_get_msg(work->response_buf);
 	if (work->next_smb2_rsp_hdr_off)
 		hdr = ksmbd_resp_buf_next(work);
 
 	req_hdr = ksmbd_req_buf_next(work);
 
 	if (!work->next_smb2_rsp_hdr_off) {
-		len = get_rfc1002_len(hdr_org);
+		len = get_rfc1002_len(work->response_buf);
 		if (req_hdr->NextCommand)
 			len = ALIGN(len, 8);
 	} else {
-		len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+		len = get_rfc1002_len(work->response_buf) -
+			work->next_smb2_rsp_hdr_off;
 		len = ALIGN(len, 8);
 	}
 
@@ -8391,7 +8403,7 @@
 
 	if (le16_to_cpu(req->Command) == SMB2_NEGOTIATE_HE &&
 	    conn->preauth_info)
-		ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+		ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
 						 conn->preauth_info->Preauth_HashValue);
 
 	if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && sess) {
@@ -8409,35 +8421,34 @@
 			if (!hash_value)
 				return;
 		}
-		ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+		ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
 						 hash_value);
 	}
 }
 
-static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
-			       __le16 cipher_type)
+static void fill_transform_hdr(void *tr_buf, char *old_buf, __le16 cipher_type)
 {
-	struct smb2_hdr *hdr = (struct smb2_hdr *)old_buf;
+	struct smb2_transform_hdr *tr_hdr = tr_buf + 4;
+	struct smb2_hdr *hdr = smb2_get_msg(old_buf);
 	unsigned int orig_len = get_rfc1002_len(old_buf);
 
-	memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
+	memset(tr_buf, 0, sizeof(struct smb2_transform_hdr) + 4);
 	tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
 	tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
-	tr_hdr->Flags = cpu_to_le16(0x01);
+	tr_hdr->Flags = cpu_to_le16(TRANSFORM_FLAG_ENCRYPTED);
 	if (cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
 	    cipher_type == SMB2_ENCRYPTION_AES256_GCM)
 		get_random_bytes(&tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
 	else
 		get_random_bytes(&tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
 	memcpy(&tr_hdr->SessionId, &hdr->SessionId, 8);
-	inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
-	inc_rfc1001_len(tr_hdr, orig_len);
+	inc_rfc1001_len(tr_buf, sizeof(struct smb2_transform_hdr));
+	inc_rfc1001_len(tr_buf, orig_len);
 }
 
 int smb3_encrypt_resp(struct ksmbd_work *work)
 {
 	char *buf = work->response_buf;
-	struct smb2_transform_hdr *tr_hdr;
 	struct kvec iov[3];
 	int rc = -ENOMEM;
 	int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0);
@@ -8445,15 +8456,15 @@
 	if (ARRAY_SIZE(iov) < rq_nvec)
 		return -ENOMEM;
 
-	tr_hdr = kzalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
-	if (!tr_hdr)
+	work->tr_buf = kzalloc(sizeof(struct smb2_transform_hdr) + 4, GFP_KERNEL);
+	if (!work->tr_buf)
 		return rc;
 
 	/* fill transform header */
-	fill_transform_hdr(tr_hdr, buf, work->conn->cipher_type);
+	fill_transform_hdr(work->tr_buf, buf, work->conn->cipher_type);
 
-	iov[0].iov_base = tr_hdr;
-	iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+	iov[0].iov_base = work->tr_buf;
+	iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
 	buf_size += iov[0].iov_len - 4;
 
 	iov[1].iov_base = buf + 4;
@@ -8473,15 +8484,14 @@
 		return rc;
 
 	memmove(buf, iov[1].iov_base, iov[1].iov_len);
-	tr_hdr->smb2_buf_length = cpu_to_be32(buf_size);
-	work->tr_buf = tr_hdr;
+	*(__be32 *)work->tr_buf = cpu_to_be32(buf_size);
 
 	return rc;
 }
 
 bool smb3_is_transform_hdr(void *buf)
 {
-	struct smb2_transform_hdr *trhdr = buf;
+	struct smb2_transform_hdr *trhdr = smb2_get_msg(buf);
 
 	return trhdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM;
 }
@@ -8491,12 +8501,10 @@
 	struct ksmbd_conn *conn = work->conn;
 	struct ksmbd_session *sess;
 	char *buf = work->request_buf;
-	struct smb2_hdr *hdr;
 	unsigned int pdu_length = get_rfc1002_len(buf);
 	struct kvec iov[2];
-	int buf_data_size = pdu_length + 4 -
-		sizeof(struct smb2_transform_hdr);
-	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+	int buf_data_size = pdu_length - sizeof(struct smb2_transform_hdr);
+	struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
 	int rc = 0;
 
 	if (buf_data_size < sizeof(struct smb2_hdr)) {
@@ -8518,16 +8526,15 @@
 	}
 
 	iov[0].iov_base = buf;
-	iov[0].iov_len = sizeof(struct smb2_transform_hdr);
-	iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+	iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
+	iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr) + 4;
 	iov[1].iov_len = buf_data_size;
 	rc = ksmbd_crypt_message(conn, iov, 2, 0);
 	if (rc)
 		return rc;
 
 	memmove(buf + 4, iov[1].iov_base, buf_data_size);
-	hdr = (struct smb2_hdr *)buf;
-	hdr->smb2_buf_length = cpu_to_be32(buf_data_size);
+	*(__be32 *)buf = cpu_to_be32(buf_data_size);
 
 	return rc;
 }
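
The transform path mirrors the header change in both directions: struct smb2_transform_hdr also loses its embedded length, so the encrypt side allocates sizeof(hdr) + 4 and keeps the header at tr_buf + 4, while the decrypt side sizes the ciphertext as the transport length minus the bare header. The resulting layout and length math, sketched (52 being the assumed on-the-wire transform header size):

    #include <stdint.h>

    /* tr_buf: [ be32 transport len ][ transform hdr (52) ][ ciphertext ... ] */
    enum { SMB2_TR_HDR_SIZE = 52 };

    static uint32_t transform_transport_len(uint32_t orig_pdu_len)
    {
            /* what the two inc_rfc1001_len() calls add up to on encrypt */
            return SMB2_TR_HDR_SIZE + orig_pdu_len;
    }

    static uint32_t ciphertext_len(uint32_t transport_len)
    {
            /* buf_data_size in smb3_decrypt_req() */
            return transport_len - SMB2_TR_HDR_SIZE;
    }
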
@@ -8535,7 +8542,7 @@
 bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
 {
 	struct ksmbd_conn *conn = work->conn;
-	struct smb2_hdr *rsp = work->response_buf;
+	struct smb2_hdr *rsp = smb2_get_msg(work->response_buf);
 
 	if (conn->dialect < SMB30_PROT_ID)
 		return false;
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index ff5a2f0..4a3e433 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -10,60 +10,6 @@
 #include "ntlmssp.h"
 #include "smbacl.h"
 
-/*
- * Note that, due to trying to use names similar to the protocol specifications,
- * there are many mixed case field names in the structures below.  Although
- * this does not match typical Linux kernel style, it is necessary to be
- * able to match against the protocol specfication.
- *
- * SMB2 commands
- * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
- * (ie no useful data other than the SMB error code itself) and are marked such.
- * Knowing this helps avoid response buffer allocations and copy in some cases.
- */
-
-/* List of commands in host endian */
-#define SMB2_NEGOTIATE_HE	0x0000
-#define SMB2_SESSION_SETUP_HE	0x0001
-#define SMB2_LOGOFF_HE		0x0002 /* trivial request/resp */
-#define SMB2_TREE_CONNECT_HE	0x0003
-#define SMB2_TREE_DISCONNECT_HE	0x0004 /* trivial req/resp */
-#define SMB2_CREATE_HE		0x0005
-#define SMB2_CLOSE_HE		0x0006
-#define SMB2_FLUSH_HE		0x0007 /* trivial resp */
-#define SMB2_READ_HE		0x0008
-#define SMB2_WRITE_HE		0x0009
-#define SMB2_LOCK_HE		0x000A
-#define SMB2_IOCTL_HE		0x000B
-#define SMB2_CANCEL_HE		0x000C
-#define SMB2_ECHO_HE		0x000D
-#define SMB2_QUERY_DIRECTORY_HE	0x000E
-#define SMB2_CHANGE_NOTIFY_HE	0x000F
-#define SMB2_QUERY_INFO_HE	0x0010
-#define SMB2_SET_INFO_HE	0x0011
-#define SMB2_OPLOCK_BREAK_HE	0x0012
-
-/* The same list in little endian */
-#define SMB2_NEGOTIATE		cpu_to_le16(SMB2_NEGOTIATE_HE)
-#define SMB2_SESSION_SETUP	cpu_to_le16(SMB2_SESSION_SETUP_HE)
-#define SMB2_LOGOFF		cpu_to_le16(SMB2_LOGOFF_HE)
-#define SMB2_TREE_CONNECT	cpu_to_le16(SMB2_TREE_CONNECT_HE)
-#define SMB2_TREE_DISCONNECT	cpu_to_le16(SMB2_TREE_DISCONNECT_HE)
-#define SMB2_CREATE		cpu_to_le16(SMB2_CREATE_HE)
-#define SMB2_CLOSE		cpu_to_le16(SMB2_CLOSE_HE)
-#define SMB2_FLUSH		cpu_to_le16(SMB2_FLUSH_HE)
-#define SMB2_READ		cpu_to_le16(SMB2_READ_HE)
-#define SMB2_WRITE		cpu_to_le16(SMB2_WRITE_HE)
-#define SMB2_LOCK		cpu_to_le16(SMB2_LOCK_HE)
-#define SMB2_IOCTL		cpu_to_le16(SMB2_IOCTL_HE)
-#define SMB2_CANCEL		cpu_to_le16(SMB2_CANCEL_HE)
-#define SMB2_ECHO		cpu_to_le16(SMB2_ECHO_HE)
-#define SMB2_QUERY_DIRECTORY	cpu_to_le16(SMB2_QUERY_DIRECTORY_HE)
-#define SMB2_CHANGE_NOTIFY	cpu_to_le16(SMB2_CHANGE_NOTIFY_HE)
-#define SMB2_QUERY_INFO		cpu_to_le16(SMB2_QUERY_INFO_HE)
-#define SMB2_SET_INFO		cpu_to_le16(SMB2_SET_INFO_HE)
-#define SMB2_OPLOCK_BREAK	cpu_to_le16(SMB2_OPLOCK_BREAK_HE)
-
 /*Create Action Flags*/
 #define FILE_SUPERSEDED                0x00000000
 #define FILE_OPENED            0x00000001
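
The mass deletion from smb2pdu.h is the other half of this change: the command codes, SMB2 and transform header layouts, and negotiate-context structures removed here are picked up from the protocol definitions shared with the client code (presumably fs/smbfs_common/smb2pdu.h in this series) rather than kept as a private copy. The shared struct smb2_hdr is the one deleted below minus its leading smb2_buf_length field, which is precisely what forces every handler above onto smb2_get_msg() and raw-buffer length updates; its shape, reconstructed with the kernel types used in the deletions below:

    struct smb2_hdr {
            __le32 ProtocolId;      /* 0xFE 'S' 'M' 'B' */
            __le16 StructureSize;   /* 64 */
            __le16 CreditCharge;
            __le32 Status;
            __le16 Command;
            __le16 CreditRequest;
            __le32 Flags;
            __le32 NextCommand;
            __le64 MessageId;
            union {
                    struct {
                            __le32 ProcessId;
                            __le32 TreeId;
                    } __packed SyncId;
                    __le64 AsyncId;
            } __packed Id;
            __le64 SessionId;
            __u8   Signature[16];
    } __packed;
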
@@ -96,9 +42,6 @@
 /* SMB2 Max Credits */
 #define SMB2_MAX_CREDITS		8192
 
-#define SMB2_CLIENT_GUID_SIZE		16
-#define SMB2_CREATE_GUID_SIZE		16
-
 /* Maximum buffer size value we can send with 1 credit */
 #define SMB2_MAX_BUFFER_SIZE 65536
 
@@ -107,9 +50,6 @@
 /* BB FIXME - analyze following length BB */
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
-#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* 'B''M''S' */
-#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
-
 #define SMB21_DEFAULT_IOSIZE	(1024 * 1024)
 #define SMB3_DEFAULT_IOSIZE	(4 * 1024 * 1024)
 #define SMB3_DEFAULT_TRANS_SIZE	(1024 * 1024)
@@ -117,78 +57,6 @@
 #define SMB3_MAX_IOSIZE	(8 * 1024 * 1024)
 
 /*
- * SMB2 Header Definition
- *
- * "MBZ" :  Must be Zero
- * "BB"  :  BugBug, Something to check/review/analyze later
- * "PDU" :  "Protocol Data Unit" (ie a network "frame")
- *
- */
-
-#define __SMB2_HEADER_STRUCTURE_SIZE	64
-#define SMB2_HEADER_STRUCTURE_SIZE				\
-	cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE)
-
-struct smb2_hdr {
-	__be32 smb2_buf_length;	/* big endian on wire */
-				/*
-				 * length is only two or three bytes - with
-				 * one or two byte type preceding it that MBZ
-				 */
-	__le32 ProtocolId;	/* 0xFE 'S' 'M' 'B' */
-	__le16 StructureSize;	/* 64 */
-	__le16 CreditCharge;	/* MBZ */
-	__le32 Status;		/* Error from server */
-	__le16 Command;
-	__le16 CreditRequest;	/* CreditResponse */
-	__le32 Flags;
-	__le32 NextCommand;
-	__le64 MessageId;
-	union {
-		struct {
-			__le32 ProcessId;
-			__le32  TreeId;
-		} __packed SyncId;
-		__le64  AsyncId;
-	} __packed Id;
-	__le64  SessionId;
-	__u8   Signature[16];
-} __packed;
-
-struct smb2_pdu {
-	struct smb2_hdr hdr;
-	__le16 StructureSize2; /* size of wct area (varies, request specific) */
-} __packed;
-
-#define SMB3_AES_CCM_NONCE 11
-#define SMB3_AES_GCM_NONCE 12
-
-struct smb2_transform_hdr {
-	__be32 smb2_buf_length; /* big endian on wire */
-	/*
-	 * length is only two or three bytes - with
-	 * one or two byte type preceding it that MBZ
-	 */
-	__le32 ProtocolId;      /* 0xFD 'S' 'M' 'B' */
-	__u8   Signature[16];
-	__u8   Nonce[16];
-	__le32 OriginalMessageSize;
-	__u16  Reserved1;
-	__le16 Flags; /* EncryptionAlgorithm */
-	__le64  SessionId;
-} __packed;
-
-/*
- *	SMB2 flag definitions
- */
-#define SMB2_FLAGS_SERVER_TO_REDIR	cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND	cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS	cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED		cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS	cpu_to_le32(0x10000000)
-#define SMB2_FLAGS_REPLAY_OPERATIONS	cpu_to_le32(0x20000000)
-
-/*
  *	Definitions for SMB2 Protocol Data Units (network frames)
  *
  *  See MS-SMB2.PDF specification for protocol details.
@@ -209,425 +77,30 @@
 	__u8   ErrorData[1];  /* variable length */
 } __packed;
 
-struct smb2_negotiate_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 36 */
-	__le16 DialectCount;
-	__le16 SecurityMode;
-	__le16 Reserved;	/* MBZ */
-	__le32 Capabilities;
-	__u8   ClientGUID[SMB2_CLIENT_GUID_SIZE];
-	/* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
-	__le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
-	__le16 NegotiateContextCount;  /* SMB3.1.1 only. MBZ earlier */
-	__le16 Reserved2;
-	__le16 Dialects[1]; /* One dialect (vers=) at a time for now */
-} __packed;
-
-/* SecurityMode flags */
-#define SMB2_NEGOTIATE_SIGNING_ENABLED_LE	cpu_to_le16(0x0001)
-#define SMB2_NEGOTIATE_SIGNING_REQUIRED		0x0002
-#define SMB2_NEGOTIATE_SIGNING_REQUIRED_LE	cpu_to_le16(0x0002)
-/* Capabilities flags */
-#define SMB2_GLOBAL_CAP_DFS		0x00000001
-#define SMB2_GLOBAL_CAP_LEASING		0x00000002 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_LARGE_MTU	0X00000004 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_MULTI_CHANNEL	0x00000008 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING  0x00000020 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_ENCRYPTION	0x00000040 /* New to SMB3 */
-/* Internal types */
-#define SMB2_NT_FIND			0x00100000
-#define SMB2_LARGE_FILES		0x00200000
-
-#define SMB311_SALT_SIZE			32
-/* Hash Algorithm Types */
-#define SMB2_PREAUTH_INTEGRITY_SHA512	cpu_to_le16(0x0001)
-
-#define PREAUTH_HASHVALUE_SIZE		64
-
 struct preauth_integrity_info {
 	/* PreAuth integrity Hash ID */
 	__le16			Preauth_HashId;
 	/* PreAuth integrity Hash Value */
-	__u8			Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+	__u8			Preauth_HashValue[SMB2_PREAUTH_HASH_SIZE];
 };
 
-/* offset is sizeof smb2_negotiate_rsp - 4 but rounded up to 8 bytes. */
+/* offset is sizeof smb2_negotiate_rsp but rounded up to 8 bytes. */
 #ifdef CONFIG_SMB_SERVER_KERBEROS5
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
  * header(64) + response(64) + GSS_LENGTH(96) + GSS_PADDING(0)
  */
 #define OFFSET_OF_NEG_CONTEXT	0xe0
 #else
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
  * header(64) + response(64) + GSS_LENGTH(74) + GSS_PADDING(6)
  */
 #define OFFSET_OF_NEG_CONTEXT	0xd0
 #endif
 
-#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES	cpu_to_le16(1)
-#define SMB2_ENCRYPTION_CAPABILITIES		cpu_to_le16(2)
-#define SMB2_COMPRESSION_CAPABILITIES		cpu_to_le16(3)
-#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID	cpu_to_le16(5)
-#define SMB2_SIGNING_CAPABILITIES		cpu_to_le16(8)
-#define SMB2_POSIX_EXTENSIONS_AVAILABLE		cpu_to_le16(0x100)
-
-struct smb2_neg_context {
-	__le16  ContextType;
-	__le16  DataLength;
-	__le32  Reserved;
-	/* Followed by array of data */
-} __packed;
-
-struct smb2_preauth_neg_context {
-	__le16	ContextType; /* 1 */
-	__le16	DataLength;
-	__le32	Reserved;
-	__le16	HashAlgorithmCount; /* 1 */
-	__le16	SaltLength;
-	__le16	HashAlgorithms; /* HashAlgorithms[0] since only one defined */
-	__u8	Salt[SMB311_SALT_SIZE];
-} __packed;
-
-/* Encryption Algorithms Ciphers */
-#define SMB2_ENCRYPTION_AES128_CCM	cpu_to_le16(0x0001)
-#define SMB2_ENCRYPTION_AES128_GCM	cpu_to_le16(0x0002)
-#define SMB2_ENCRYPTION_AES256_CCM	cpu_to_le16(0x0003)
-#define SMB2_ENCRYPTION_AES256_GCM	cpu_to_le16(0x0004)
-
-struct smb2_encryption_neg_context {
-	__le16	ContextType; /* 2 */
-	__le16	DataLength;
-	__le32	Reserved;
-	/* CipherCount usually 2, but can be 3 when AES256-GCM enabled */
-	__le16	CipherCount; /* AES-128-GCM and AES-128-CCM by default */
-	__le16	Ciphers[];
-} __packed;
-
-#define SMB3_COMPRESS_NONE	cpu_to_le16(0x0000)
-#define SMB3_COMPRESS_LZNT1	cpu_to_le16(0x0001)
-#define SMB3_COMPRESS_LZ77	cpu_to_le16(0x0002)
-#define SMB3_COMPRESS_LZ77_HUFF	cpu_to_le16(0x0003)
-
-struct smb2_compression_ctx {
-	__le16	ContextType; /* 3 */
-	__le16  DataLength;
-	__le32	Reserved;
-	__le16	CompressionAlgorithmCount;
-	__u16	Padding;
-	__le32	Reserved1;
-	__le16	CompressionAlgorithms[];
-} __packed;
-
-#define POSIX_CTXT_DATA_LEN     16
-struct smb2_posix_neg_context {
-	__le16	ContextType; /* 0x100 */
-	__le16	DataLength;
-	__le32	Reserved;
-	__u8	Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */
-} __packed;
-
-struct smb2_netname_neg_context {
-	__le16	ContextType; /* 0x100 */
-	__le16	DataLength;
-	__le32	Reserved;
-	__le16	NetName[]; /* hostname of target converted to UCS-2 */
-} __packed;
-
-/* Signing algorithms */
-#define SIGNING_ALG_HMAC_SHA256		cpu_to_le16(0)
-#define SIGNING_ALG_AES_CMAC		cpu_to_le16(1)
-#define SIGNING_ALG_AES_GMAC		cpu_to_le16(2)
-
-struct smb2_signing_capabilities {
-	__le16	ContextType; /* 8 */
-	__le16	DataLength;
-	__le32	Reserved;
-	__le16	SigningAlgorithmCount;
-	__le16	SigningAlgorithms[];
-} __packed;
-
-struct smb2_negotiate_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 65 */
-	__le16 SecurityMode;
-	__le16 DialectRevision;
-	__le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
-	__u8   ServerGUID[16];
-	__le32 Capabilities;
-	__le32 MaxTransactSize;
-	__le32 MaxReadSize;
-	__le32 MaxWriteSize;
-	__le64 SystemTime;	/* MBZ */
-	__le64 ServerStartTime;
-	__le16 SecurityBufferOffset;
-	__le16 SecurityBufferLength;
-	__le32 NegotiateContextOffset;	/* Pre:SMB3.1.1 was reserved/ignored */
-	__u8   Buffer[1];	/* variable length GSS security buffer */
-} __packed;
-
-/* Flags */
-#define SMB2_SESSION_REQ_FLAG_BINDING		0x01
-#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA	0x04
-
 #define SMB2_SESSION_EXPIRED		(0)
 #define SMB2_SESSION_IN_PROGRESS	BIT(0)
 #define SMB2_SESSION_VALID		BIT(1)
 
-/* Flags */
-#define SMB2_SESSION_REQ_FLAG_BINDING		0x01
-#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA	0x04
-
-struct smb2_sess_setup_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 25 */
-	__u8   Flags;
-	__u8   SecurityMode;
-	__le32 Capabilities;
-	__le32 Channel;
-	__le16 SecurityBufferOffset;
-	__le16 SecurityBufferLength;
-	__le64 PreviousSessionId;
-	__u8   Buffer[1];	/* variable length GSS security buffer */
-} __packed;
-
-/* Flags/Reserved for SMB3.1.1 */
-#define SMB2_SHAREFLAG_CLUSTER_RECONNECT	0x0001
-
-/* Currently defined SessionFlags */
-#define SMB2_SESSION_FLAG_IS_GUEST_LE		cpu_to_le16(0x0001)
-#define SMB2_SESSION_FLAG_IS_NULL_LE		cpu_to_le16(0x0002)
-#define SMB2_SESSION_FLAG_ENCRYPT_DATA_LE	cpu_to_le16(0x0004)
-struct smb2_sess_setup_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 9 */
-	__le16 SessionFlags;
-	__le16 SecurityBufferOffset;
-	__le16 SecurityBufferLength;
-	__u8   Buffer[1];	/* variable length GSS security buffer */
-} __packed;
-
-struct smb2_logoff_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 4 */
-	__le16 Reserved;
-} __packed;
-
-struct smb2_logoff_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 4 */
-	__le16 Reserved;
-} __packed;
-
-struct smb2_tree_connect_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 9 */
-	__le16 Reserved;	/* Flags in SMB3.1.1 */
-	__le16 PathOffset;
-	__le16 PathLength;
-	__u8   Buffer[1];	/* variable length */
-} __packed;
-
-struct smb2_tree_connect_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 16 */
-	__u8   ShareType;  /* see below */
-	__u8   Reserved;
-	__le32 ShareFlags; /* see below */
-	__le32 Capabilities; /* see below */
-	__le32 MaximalAccess;
-} __packed;
-
-/* Possible ShareType values */
-#define SMB2_SHARE_TYPE_DISK	0x01
-#define SMB2_SHARE_TYPE_PIPE	0x02
-#define	SMB2_SHARE_TYPE_PRINT	0x03
-
-/*
- * Possible ShareFlags - exactly one and only one of the first 4 caching flags
- * must be set (any of the remaining, SHI1005, flags may be set individually
- * or in combination).
- */
-#define SMB2_SHAREFLAG_MANUAL_CACHING			0x00000000
-#define SMB2_SHAREFLAG_AUTO_CACHING			0x00000010
-#define SMB2_SHAREFLAG_VDO_CACHING			0x00000020
-#define SMB2_SHAREFLAG_NO_CACHING			0x00000030
-#define SHI1005_FLAGS_DFS				0x00000001
-#define SHI1005_FLAGS_DFS_ROOT				0x00000002
-#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS		0x00000100
-#define SHI1005_FLAGS_FORCE_SHARED_DELETE		0x00000200
-#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING		0x00000400
-#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM	0x00000800
-#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK		0x00001000
-#define SHI1005_FLAGS_ENABLE_HASH			0x00002000
-
-/* Possible share capabilities */
-#define SMB2_SHARE_CAP_DFS	cpu_to_le32(0x00000008)
-
-struct smb2_tree_disconnect_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 4 */
-	__le16 Reserved;
-} __packed;
-
-struct smb2_tree_disconnect_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 4 */
-	__le16 Reserved;
-} __packed;
-
-#define ATTR_READONLY_LE	cpu_to_le32(ATTR_READONLY)
-#define ATTR_HIDDEN_LE		cpu_to_le32(ATTR_HIDDEN)
-#define ATTR_SYSTEM_LE		cpu_to_le32(ATTR_SYSTEM)
-#define ATTR_DIRECTORY_LE	cpu_to_le32(ATTR_DIRECTORY)
-#define ATTR_ARCHIVE_LE		cpu_to_le32(ATTR_ARCHIVE)
-#define ATTR_NORMAL_LE		cpu_to_le32(ATTR_NORMAL)
-#define ATTR_TEMPORARY_LE	cpu_to_le32(ATTR_TEMPORARY)
-#define ATTR_SPARSE_FILE_LE	cpu_to_le32(ATTR_SPARSE)
-#define ATTR_REPARSE_POINT_LE	cpu_to_le32(ATTR_REPARSE)
-#define ATTR_COMPRESSED_LE	cpu_to_le32(ATTR_COMPRESSED)
-#define ATTR_OFFLINE_LE		cpu_to_le32(ATTR_OFFLINE)
-#define ATTR_NOT_CONTENT_INDEXED_LE	cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
-#define ATTR_ENCRYPTED_LE	cpu_to_le32(ATTR_ENCRYPTED)
-#define ATTR_INTEGRITY_STREAML_LE	cpu_to_le32(0x00008000)
-#define ATTR_NO_SCRUB_DATA_LE	cpu_to_le32(0x00020000)
-#define ATTR_MASK_LE		cpu_to_le32(0x00007FB7)
-
-/* Oplock levels */
-#define SMB2_OPLOCK_LEVEL_NONE		0x00
-#define SMB2_OPLOCK_LEVEL_II		0x01
-#define SMB2_OPLOCK_LEVEL_EXCLUSIVE	0x08
-#define SMB2_OPLOCK_LEVEL_BATCH		0x09
-#define SMB2_OPLOCK_LEVEL_LEASE		0xFF
-/* Non-spec internal type */
-#define SMB2_OPLOCK_LEVEL_NOCHANGE	0x99
-
-/* Desired Access Flags */
-#define FILE_READ_DATA_LE		cpu_to_le32(0x00000001)
-#define FILE_LIST_DIRECTORY_LE		cpu_to_le32(0x00000001)
-#define FILE_WRITE_DATA_LE		cpu_to_le32(0x00000002)
-#define FILE_ADD_FILE_LE		cpu_to_le32(0x00000002)
-#define FILE_APPEND_DATA_LE		cpu_to_le32(0x00000004)
-#define FILE_ADD_SUBDIRECTORY_LE	cpu_to_le32(0x00000004)
-#define FILE_READ_EA_LE			cpu_to_le32(0x00000008)
-#define FILE_WRITE_EA_LE		cpu_to_le32(0x00000010)
-#define FILE_EXECUTE_LE			cpu_to_le32(0x00000020)
-#define FILE_TRAVERSE_LE		cpu_to_le32(0x00000020)
-#define FILE_DELETE_CHILD_LE		cpu_to_le32(0x00000040)
-#define FILE_READ_ATTRIBUTES_LE		cpu_to_le32(0x00000080)
-#define FILE_WRITE_ATTRIBUTES_LE	cpu_to_le32(0x00000100)
-#define FILE_DELETE_LE			cpu_to_le32(0x00010000)
-#define FILE_READ_CONTROL_LE		cpu_to_le32(0x00020000)
-#define FILE_WRITE_DAC_LE		cpu_to_le32(0x00040000)
-#define FILE_WRITE_OWNER_LE		cpu_to_le32(0x00080000)
-#define FILE_SYNCHRONIZE_LE		cpu_to_le32(0x00100000)
-#define FILE_ACCESS_SYSTEM_SECURITY_LE	cpu_to_le32(0x01000000)
-#define FILE_MAXIMAL_ACCESS_LE		cpu_to_le32(0x02000000)
-#define FILE_GENERIC_ALL_LE		cpu_to_le32(0x10000000)
-#define FILE_GENERIC_EXECUTE_LE		cpu_to_le32(0x20000000)
-#define FILE_GENERIC_WRITE_LE		cpu_to_le32(0x40000000)
-#define FILE_GENERIC_READ_LE		cpu_to_le32(0x80000000)
-#define DESIRED_ACCESS_MASK		cpu_to_le32(0xF21F01FF)
-
-/* ShareAccess Flags */
-#define FILE_SHARE_READ_LE		cpu_to_le32(0x00000001)
-#define FILE_SHARE_WRITE_LE		cpu_to_le32(0x00000002)
-#define FILE_SHARE_DELETE_LE		cpu_to_le32(0x00000004)
-#define FILE_SHARE_ALL_LE		cpu_to_le32(0x00000007)
-
-/* CreateDisposition Flags */
-#define FILE_SUPERSEDE_LE		cpu_to_le32(0x00000000)
-#define FILE_OPEN_LE			cpu_to_le32(0x00000001)
-#define FILE_CREATE_LE			cpu_to_le32(0x00000002)
-#define	FILE_OPEN_IF_LE			cpu_to_le32(0x00000003)
-#define FILE_OVERWRITE_LE		cpu_to_le32(0x00000004)
-#define FILE_OVERWRITE_IF_LE		cpu_to_le32(0x00000005)
-#define FILE_CREATE_MASK_LE		cpu_to_le32(0x00000007)
-
-#define FILE_READ_DESIRED_ACCESS_LE	(FILE_READ_DATA_LE |		\
-					FILE_READ_EA_LE |		\
-					FILE_GENERIC_READ_LE)
-#define FILE_WRITE_DESIRE_ACCESS_LE	(FILE_WRITE_DATA_LE |		\
-					FILE_APPEND_DATA_LE |		\
-					FILE_WRITE_EA_LE |		\
-					FILE_WRITE_ATTRIBUTES_LE |	\
-					FILE_GENERIC_WRITE_LE)
-
-/* Impersonation Levels */
-#define IL_ANONYMOUS_LE		cpu_to_le32(0x00000000)
-#define IL_IDENTIFICATION_LE	cpu_to_le32(0x00000001)
-#define IL_IMPERSONATION_LE	cpu_to_le32(0x00000002)
-#define IL_DELEGATE_LE		cpu_to_le32(0x00000003)
-
-/* Create Context Values */
-#define SMB2_CREATE_EA_BUFFER			"ExtA" /* extended attributes */
-#define SMB2_CREATE_SD_BUFFER			"SecD" /* security descriptor */
-#define SMB2_CREATE_DURABLE_HANDLE_REQUEST	"DHnQ"
-#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT	"DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE		"AlSi"
-#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
-#define SMB2_CREATE_TIMEWARP_REQUEST		"TWrp"
-#define SMB2_CREATE_QUERY_ON_DISK_ID		"QFid"
-#define SMB2_CREATE_REQUEST_LEASE		"RqLs"
-#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2   "DH2Q"
-#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
-#define SMB2_CREATE_APP_INSTANCE_ID     "\x45\xBC\xA6\x6A\xEF\xA7\xF7\x4A\x90\x08\xFA\x46\x2E\x14\x4D\x74"
-#define SMB2_CREATE_APP_INSTANCE_VERSION	"\xB9\x82\xD0\xB7\x3B\x56\x07\x4F\xA0\x7B\x52\x4A\x81\x16\xA0\x10"
-#define SVHDX_OPEN_DEVICE_CONTEXT       0x83CE6F1AD851E0986E34401CC9BCFCE9
-#define SMB2_CREATE_TAG_POSIX		"\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
-
-struct smb2_create_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 57 */
-	__u8   SecurityFlags;
-	__u8   RequestedOplockLevel;
-	__le32 ImpersonationLevel;
-	__le64 SmbCreateFlags;
-	__le64 Reserved;
-	__le32 DesiredAccess;
-	__le32 FileAttributes;
-	__le32 ShareAccess;
-	__le32 CreateDisposition;
-	__le32 CreateOptions;
-	__le16 NameOffset;
-	__le16 NameLength;
-	__le32 CreateContextsOffset;
-	__le32 CreateContextsLength;
-	__u8   Buffer[0];
-} __packed;
-
-struct smb2_create_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 89 */
-	__u8   OplockLevel;
-	__u8   Reserved;
-	__le32 CreateAction;
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le64 AllocationSize;
-	__le64 EndofFile;
-	__le32 FileAttributes;
-	__le32 Reserved2;
-	__le64  PersistentFileId;
-	__le64  VolatileFileId;
-	__le32 CreateContextsOffset;
-	__le32 CreateContextsLength;
-	__u8   Buffer[1];
-} __packed;
-
-struct create_context {
-	__le32 Next;
-	__le16 NameOffset;
-	__le16 NameLength;
-	__le16 Reserved;
-	__le16 DataOffset;
-	__le32 DataLength;
-	__u8 Buffer[0];
-} __packed;
-
 struct create_durable_req_v2 {
 	struct create_context ccontext;
 	__u8   Name[8];
@@ -743,22 +216,21 @@
 
 #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE	cpu_to_le32(0x02)
 
+#define SMB2_LEASE_KEY_SIZE			16
+
 struct lease_context {
-	__le64 LeaseKeyLow;
-	__le64 LeaseKeyHigh;
+	__u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
 	__le32 LeaseState;
 	__le32 LeaseFlags;
 	__le64 LeaseDuration;
 } __packed;
 
 struct lease_context_v2 {
-	__le64 LeaseKeyLow;
-	__le64 LeaseKeyHigh;
+	__u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
 	__le32 LeaseState;
 	__le32 LeaseFlags;
 	__le64 LeaseDuration;
-	__le64 ParentLeaseKeyLow;
-	__le64 ParentLeaseKeyHigh;
+	__u8 ParentLeaseKey[SMB2_LEASE_KEY_SIZE];
 	__le16 Epoch;
 	__le16 Reserved;
 } __packed;
@@ -776,114 +248,12 @@
 	__u8   Pad[4];
 } __packed;
 
-/* Currently defined values for close flags */
-#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB	cpu_to_le16(0x0001)
-struct smb2_close_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 24 */
-	__le16 Flags;
-	__le32 Reserved;
-	__le64  PersistentFileId;
-	__le64  VolatileFileId;
-} __packed;
-
-struct smb2_close_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* 60 */
-	__le16 Flags;
-	__le32 Reserved;
-	__le64 CreationTime;
-	__le64 LastAccessTime;
-	__le64 LastWriteTime;
-	__le64 ChangeTime;
-	__le64 AllocationSize;	/* Beginning of FILE_STANDARD_INFO equivalent */
-	__le64 EndOfFile;
-	__le32 Attributes;
-} __packed;
-
-struct smb2_flush_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;	/* Must be 24 */
-	__le16 Reserved1;
-	__le32 Reserved2;
-	__le64  PersistentFileId;
-	__le64  VolatileFileId;
-} __packed;
-
-struct smb2_flush_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize;
-	__le16 Reserved;
-} __packed;
-
 struct smb2_buffer_desc_v1 {
 	__le64 offset;
 	__le32 token;
 	__le32 length;
 } __packed;
 
-#define SMB2_CHANNEL_NONE		cpu_to_le32(0x00000000)
-#define SMB2_CHANNEL_RDMA_V1		cpu_to_le32(0x00000001)
-#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002)
-
-struct smb2_read_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 49 */
-	__u8   Padding; /* offset from start of SMB2 header to place read */
-	__u8   Reserved;
-	__le32 Length;
-	__le64 Offset;
-	__le64  PersistentFileId;
-	__le64  VolatileFileId;
-	__le32 MinimumCount;
-	__le32 Channel; /* Reserved MBZ */
-	__le32 RemainingBytes;
-	__le16 ReadChannelInfoOffset; /* Reserved MBZ */
-	__le16 ReadChannelInfoLength; /* Reserved MBZ */
-	__u8   Buffer[1];
-} __packed;
-
-struct smb2_read_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 17 */
-	__u8   DataOffset;
-	__u8   Reserved;
-	__le32 DataLength;
-	__le32 DataRemaining;
-	__u32  Reserved2;
-	__u8   Buffer[1];
-} __packed;
-
-/* For the write request Flags field below, the following flag is defined: */
-#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
-
-struct smb2_write_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 49 */
-	__le16 DataOffset; /* offset from start of SMB2 header to write data */
-	__le32 Length;
-	__le64 Offset;
-	__le64  PersistentFileId;
-	__le64  VolatileFileId;
-	__le32 Channel; /* Reserved MBZ */
-	__le32 RemainingBytes;
-	__le16 WriteChannelInfoOffset; /* Reserved MBZ */
-	__le16 WriteChannelInfoLength; /* Reserved MBZ */
-	__le32 Flags;
-	__u8   Buffer[1];
-} __packed;
-
-struct smb2_write_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 17 */
-	__u8   DataOffset;
-	__u8   Reserved;
-	__le32 DataLength;
-	__le32 DataRemaining;
-	__u32  Reserved2;
-	__u8   Buffer[1];
-} __packed;
-
 #define SMB2_0_IOCTL_IS_FSCTL 0x00000001
 
 struct duplicate_extents_to_file {
@@ -1033,43 +403,6 @@
 	__u8	DataBuffer[]; /* Variable Length */
 } __packed;
 
-/* Completion Filter flags for Notify */
-#define FILE_NOTIFY_CHANGE_FILE_NAME	0x00000001
-#define FILE_NOTIFY_CHANGE_DIR_NAME	0x00000002
-#define FILE_NOTIFY_CHANGE_NAME		0x00000003
-#define FILE_NOTIFY_CHANGE_ATTRIBUTES	0x00000004
-#define FILE_NOTIFY_CHANGE_SIZE		0x00000008
-#define FILE_NOTIFY_CHANGE_LAST_WRITE	0x00000010
-#define FILE_NOTIFY_CHANGE_LAST_ACCESS	0x00000020
-#define FILE_NOTIFY_CHANGE_CREATION	0x00000040
-#define FILE_NOTIFY_CHANGE_EA		0x00000080
-#define FILE_NOTIFY_CHANGE_SECURITY	0x00000100
-#define FILE_NOTIFY_CHANGE_STREAM_NAME	0x00000200
-#define FILE_NOTIFY_CHANGE_STREAM_SIZE	0x00000400
-#define FILE_NOTIFY_CHANGE_STREAM_WRITE	0x00000800
-
-/* Flags */
-#define SMB2_WATCH_TREE	0x0001
-
-struct smb2_notify_req {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 32 */
-	__le16 Flags;
-	__le32 OutputBufferLength;
-	__le64 PersistentFileId;
-	__le64 VolatileFileId;
-	__u32 CompletionFilter;
-	__u32 Reserved;
-} __packed;
-
-struct smb2_notify_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 9 */
-	__le16 OutputBufferOffset;
-	__le32 OutputBufferLength;
-	__u8 Buffer[1];
-} __packed;
-
 /* SMB2 Notify Action Flags */
 #define FILE_ACTION_ADDED		0x00000001
 #define FILE_ACTION_REMOVED		0x00000002
@@ -1528,7 +861,7 @@
 	__le64 CurrentByteOffset;
 } __packed;
 
-#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000103e)
+#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000100e)
 
 struct smb2_file_mode_info {
 	__le32 Mode;
@@ -1705,4 +1038,13 @@
 int smb2_oplock_break(struct ksmbd_work *work);
 int smb2_notify(struct ksmbd_work *ksmbd_work);
 
+/*
+ * Get the body of the smb2 message excluding the 4 byte rfc1002 headers
+ * from request/response buffer.
+ */
+static inline void *smb2_get_msg(void *buf)
+{
+	return buf + 4;
+}
+
 #endif	/* _SMB2PDU_H */
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index 707490a..ef7f42b 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -132,7 +132,7 @@
  */
 int ksmbd_verify_smb_message(struct ksmbd_work *work)
 {
-	struct smb2_hdr *smb2_hdr = work->request_buf + work->next_smb2_rcv_hdr_off;
+	struct smb2_hdr *smb2_hdr = ksmbd_req_buf_next(work);
 	struct smb_hdr *hdr;
 
 	if (smb2_hdr->ProtocolId == SMB2_PROTO_NUMBER)
@@ -239,14 +239,14 @@
 static int ksmbd_negotiate_smb_dialect(void *buf)
 {
 	int smb_buf_length = get_rfc1002_len(buf);
-	__le32 proto = ((struct smb2_hdr *)buf)->ProtocolId;
+	__le32 proto = ((struct smb2_hdr *)smb2_get_msg(buf))->ProtocolId;
 
 	if (proto == SMB2_PROTO_NUMBER) {
 		struct smb2_negotiate_req *req;
 		int smb2_neg_size =
-			offsetof(struct smb2_negotiate_req, Dialects) - 4;
+			offsetof(struct smb2_negotiate_req, Dialects);
 
-		req = (struct smb2_negotiate_req *)buf;
+		req = (struct smb2_negotiate_req *)smb2_get_msg(buf);
 		if (smb2_neg_size > smb_buf_length)
 			goto err_out;
 
@@ -445,11 +445,12 @@
 	struct ksmbd_conn *conn = work->conn;
 	int ret;
 
-	conn->dialect = ksmbd_negotiate_smb_dialect(work->request_buf);
+	conn->dialect =
+		ksmbd_negotiate_smb_dialect(work->request_buf);
 	ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
 
 	if (command == SMB2_NEGOTIATE_HE) {
-		struct smb2_hdr *smb2_hdr = work->request_buf;
+		struct smb2_hdr *smb2_hdr = smb2_get_msg(work->request_buf);
 
 		if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) {
 			ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 6e79e75..5059084 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -10,6 +10,7 @@
 
 #include "glob.h"
 #include "nterr.h"
+#include "../smbfs_common/smb2pdu.h"
 #include "smb2pdu.h"
 
 /* ksmbd's Specific ERRNO */
@@ -32,17 +33,6 @@
 #define SMB302_VERSION_STRING	"3.02"
 #define SMB311_VERSION_STRING	"3.1.1"
 
-/* Dialects */
-#define SMB10_PROT_ID		0x00
-#define SMB20_PROT_ID		0x0202
-#define SMB21_PROT_ID		0x0210
-/* multi-protocol negotiate request */
-#define SMB2X_PROT_ID		0x02FF
-#define SMB30_PROT_ID		0x0300
-#define SMB302_PROT_ID		0x0302
-#define SMB311_PROT_ID		0x0311
-#define BAD_PROT_ID		0xFFFF
-
 #define SMB_ECHO_INTERVAL	(60 * HZ)
 
 #define CIFS_DEFAULT_IOSIZE	(64 * 1024)
@@ -59,21 +49,6 @@
 /*
  * File Attribute flags
  */
-#define ATTR_READONLY			0x0001
-#define ATTR_HIDDEN			0x0002
-#define ATTR_SYSTEM			0x0004
-#define ATTR_VOLUME			0x0008
-#define ATTR_DIRECTORY			0x0010
-#define ATTR_ARCHIVE			0x0020
-#define ATTR_DEVICE			0x0040
-#define ATTR_NORMAL			0x0080
-#define ATTR_TEMPORARY			0x0100
-#define ATTR_SPARSE			0x0200
-#define ATTR_REPARSE			0x0400
-#define ATTR_COMPRESSED			0x0800
-#define ATTR_OFFLINE			0x1000
-#define ATTR_NOT_CONTENT_INDEXED	0x2000
-#define ATTR_ENCRYPTED			0x4000
 #define ATTR_POSIX_SEMANTICS		0x01000000
 #define ATTR_BACKUP_SEMANTICS		0x02000000
 #define ATTR_DELETE_ON_CLOSE		0x04000000
@@ -82,23 +57,6 @@
 #define ATTR_NO_BUFFERING		0x20000000
 #define ATTR_WRITE_THROUGH		0x80000000
 
-#define ATTR_READONLY_LE		cpu_to_le32(ATTR_READONLY)
-#define ATTR_HIDDEN_LE			cpu_to_le32(ATTR_HIDDEN)
-#define ATTR_SYSTEM_LE			cpu_to_le32(ATTR_SYSTEM)
-#define ATTR_DIRECTORY_LE		cpu_to_le32(ATTR_DIRECTORY)
-#define ATTR_ARCHIVE_LE			cpu_to_le32(ATTR_ARCHIVE)
-#define ATTR_NORMAL_LE			cpu_to_le32(ATTR_NORMAL)
-#define ATTR_TEMPORARY_LE		cpu_to_le32(ATTR_TEMPORARY)
-#define ATTR_SPARSE_FILE_LE		cpu_to_le32(ATTR_SPARSE)
-#define ATTR_REPARSE_POINT_LE		cpu_to_le32(ATTR_REPARSE)
-#define ATTR_COMPRESSED_LE		cpu_to_le32(ATTR_COMPRESSED)
-#define ATTR_OFFLINE_LE			cpu_to_le32(ATTR_OFFLINE)
-#define ATTR_NOT_CONTENT_INDEXED_LE	cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
-#define ATTR_ENCRYPTED_LE		cpu_to_le32(ATTR_ENCRYPTED)
-#define ATTR_INTEGRITY_STREAML_LE	cpu_to_le32(0x00008000)
-#define ATTR_NO_SCRUB_DATA_LE		cpu_to_le32(0x00020000)
-#define ATTR_MASK_LE			cpu_to_le32(0x00007FB7)
-
 /* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
 #define FILE_SUPPORTS_SPARSE_VDL	0x10000000 /* faster nonsparse extend */
 #define FILE_SUPPORTS_BLOCK_REFCOUNTING	0x08000000 /* allow ioctl dup extents */
@@ -160,11 +118,6 @@
 /* file_execute, file_read_attributes*/
 /* write_dac, and delete.           */
 
-#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
-#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
-		| FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
-#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
-
 #define SET_FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA \
 		| FILE_READ_ATTRIBUTES \
 		| DELETE | READ_CONTROL | WRITE_DAC \
@@ -477,12 +430,6 @@
 	int (*proc)(struct ksmbd_work *swork);
 };
 
-static inline size_t
-smb2_hdr_size_no_buflen(struct smb_version_values *vals)
-{
-	return vals->header_size - 4;
-}
-
 int ksmbd_min_protocol(void);
 int ksmbd_max_protocol(void);
 
diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c
index a2fd5a4..7e57cbb 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/ksmbd/transport_rdma.c
@@ -484,7 +484,7 @@
 		struct smb_direct_data_transfer *req =
 			(struct smb_direct_data_transfer *)recvmsg->packet;
 		struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
-				+ le32_to_cpu(req->data_offset) - 4);
+				+ le32_to_cpu(req->data_offset));
 		ksmbd_debug(RDMA,
 			    "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
 			    le16_to_cpu(req->credits_granted),
@@ -2043,7 +2043,6 @@
 	smb_direct_listener.cm_id = NULL;
 
 	if (smb_direct_wq) {
-		flush_workqueue(smb_direct_wq);
 		destroy_workqueue(smb_direct_wq);
 		smb_direct_wq = NULL;
 	}
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 835b384..19d3639 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -1013,7 +1013,7 @@
 			loff_t off, loff_t len)
 {
 	smb_break_all_levII_oplock(work, fp, 1);
-	if (fp->f_ci->m_fattr & ATTR_SPARSE_FILE_LE)
+	if (fp->f_ci->m_fattr & FILE_ATTRIBUTE_SPARSE_FILE_LE)
 		return vfs_fallocate(fp->filp,
 				     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				     off, len);
@@ -1624,7 +1624,7 @@
 	time = ksmbd_UnixTimeToNT(kstat->ctime);
 	info->ChangeTime = cpu_to_le64(time);
 
-	if (ksmbd_kstat->file_attributes & ATTR_DIRECTORY_LE) {
+	if (ksmbd_kstat->file_attributes & FILE_ATTRIBUTE_DIRECTORY_LE) {
 		info->EndOfFile = 0;
 		info->AllocationSize = 0;
 	} else {
@@ -1654,9 +1654,9 @@
 	 * or that acl is disabled in server's filesystem and the config is yes.
 	 */
 	if (S_ISDIR(ksmbd_kstat->kstat->mode))
-		ksmbd_kstat->file_attributes = ATTR_DIRECTORY_LE;
+		ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_DIRECTORY_LE;
 	else
-		ksmbd_kstat->file_attributes = ATTR_ARCHIVE_LE;
+		ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_ARCHIVE_LE;
 
 	if (test_share_config_flag(work->tcon->share_conf,
 				   KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h
index b0d5b8f..adf94a4 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/ksmbd/vfs.h
@@ -25,48 +25,9 @@
 };
 
 /* CreateOptions */
-/* Flag is set, it must not be a file, valid for directory only */
-#define FILE_DIRECTORY_FILE_LE			cpu_to_le32(0x00000001)
-#define FILE_WRITE_THROUGH_LE			cpu_to_le32(0x00000002)
-#define FILE_SEQUENTIAL_ONLY_LE			cpu_to_le32(0x00000004)
-
-/* Should not buffer on server */
-#define FILE_NO_INTERMEDIATE_BUFFERING_LE	cpu_to_le32(0x00000008)
-/* MBZ */
-#define FILE_SYNCHRONOUS_IO_ALERT_LE		cpu_to_le32(0x00000010)
-/* MBZ */
-#define FILE_SYNCHRONOUS_IO_NONALERT_LE		cpu_to_le32(0x00000020)
-
-/* Flag must not be set for directory */
-#define FILE_NON_DIRECTORY_FILE_LE		cpu_to_le32(0x00000040)
-
-/* Should be zero */
 #define CREATE_TREE_CONNECTION			cpu_to_le32(0x00000080)
-#define FILE_COMPLETE_IF_OPLOCKED_LE		cpu_to_le32(0x00000100)
-#define FILE_NO_EA_KNOWLEDGE_LE			cpu_to_le32(0x00000200)
-#define FILE_OPEN_REMOTE_INSTANCE		cpu_to_le32(0x00000400)
-
-/*
- * Doc says this is the obsolete "open for recovery" flag; it should be
- * zero in any case.
- */
-#define CREATE_OPEN_FOR_RECOVERY		cpu_to_le32(0x00000400)
-#define FILE_RANDOM_ACCESS_LE			cpu_to_le32(0x00000800)
-#define FILE_DELETE_ON_CLOSE_LE			cpu_to_le32(0x00001000)
-#define FILE_OPEN_BY_FILE_ID_LE			cpu_to_le32(0x00002000)
-#define FILE_OPEN_FOR_BACKUP_INTENT_LE		cpu_to_le32(0x00004000)
-#define FILE_NO_COMPRESSION_LE			cpu_to_le32(0x00008000)
-
-/* Should be zero */
-#define FILE_OPEN_REQUIRING_OPLOCK		cpu_to_le32(0x00010000)
-#define FILE_DISALLOW_EXCLUSIVE			cpu_to_le32(0x00020000)
 #define FILE_RESERVE_OPFILTER_LE		cpu_to_le32(0x00100000)
-#define FILE_OPEN_REPARSE_POINT_LE		cpu_to_le32(0x00200000)
-#define FILE_OPEN_NO_RECALL_LE			cpu_to_le32(0x00400000)
 
-/* Should be zero */
-#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE	cpu_to_le32(0x00800000)
-#define CREATE_OPTIONS_MASK			cpu_to_le32(0x00FFFFFF)
 #define CREATE_OPTION_READONLY			0x10000000
 /* system. NB not sent over wire */
 #define CREATE_OPTION_SPECIAL			0x20000000
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 994ec22..7046f9b 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -230,7 +230,7 @@
 
 /*
  * Deal with the completion of writing the data to the cache.  We have to clear
- * the PG_fscache bits on the pages involved and release the caller's ref.
+ * the PG_fscache bits on the folios involved and release the caller's ref.
  *
  * May be called in softirq mode and we inherit a ref from the caller.
  */
@@ -238,7 +238,7 @@
 					  bool was_async)
 {
 	struct netfs_read_subrequest *subreq;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t unlocked = 0;
 	bool have_unlocked = false;
 
@@ -247,14 +247,14 @@
 	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
 		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
 
-		xas_for_each(&xas, page, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
 			/* We might have multiple writes from the same huge
-			 * page, but we mustn't unlock a page more than once.
+			 * folio, but we mustn't unlock a folio more than once.
 			 */
-			if (have_unlocked && page->index <= unlocked)
+			if (have_unlocked && folio_index(folio) <= unlocked)
 				continue;
-			unlocked = page->index;
-			end_page_fscache(page);
+			unlocked = folio_index(folio);
+			folio_end_fscache(folio);
 			have_unlocked = true;
 		}
 	}
@@ -367,18 +367,17 @@
 }
 
 /*
- * Unlock the pages in a read operation.  We need to set PG_fscache on any
- * pages we're going to write back before we unlock them.
+ * Unlock the folios in a read operation.  We need to set PG_fscache on any
+ * folios we're going to write back before we unlock them.
  */
 static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 {
 	struct netfs_read_subrequest *subreq;
-	struct page *page;
+	struct folio *folio;
 	unsigned int iopos, account = 0;
 	pgoff_t start_page = rreq->start / PAGE_SIZE;
 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
 	bool subreq_failed = false;
-	int i;
 
 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
 
@@ -403,9 +402,9 @@
 	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, last_page) {
-		unsigned int pgpos = (page->index - start_page) * PAGE_SIZE;
-		unsigned int pgend = pgpos + thp_size(page);
+	xas_for_each(&xas, folio, last_page) {
+		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+		unsigned int pgend = pgpos + folio_size(folio);
 		bool pg_failed = false;
 
 		for (;;) {
@@ -414,7 +413,7 @@
 				break;
 			}
 			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
-				set_page_fscache(page);
+				folio_start_fscache(folio);
 			pg_failed |= subreq_failed;
 			if (pgend < iopos + subreq->len)
 				break;
@@ -433,17 +432,16 @@
 		}
 
 		if (!pg_failed) {
-			for (i = 0; i < thp_nr_pages(page); i++)
-				flush_dcache_page(page);
-			SetPageUptodate(page);
+			flush_dcache_folio(folio);
+			folio_mark_uptodate(folio);
 		}
 
-		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_PAGES, &rreq->flags)) {
-			if (page->index == rreq->no_unlock_page &&
-			    test_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags))
+		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+			if (folio_index(folio) == rreq->no_unlock_folio &&
+			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
 				_debug("no unlock");
 			else
-				unlock_page(page);
+				folio_unlock(folio);
 		}
 	}
 	rcu_read_unlock();
@@ -876,7 +874,6 @@
 		     void *netfs_priv)
 {
 	struct netfs_read_request *rreq;
-	struct page *page;
 	unsigned int debug_index = 0;
 	int ret;
 
@@ -911,11 +908,11 @@
 
 	} while (rreq->submitted < rreq->len);
 
-	/* Drop the refs on the pages here rather than in the cache or
+	/* Drop the refs on the folios here rather than in the cache or
 	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
 	 */
-	while ((page = readahead_page(ractl)))
-		put_page(page);
+	while (readahead_folio(ractl))
+		;
 
 	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
 	if (atomic_dec_and_test(&rreq->nr_rd_ops))
@@ -935,7 +932,7 @@
 /**
  * netfs_readpage - Helper to manage a readpage request
  * @file: The file to read from
- * @page: The page to read
+ * @folio: The folio to read
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
  *
@@ -950,7 +947,7 @@
  * This is usable whether or not caching is enabled.
  */
 int netfs_readpage(struct file *file,
-		   struct page *page,
+		   struct folio *folio,
 		   const struct netfs_read_request_ops *ops,
 		   void *netfs_priv)
 {
@@ -958,23 +955,23 @@
 	unsigned int debug_index = 0;
 	int ret;
 
-	_enter("%lx", page_index(page));
+	_enter("%lx", folio_index(folio));
 
 	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
 	if (!rreq) {
 		if (netfs_priv)
-			ops->cleanup(netfs_priv, page_file_mapping(page));
-		unlock_page(page);
+			ops->cleanup(netfs_priv, folio_file_mapping(folio));
+		folio_unlock(folio);
 		return -ENOMEM;
 	}
-	rreq->mapping	= page_file_mapping(page);
-	rreq->start	= page_file_offset(page);
-	rreq->len	= thp_size(page);
+	rreq->mapping	= folio_file_mapping(folio);
+	rreq->start	= folio_file_pos(folio);
+	rreq->len	= folio_size(folio);
 
 	if (ops->begin_cache_operation) {
 		ret = ops->begin_cache_operation(rreq);
 		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
-			unlock_page(page);
+			folio_unlock(folio);
 			goto out;
 		}
 	}
@@ -1011,41 +1008,41 @@
 }
 EXPORT_SYMBOL(netfs_readpage);
 
-/**
- * netfs_skip_page_read - prep a page for writing without reading first
- * @page: page being prepared
+/*
+ * Prepare a folio for writing without reading first
+ * @folio: The folio being prepared
  * @pos: starting position for the write
  * @len: length of write
  *
  * In some cases, write_begin doesn't need to read at all:
- * - full page write
- * - write that lies in a page that is completely beyond EOF
- * - write that covers the the page from start to EOF or beyond it
+ * - full folio write
+ * - write that lies in a folio that is completely beyond EOF
+ * - write that covers the folio from start to EOF or beyond it
  *
  * If any of these criteria are met, then zero out the unwritten parts
- * of the page and return true. Otherwise, return false.
+ * of the folio and return true. Otherwise, return false.
  */
-static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
+static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = folio_inode(folio);
 	loff_t i_size = i_size_read(inode);
-	size_t offset = offset_in_thp(page, pos);
+	size_t offset = offset_in_folio(folio, pos);
 
-	/* Full page write */
-	if (offset == 0 && len >= thp_size(page))
+	/* Full folio write */
+	if (offset == 0 && len >= folio_size(folio))
 		return true;
 
-	/* pos beyond last page in the file */
+	/* pos beyond last folio in the file */
 	if (pos - offset >= i_size)
 		goto zero_out;
 
-	/* Write that covers from the start of the page to EOF or beyond */
+	/* Write that covers from the start of the folio to EOF or beyond */
 	if (offset == 0 && (pos + len) >= i_size)
 		goto zero_out;
 
 	return false;
 zero_out:
-	zero_user_segments(page, 0, offset, offset + len, thp_size(page));
+	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
 	return true;
 }
 
@@ -1054,9 +1051,9 @@
  * @file: The file to read from
  * @mapping: The mapping to read from
  * @pos: File position at which the write will begin
- * @len: The length of the write (may extend beyond the end of the page chosen)
- * @flags: AOP_* flags
- * @_page: Where to put the resultant page
+ * @len: The length of the write (may extend beyond the end of the folio chosen)
+ * @aop_flags: AOP_* flags
+ * @_folio: Where to put the resultant folio
  * @_fsdata: Place for the netfs to store a cookie
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
@@ -1072,37 +1069,41 @@
  * issue_op, is mandatory.
  *
  * The check_write_begin() operation can be provided to check for and flush
- * conflicting writes once the page is grabbed and locked.  It is passed a
+ * conflicting writes once the folio is grabbed and locked.  It is passed a
  * pointer to the fsdata cookie that gets returned to the VM to be passed to
  * write_end.  It is permitted to sleep.  It should return 0 if the request
- * should go ahead; unlock the page and return -EAGAIN to cause the page to be
- * regot; or return an error.
+ * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
+ * be regot; or return an error.
  *
  * This is usable whether or not caching is enabled.
  */
 int netfs_write_begin(struct file *file, struct address_space *mapping,
-		      loff_t pos, unsigned int len, unsigned int flags,
-		      struct page **_page, void **_fsdata,
+		      loff_t pos, unsigned int len, unsigned int aop_flags,
+		      struct folio **_folio, void **_fsdata,
 		      const struct netfs_read_request_ops *ops,
 		      void *netfs_priv)
 {
 	struct netfs_read_request *rreq;
-	struct page *page, *xpage;
+	struct folio *folio;
 	struct inode *inode = file_inode(file);
-	unsigned int debug_index = 0;
+	unsigned int debug_index = 0, fgp_flags;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int ret;
 
 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
 
 retry:
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
+	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+	if (aop_flags & AOP_FLAG_NOFS)
+		fgp_flags |= FGP_NOFS;
+	folio = __filemap_get_folio(mapping, index, fgp_flags,
+				    mapping_gfp_mask(mapping));
+	if (!folio)
 		return -ENOMEM;
 
 	if (ops->check_write_begin) {
 		/* Allow the netfs (eg. ceph) to flush conflicts. */
-		ret = ops->check_write_begin(file, pos, len, page, _fsdata);
+		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
 		if (ret < 0) {
 			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
 			if (ret == -EAGAIN)
@@ -1111,28 +1112,28 @@
 		}
 	}
 
-	if (PageUptodate(page))
-		goto have_page;
+	if (folio_test_uptodate(folio))
+		goto have_folio;
 
 	/* If the page is beyond the EOF, we want to clear it - unless it's
 	 * within the cache granule containing the EOF, in which case we need
 	 * to preload the granule.
 	 */
 	if (!ops->is_cache_enabled(inode) &&
-	    netfs_skip_page_read(page, pos, len)) {
+	    netfs_skip_folio_read(folio, pos, len)) {
 		netfs_stat(&netfs_n_rh_write_zskip);
-		goto have_page_no_wait;
+		goto have_folio_no_wait;
 	}
 
 	ret = -ENOMEM;
 	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
 	if (!rreq)
 		goto error;
-	rreq->mapping		= page->mapping;
-	rreq->start		= page_offset(page);
-	rreq->len		= thp_size(page);
-	rreq->no_unlock_page	= page->index;
-	__set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags);
+	rreq->mapping		= folio_file_mapping(folio);
+	rreq->start		= folio_file_pos(folio);
+	rreq->len		= folio_size(folio);
+	rreq->no_unlock_folio	= folio_index(folio);
+	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
 	netfs_priv = NULL;
 
 	if (ops->begin_cache_operation) {
@@ -1147,14 +1148,14 @@
 	/* Expand the request to meet caching requirements and download
 	 * preferences.
 	 */
-	ractl._nr_pages = thp_nr_pages(page);
+	ractl._nr_pages = folio_nr_pages(folio);
 	netfs_rreq_expand(rreq, &ractl);
 	netfs_get_read_request(rreq);
 
-	/* We hold the page locks, so we can drop the references */
-	while ((xpage = readahead_page(&ractl)))
-		if (xpage != page)
-			put_page(xpage);
+	/* We hold the folio locks, so we can drop the references */
+	folio_get(folio);
+	while (readahead_folio(&ractl))
+		;
 
 	atomic_set(&rreq->nr_rd_ops, 1);
 	do {
@@ -1184,22 +1185,22 @@
 	if (ret < 0)
 		goto error;
 
-have_page:
-	ret = wait_on_page_fscache_killable(page);
+have_folio:
+	ret = folio_wait_fscache_killable(folio);
 	if (ret < 0)
 		goto error;
-have_page_no_wait:
+have_folio_no_wait:
 	if (netfs_priv)
 		ops->cleanup(netfs_priv, mapping);
-	*_page = page;
+	*_folio = folio;
 	_leave(" = 0");
 	return 0;
 
 error_put:
 	netfs_put_read_request(rreq, false);
 error:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 	if (netfs_priv)
 		ops->cleanup(netfs_priv, mapping);
 	_leave(" = %d", ret);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index dd53704..fda530d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -219,6 +219,7 @@
 					  NFS_INO_DATA_INVAL_DEFER);
 	else if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
 		nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER;
+	trace_nfs_set_cache_invalid(inode, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
 
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 08355b6..8b21ff1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -289,7 +289,9 @@
 	loff_t newsize = pos + len;
 	loff_t end = newsize - 1;
 
-	truncate_pagecache_range(inode, pos, end);
+	WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping,
+				pos >> PAGE_SHIFT, end >> PAGE_SHIFT));
+
 	spin_lock(&inode->i_lock);
 	if (newsize > i_size_read(inode))
 		i_size_write(inode, newsize);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index c8bad73..271e5f9 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1434,8 +1434,7 @@
 	status = decode_clone(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(xdr, res->dst_fattr, res->server);
-
+	decode_getfattr(xdr, res->dst_fattr, res->server);
 out:
 	res->rpc_status = status;
 	return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ecc4594..f63dfa0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1998,6 +1998,10 @@
 		dprintk("%s: exit with error %d for server %s\n",
 				__func__, -EPROTONOSUPPORT, clp->cl_hostname);
 		return -EPROTONOSUPPORT;
+	case -ENOSPC:
+		if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+			nfs_mark_client_ready(clp, -EIO);
+		return -EIO;
 	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
 				 * in nfs4_exchange_id */
 	default:
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 21dac84..b3aee26 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -162,6 +162,7 @@
 DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
 DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
 DEFINE_NFS_INODE_EVENT(nfs_access_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid);
 
 TRACE_EVENT(nfs_access_exit,
 		TP_PROTO(
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b2a1d96..5a93a5d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -288,11 +288,8 @@
 	p = xdr_inline_decode(argp->xdr, count << 2);
 	if (!p)
 		return nfserr_bad_xdr;
-	i = 0;
-	while (i < count)
-		bmval[i++] = be32_to_cpup(p++);
-	while (i < bmlen)
-		bmval[i++] = 0;
+	for (i = 0; i < bmlen; i++)
+		bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
 
 	return nfs_ok;
 }
diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig
index 1667a7e5..f93e69a 100644
--- a/fs/ntfs/Kconfig
+++ b/fs/ntfs/Kconfig
@@ -52,6 +52,7 @@
 config NTFS_RW
 	bool "NTFS write support"
 	depends on NTFS_FS
+	depends on PAGE_SIZE_LESS_THAN_64KB
 	help
 	  This enables the partial, but safe, write support in the NTFS driver.
 
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 30a3b66..509f851 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -154,9 +154,13 @@
 			nr_bytes = count;
 
 		/* If pfn is not ram, return zeros for sparse dump files */
-		if (!pfn_is_ram(pfn))
-			memset(buf, 0, nr_bytes);
-		else {
+		if (!pfn_is_ram(pfn)) {
+			tmp = 0;
+			if (!userbuf)
+				memset(buf, 0, nr_bytes);
+			else if (clear_user(buf, nr_bytes))
+				tmp = -EFAULT;
+		} else {
 			if (encrypted)
 				tmp = copy_oldmem_page_encrypted(pfn, buf,
 								 nr_bytes,
@@ -165,12 +169,12 @@
 			else
 				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
 						       offset, userbuf);
-
-			if (tmp < 0) {
-				up_read(&vmcore_cb_rwsem);
-				return tmp;
-			}
 		}
+		if (tmp < 0) {
+			up_read(&vmcore_cb_rwsem);
+			return tmp;
+		}
+
 		*ppos += nr_bytes;
 		count -= nr_bytes;
 		buf += nr_bytes;
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 328da35..8adabde 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -173,7 +173,6 @@
 	tristate "Log panic/oops to a block device"
 	depends on PSTORE
 	depends on BLOCK
-	depends on BROKEN
 	select PSTORE_ZONE
 	default n
 	help
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 5d1fbaf..4ae0cfc 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -309,7 +309,7 @@
 	if (ret)
 		kfree(best_effort_dev);
 	else
-		pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
+		pr_info("attached %s (%lu) (no dedicated panic_write!)\n",
 			blkdev, best_effort_dev->zone.total_size);
 
 	return ret;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b9614db..f243cb5 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -218,7 +218,7 @@
 #if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
 static int zbufsize_zstd(size_t size)
 {
-	return ZSTD_compressBound(size);
+	return zstd_compress_bound(size);
 }
 #endif
 
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
index 0015cf8..c40445d 100644
--- a/fs/squashfs/zstd_wrapper.c
+++ b/fs/squashfs/zstd_wrapper.c
@@ -34,7 +34,7 @@
 		goto failed;
 	wksp->window_size = max_t(size_t,
 			msblk->block_size, SQUASHFS_METADATA_SIZE);
-	wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size);
+	wksp->mem_size = zstd_dstream_workspace_bound(wksp->window_size);
 	wksp->mem = vmalloc(wksp->mem_size);
 	if (wksp->mem == NULL)
 		goto failed;
@@ -63,15 +63,15 @@
 	struct squashfs_page_actor *output)
 {
 	struct workspace *wksp = strm;
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	size_t total_out = 0;
 	int error = 0;
-	ZSTD_inBuffer in_buf = { NULL, 0, 0 };
-	ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+	zstd_in_buffer in_buf = { NULL, 0, 0 };
+	zstd_out_buffer out_buf = { NULL, 0, 0 };
 	struct bvec_iter_all iter_all = {};
 	struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
 
-	stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
+	stream = zstd_init_dstream(wksp->window_size, wksp->mem, wksp->mem_size);
 
 	if (!stream) {
 		ERROR("Failed to initialize zstd decompressor\n");
@@ -116,14 +116,14 @@
 		}
 
 		total_out -= out_buf.pos;
-		zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+		zstd_err = zstd_decompress_stream(stream, &out_buf, &in_buf);
 		total_out += out_buf.pos; /* add the additional data produced */
 		if (zstd_err == 0)
 			break;
 
-		if (ZSTD_isError(zstd_err)) {
+		if (zstd_is_error(zstd_err)) {
 			ERROR("zstd decompression error: %d\n",
-					(int)ZSTD_getErrorCode(zstd_err));
+					(int)zstd_get_error_code(zstd_err));
 			error = -EIO;
 			break;
 		}
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 70abdfa..42e3e55 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/bio.h>
+#include <linux/iversion.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -43,7 +44,7 @@
 	struct fileIdentDesc *fi = NULL;
 	struct fileIdentDesc cfi;
 	udf_pblk_t block, iblock;
-	loff_t nf_pos;
+	loff_t nf_pos, emit_pos = 0;
 	int flen;
 	unsigned char *fname = NULL, *copy_name = NULL;
 	unsigned char *nameptr;
@@ -57,6 +58,7 @@
 	int i, num, ret = 0;
 	struct extent_position epos = { NULL, 0, {0, 0} };
 	struct super_block *sb = dir->i_sb;
+	bool pos_valid = false;
 
 	if (ctx->pos == 0) {
 		if (!dir_emit_dot(file, ctx))
@@ -67,6 +69,21 @@
 	if (nf_pos >= size)
 		goto out;
 
+	/*
+	 * Something changed since last readdir (either lseek was called or dir
+	 * changed)?  We need to verify the position correctly points at the
+	 * beginning of some dir entry so that the directory parsing code does
+	 * not get confused. Since UDF does not have any reliable way of
+	 * identifying the beginning of a dir entry (names are under user control),
+	 * we need to scan the directory from the beginning.
+	 */
+	if (!inode_eq_iversion(dir, file->f_version)) {
+		emit_pos = nf_pos;
+		nf_pos = 0;
+	} else {
+		pos_valid = true;
+	}
+
 	fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
 	if (!fname) {
 		ret = -ENOMEM;
@@ -122,13 +139,21 @@
 
 	while (nf_pos < size) {
 		struct kernel_lb_addr tloc;
+		loff_t cur_pos = nf_pos;
 
-		ctx->pos = (nf_pos >> 2) + 1;
+		/* Update file position only if we got past the current one */
+		if (nf_pos >= emit_pos) {
+			ctx->pos = (nf_pos >> 2) + 1;
+			pos_valid = true;
+		}
 
 		fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
 					&elen, &offset);
 		if (!fi)
 			goto out;
+		/* Still not at offset where user asked us to read from? */
+		if (cur_pos < emit_pos)
+			continue;
 
 		liu = le16_to_cpu(cfi.lengthOfImpUse);
 		lfi = cfi.lengthFileIdent;
@@ -186,8 +211,11 @@
 	} /* end while */
 
 	ctx->pos = (nf_pos >> 2) + 1;
+	pos_valid = true;
 
 out:
+	if (pos_valid)
+		file->f_version = inode_query_iversion(dir);
 	if (fibh.sbh != fibh.ebh)
 		brelse(fibh.ebh);
 	brelse(fibh.sbh);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index caeef08..0ed4861 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -30,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/crc-itu-t.h>
 #include <linux/exportfs.h>
+#include <linux/iversion.h>
 
 static inline int udf_match(int len1, const unsigned char *name1, int len2,
 			    const unsigned char *name2)
@@ -134,6 +135,8 @@
 			mark_buffer_dirty_inode(fibh->ebh, inode);
 		mark_buffer_dirty_inode(fibh->sbh, inode);
 	}
+	inode_inc_iversion(inode);
+
 	return 0;
 }
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 34247fb..f26b5e0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -57,6 +57,7 @@
 #include <linux/crc-itu-t.h>
 #include <linux/log2.h>
 #include <asm/byteorder.h>
+#include <linux/iversion.h>
 
 #include "udf_sb.h"
 #include "udf_i.h"
@@ -149,6 +150,7 @@
 	init_rwsem(&ei->i_data_sem);
 	ei->cached_extent.lstart = -1;
 	spin_lock_init(&ei->i_extent_cache_lock);
+	inode_set_iversion(&ei->vfs_inode, 1);
 
 	return &ei->vfs_inode;
 }
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index d7d875c..1e4ee04 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -248,6 +248,7 @@
 		spin_unlock(&mp->m_perag_lock);
 		radix_tree_preload_end();
 
+#ifdef __KERNEL__
 		/* Place kernel structure only init below this point. */
 		spin_lock_init(&pag->pag_ici_lock);
 		spin_lock_init(&pag->pagb_lock);
@@ -257,6 +258,7 @@
 		init_waitqueue_head(&pag->pagb_wait);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
+#endif /* __KERNEL__ */
 
 		error = xfs_buf_hash_init(pag);
 		if (error)
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 3f597ca..e411d51 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -64,6 +64,10 @@
 	/* Blocks reserved for the reverse mapping btree. */
 	struct xfs_ag_resv	pag_rmapbt_resv;
 
+	/* for rcu-safe freeing */
+	struct rcu_head	rcu_head;
+
+#ifdef __KERNEL__
 	/* -- kernel only structures below this line -- */
 
 	/*
@@ -90,9 +94,6 @@
 	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
 	struct rhashtable pag_buf_hash;
 
-	/* for rcu-safe freeing */
-	struct rcu_head	rcu_head;
-
 	/* background prealloc block trimming */
 	struct delayed_work	pag_blockgc_work;
 
@@ -102,6 +103,7 @@
 	 * or have some other means to control concurrency.
 	 */
 	struct rhashtable	pagi_unlinked_hash;
+#endif /* __KERNEL__ */
 };
 
 int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index fbc9d81..23523b8 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1077,21 +1077,18 @@
 
 	state = xfs_da_state_alloc(args);
 	if (statep != NULL)
-		*statep = NULL;
+		*statep = state;
 
 	/*
 	 * Search to see if name exists, and get back a pointer to it.
 	 */
 	error = xfs_da3_node_lookup_int(state, &retval);
-	if (error) {
-		xfs_da_state_free(state);
-		return error;
-	}
+	if (error)
+		retval = error;
 
-	if (statep != NULL)
-		*statep = state;
-	else
+	if (!statep)
 		xfs_da_state_free(state);
+
 	return retval;
 }
 
@@ -1112,7 +1109,7 @@
 	 */
 	retval = xfs_attr_node_hasname(args, &dac->da_state);
 	if (retval != -ENOATTR && retval != -EEXIST)
-		return retval;
+		goto error;
 
 	if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
 		goto error;
@@ -1337,7 +1334,7 @@
 
 	error = xfs_attr_node_hasname(args, state);
 	if (error != -EEXIST)
-		return error;
+		goto out;
 	error = 0;
 
 	ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b4e19aa..f18a875 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2785,6 +2785,7 @@
 	return error;
 }
 
+#ifdef __KERNEL__
 struct xfs_btree_split_args {
 	struct xfs_btree_cur	*cur;
 	int			level;
@@ -2870,6 +2871,9 @@
 	destroy_work_on_stack(&args.work);
 	return args.result;
 }
+#else
+#define xfs_btree_split	__xfs_btree_split
+#endif /* __KERNEL__ */
 
 
 /*
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index dd7a2db..9dc1ecb 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -864,7 +864,6 @@
 {
 	struct xfs_da_intnode	*node1;
 	struct xfs_da_intnode	*node2;
-	struct xfs_da_intnode	*tmpnode;
 	struct xfs_da_node_entry *btree1;
 	struct xfs_da_node_entry *btree2;
 	struct xfs_da_node_entry *btree_s;
@@ -894,9 +893,7 @@
 	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
 	     (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
 			be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
-		tmpnode = node1;
-		node1 = node2;
-		node2 = tmpnode;
+		swap(node1, node2);
 		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
 		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
 		btree1 = nodehdr1.btree;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index e147200..da4af21 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -289,22 +289,6 @@
 	trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_);
 }
 
-static inline void
-xfs_inew_wait(
-	struct xfs_inode	*ip)
-{
-	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
-	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
-
-	do {
-		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-		if (!xfs_iflags_test(ip, XFS_INEW))
-			break;
-		schedule();
-	} while (true);
-	finish_wait(wq, &wait.wq_entry);
-}
-
 /*
  * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
  * part of the structure. This is made more complex by the fact we store
@@ -368,18 +352,13 @@
 	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 	error = xfs_reinit_inode(mp, inode);
 	if (error) {
-		bool	wake;
-
 		/*
 		 * Re-initializing the inode failed, and we are in deep
 		 * trouble.  Try to re-add it to the reclaim list.
 		 */
 		rcu_read_lock();
 		spin_lock(&ip->i_flags_lock);
-		wake = !!__xfs_iflags_test(ip, XFS_INEW);
 		ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
-		if (wake)
-			wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
 		ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
 		spin_unlock(&ip->i_flags_lock);
 		rcu_read_unlock();
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e635a3d..c447bf0 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -231,8 +231,7 @@
 #define XFS_IRECLAIM		(1 << 0) /* started reclaiming this inode */
 #define XFS_ISTALE		(1 << 1) /* inode has been staled */
 #define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
-#define __XFS_INEW_BIT		3	 /* inode has just been allocated */
-#define XFS_INEW		(1 << __XFS_INEW_BIT)
+#define XFS_INEW		(1 << 3) /* inode has just been allocated */
 #define XFS_IPRESERVE_DM_FIELDS	(1 << 4) /* has legacy DMAPI fields set */
 #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
@@ -492,7 +491,6 @@
 	xfs_iflags_clear(ip, XFS_INEW);
 	barrier();
 	unlock_new_inode(VFS_I(ip));
-	wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
 }
 
 static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index fedc0df..4f07afa 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -50,13 +50,7 @@
 {
 }
 
-static inline void flush_dcache_folio(struct folio *folio) { }
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-#endif
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-void flush_dcache_folio(struct folio *folio);
 #endif
 
 #ifndef flush_dcache_mmap_lock
diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
index aafd073..b84693f 100644
--- a/include/drm/drm_modeset_lock.h
+++ b/include/drm/drm_modeset_lock.h
@@ -24,6 +24,8 @@
 #ifndef DRM_MODESET_LOCK_H_
 #define DRM_MODESET_LOCK_H_
 
+#include <linux/types.h> /* stackdepot.h is not self-contained */
+#include <linux/stackdepot.h>
 #include <linux/ww_mutex.h>
 
 struct drm_modeset_lock;
@@ -52,6 +54,12 @@
 	struct drm_modeset_lock *contended;
 
 	/*
+	 * Stack depot for debugging when a contended lock was not backed off
+	 * from.
+	 */
+	depot_stack_handle_t stack_depot;
+
+	/*
 	 * list of held locks (drm_modeset_lock)
 	 */
 	struct list_head locked;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 76d7c33..cd785cf 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -351,9 +351,10 @@
  * @bo: Pointer to a ttm_buffer_object to be initialized.
  * @size: Requested size of buffer object.
  * @type: Requested type of buffer object.
- * @flags: Initial placement flags.
+ * @placement: Initial placement for buffer object.
  * @page_alignment: Data alignment in pages.
  * @ctx: TTM operation context for memory allocation.
+ * @sg: Scatter-gather table.
  * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one.
  * @destroy: Destroy function. Use NULL for kfree().
  *
@@ -394,7 +395,7 @@
  * @bo: Pointer to a ttm_buffer_object to be initialized.
  * @size: Requested size of buffer object.
  * @type: Requested type of buffer object.
- * @flags: Initial placement flags.
+ * @placement: Initial placement for buffer object.
  * @page_alignment: Data alignment in pages.
  * @interruptible: If needing to sleep to wait for GPU resources,
  * sleep interruptible.
@@ -402,6 +403,7 @@
  * holds a pointer to a persistent shmem object. Typically, this would
  * point to the shmem object backing a GEM object if TTM is used to back a
  * GEM user interface.
+ * @sg: Scatter-gather table.
  * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one.
  * @destroy: Destroy function. Use NULL for kfree().
  *
@@ -582,8 +584,7 @@
 
 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 				    pgprot_t prot,
-				    pgoff_t num_prefault,
-				    pgoff_t fault_page_size);
+				    pgoff_t num_prefault);
 
 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf);
 
diff --git a/include/dt-bindings/clock/jz4725b-cgu.h b/include/dt-bindings/clock/ingenic,jz4725b-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/jz4725b-cgu.h
rename to include/dt-bindings/clock/ingenic,jz4725b-cgu.h
diff --git a/include/dt-bindings/clock/jz4740-cgu.h b/include/dt-bindings/clock/ingenic,jz4740-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/jz4740-cgu.h
rename to include/dt-bindings/clock/ingenic,jz4740-cgu.h
diff --git a/include/dt-bindings/clock/jz4760-cgu.h b/include/dt-bindings/clock/ingenic,jz4760-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/jz4760-cgu.h
rename to include/dt-bindings/clock/ingenic,jz4760-cgu.h
diff --git a/include/dt-bindings/clock/jz4770-cgu.h b/include/dt-bindings/clock/ingenic,jz4770-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/jz4770-cgu.h
rename to include/dt-bindings/clock/ingenic,jz4770-cgu.h
diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/ingenic,jz4780-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/jz4780-cgu.h
rename to include/dt-bindings/clock/ingenic,jz4780-cgu.h
diff --git a/include/dt-bindings/clock/x1000-cgu.h b/include/dt-bindings/clock/ingenic,x1000-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/x1000-cgu.h
rename to include/dt-bindings/clock/ingenic,x1000-cgu.h
diff --git a/include/dt-bindings/clock/x1830-cgu.h b/include/dt-bindings/clock/ingenic,x1830-cgu.h
similarity index 100%
rename from include/dt-bindings/clock/x1830-cgu.h
rename to include/dt-bindings/clock/ingenic,x1830-cgu.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 143ce7e..b28f879 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -974,6 +974,15 @@
 	return -ENODEV;
 }
 
+static inline int acpi_register_wakeup_handler(int wake_irq,
+	bool (*wakeup)(void *context), void *context)
+{
+	return -ENXIO;
+}
+
+static inline void acpi_unregister_wakeup_handler(
+	bool (*wakeup)(void *context), void *context) { }
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
@@ -1173,7 +1182,6 @@
 
 struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child);
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
@@ -1279,12 +1287,6 @@
 }
 
 static inline struct fwnode_handle *
-acpi_node_get_parent(const struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
-static inline struct fwnode_handle *
 acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
 			     struct fwnode_handle *prev)
 {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f715e88..e7a163a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,7 @@
 	atomic64_t usercnt;
 	struct work_struct work;
 	struct mutex freeze_mutex;
-	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+	atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -1419,6 +1419,7 @@
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int  generic_map_lookup_batch(struct bpf_map *map,
 			      const union bpf_attr *attr,
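Moving writecnt from a mutex-protected u64 to an atomic64_t lets readers check for
in-flight writable mmaps without taking freeze_mutex; bpf_map_write_active() plausibly
reduces to a single atomic read, along these lines (a sketch, not necessarily the
verbatim kernel/bpf/syscall.c body):

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}
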
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
new file mode 100644
index 0000000..fef8b60
--- /dev/null
+++ b/include/linux/cacheflush.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CACHEFLUSH_H
+#define _LINUX_CACHEFLUSH_H
+
+#include <asm/cacheflush.h>
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
+#else
+static inline void flush_dcache_folio(struct folio *folio)
+{
+}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+
+#endif /* _LINUX_CACHEFLUSH_H */
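Everything in the new header keys off ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE: architectures
with a real flush_dcache_page() get a flush_dcache_folio() declaration unless they
provide their own, and all others get the inline no-op. A hypothetical arch opt-in
would look roughly like:

/* sketch of a hypothetical arch/<arch>/include/asm/cacheflush.h */
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);

#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO	/* opt out of the generic decl */
void flush_dcache_folio(struct folio *folio);
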
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index bc2699f..7ad6c3d 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -302,6 +302,8 @@
 	CEPH_SESSION_REQUEST_FLUSH_MDLOG,
 };
 
+#define CEPH_SESSION_BLOCKLISTED	(1 << 0)  /* session blocklisted */
+
 extern const char *ceph_session_op_name(int op);
 
 struct ceph_mds_session_head {
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 83fa08a..3431011 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -475,6 +475,14 @@
 				       u64 expected_object_size,
 				       u64 expected_write_size,
 				       u32 flags);
+extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+				     u64 src_snapid, u64 src_version,
+				     struct ceph_object_id *src_oid,
+				     struct ceph_object_locator *src_oloc,
+				     u32 src_fadvise_flags,
+				     u32 dst_fadvise_flags,
+				     u32 truncate_seq, u64 truncate_size,
+				     u8 copy_from_flags);
 
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
@@ -515,17 +523,6 @@
 		   struct page *req_page, size_t req_len,
 		   struct page **resp_pages, size_t *resp_len);
 
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-			u64 src_snapid, u64 src_version,
-			struct ceph_object_id *src_oid,
-			struct ceph_object_locator *src_oloc,
-			u32 src_fadvise_flags,
-			struct ceph_object_id *dst_oid,
-			struct ceph_object_locator *dst_oloc,
-			u32 dst_fadvise_flags,
-			u32 truncate_seq, u64 truncate_size,
-			u8 copy_from_flags);
-
 /* watch/notify */
 struct ceph_osd_linger_request *
 ceph_osdc_watch(struct ceph_osd_client *osdc,
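With ceph_osdc_copy_from() removed, copy-from becomes an op initializer and the caller
owns the request lifecycle; the conversion looks roughly like this sketch (allocation
counts and error handling abbreviated):

	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
	if (IS_ERR(req))
		return PTR_ERR(req);
	ret = osd_req_op_copy_from_init(req, src_snapid, src_version,
					src_oid, src_oloc, src_fadvise_flags,
					dst_fadvise_flags, truncate_seq,
					truncate_size, copy_from_flags);
	/* then set the destination oid/oloc on the request and submit it */
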
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 8b6c2063..dbd235a 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -170,15 +170,20 @@
 	/** @index: index into the shared fences */
 	unsigned int index;
 
-	/** @fences: the shared fences */
+	/** @fences: the shared fences; private, *MUST* not dereference  */
 	struct dma_resv_list *fences;
 
+	/** @shared_count: number of shared fences */
+	unsigned int shared_count;
+
 	/** @is_restarted: true if this is the first returned fence */
 	bool is_restarted;
 };
 
 struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor);
 struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor);
+struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
 
 /**
  * dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -244,6 +249,24 @@
 	for (fence = dma_resv_iter_first_unlocked(cursor);		\
 	     fence; fence = dma_resv_iter_next_unlocked(cursor))
 
+/**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * &dma_resv.lock. @all_fences controls if the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator and the fence stays
+ * valid as long as the lock is held and so no extra reference to the fence is
+ * taken.
+ */
+#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, all_fences),	\
+	     fence = dma_resv_iter_first(cursor); fence;	\
+	     fence = dma_resv_iter_next(cursor))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6b5d36b..dbd39b2 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -362,6 +362,7 @@
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID		EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
+#define AMD_SEV_MEM_ENCRYPT_GUID		EFI_GUID(0x0cf29b71, 0x9e51, 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
 
 typedef struct {
 	efi_guid_t guid;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 5950f8f..6f3db99 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -262,7 +262,7 @@
 
 	/* Draws a rectangle */
 	void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
-	/* Copy data from area to another */
+	/* Copy data from one area to another. Obsolete. */
 	void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
 	/* Draws an image to the display */
 	void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cb616f..bbf812c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2518,7 +2518,6 @@
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
-#define FS_THP_SUPPORT		8192	/* Remove once all fs converted */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 462634b..74c4102 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -205,9 +205,9 @@
 void disk_uevent(struct gendisk *disk, enum kobject_action action);
 
 /* block/genhd.c */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		const struct attribute_group **groups);
-static inline int add_disk(struct gendisk *disk)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
 {
 	return device_add_disk(NULL, disk, NULL);
 }
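Since add_disk()/device_add_disk() are now __must_check, callers have to handle
failure; the typical conversion is (label name is illustrative):

	err = add_disk(disk);
	if (err)
		goto out_cleanup_disk;	/* undo allocation, put_disk(), etc. */
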
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 25aff0f..39bb9b4 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -5,12 +5,11 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/cacheflush.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
 
-#include <asm/cacheflush.h>
-
 #include "highmem-internal.h"
 
 /**
@@ -231,10 +230,10 @@
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
  */
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#ifdef CONFIG_HIGHMEM
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2);
-#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#else
 static inline void zero_user_segments(struct page *page,
 		unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
@@ -254,7 +253,7 @@
 	for (i = 0; i < compound_nr(page); i++)
 		flush_dcache_page(page + i);
 }
-#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#endif
 
 static inline void zero_user_segment(struct page *page,
 	unsigned start, unsigned end)
@@ -364,4 +363,42 @@
 	kunmap_local(addr);
 }
 
+/**
+ * folio_zero_segments() - Zero two byte ranges in a folio.
+ * @folio: The folio to write to.
+ * @start1: The first byte to zero.
+ * @xend1: One more than the last byte in the first range.
+ * @start2: The first byte to zero in the second range.
+ * @xend2: One more than the last byte in the second range.
+ */
+static inline void folio_zero_segments(struct folio *folio,
+		size_t start1, size_t xend1, size_t start2, size_t xend2)
+{
+	zero_user_segments(&folio->page, start1, xend1, start2, xend2);
+}
+
+/**
+ * folio_zero_segment() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @xend: One more than the last byte to zero.
+ */
+static inline void folio_zero_segment(struct folio *folio,
+		size_t start, size_t xend)
+{
+	zero_user_segments(&folio->page, start, xend, 0, 0);
+}
+
+/**
+ * folio_zero_range() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @length: The number of bytes to zero.
+ */
+static inline void folio_zero_range(struct folio *folio,
+		size_t start, size_t length)
+{
+	zero_user_segments(&folio->page, start, start + length, 0, 0);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
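The three folio helpers are thin wrappers over zero_user_segments(), which now works
for any folio size under CONFIG_HIGHMEM rather than only with transparent hugepages.
For example, a filesystem zeroing the unwritten tail of a folio might do (the offset
name is illustrative):

	/* zero from 'from' to the end of the folio */
	folio_zero_segment(folio, from, folio_size(folio));
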
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c137396..ba025ae 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -128,6 +128,13 @@
 		css_get(resv_map->css);
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+	if (resv_map->css)
+		css_put(resv_map->css);
+}
+
 extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					struct hugetlb_cgroup **ptr);
 extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -211,6 +218,11 @@
 {
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+}
+
 static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					       struct hugetlb_cgroup **ptr)
 {
diff --git a/include/linux/input/cy8ctmg110_pdata.h b/include/linux/input/cy8ctmg110_pdata.h
deleted file mode 100644
index ee1d445..0000000
--- a/include/linux/input/cy8ctmg110_pdata.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CY8CTMG110_PDATA_H
-#define _LINUX_CY8CTMG110_PDATA_H
-
-struct cy8ctmg110_pdata
-{
-	int reset_pin;		/* Reset pin is wired to this GPIO (optional) */
-};
-
-#endif
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index aee8ff4..f45f133 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -9,7 +9,7 @@
 #define _INTEL_ISH_CLIENT_IF_H_
 
 #include <linux/device.h>
-#include <linux/uuid.h>
+#include <linux/mod_devicetable.h>
 
 struct ishtp_cl_device;
 struct ishtp_device;
@@ -40,7 +40,7 @@
 struct ishtp_cl_driver {
 	struct device_driver driver;
 	const char *name;
-	const guid_t *guid;
+	const struct ishtp_device_id *id;
 	int (*probe)(struct ishtp_cl_device *dev);
 	void (*remove)(struct ishtp_cl_device *dev);
 	int (*reset)(struct ishtp_cl_device *dev);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 05e2277..b75395e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -131,6 +131,16 @@
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	if (ns) {
+		if (refcount_inc_not_zero(&ns->ns.count))
+			return ns;
+	}
+
+	return NULL;
+}
+
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -147,6 +157,11 @@
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	return ns;
+}
+
 static inline void put_ipc_ns(struct ipc_namespace *ns)
 {
 }
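get_ipc_ns_not_zero() is the conditional counterpart to get_ipc_ns(): it takes a
reference only if the namespace refcount has not already hit zero, which lets one task
safely peek at another task's namespace. A hedged usage sketch (locking around the
nsproxy access elided):

	ns = get_ipc_ns_not_zero(tsk->nsproxy->ipc_ns);
	if (!ns)
		return;		/* namespace already being torn down */
	/* ... use ns ... */
	put_ipc_ns(ns);
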
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 968b4c4..77755ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,7 @@
 struct completion;
 struct user;
 
-#ifdef CONFIG_PREEMPT_VOLUNTARY
+#ifdef CONFIG_PREEMPT_VOLUNTARY_BUILD
 
 extern int __cond_resched(void);
 # define might_resched() __cond_resched()
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9..c310648 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,7 +150,7 @@
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
-#define KVM_REQ_VM_BUGGED         (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VM_DEAD           (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -617,6 +617,7 @@
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
 	bool vm_bugged;
+	bool vm_dead;
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 	struct notifier_block pm_notifier;
@@ -650,12 +651,19 @@
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline void kvm_vm_dead(struct kvm *kvm)
+{
+	kvm->vm_dead = true;
+	kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
+}
+
 static inline void kvm_vm_bugged(struct kvm *kvm)
 {
 	kvm->vm_bugged = true;
-	kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+	kvm_vm_dead(kvm);
 }
 
+
 #define KVM_BUG(cond, kvm, fmt...)				\
 ({								\
 	int __ret = (cond);					\
@@ -866,7 +874,7 @@
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -942,12 +950,8 @@
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-		struct gfn_to_pfn_cache *cache, bool atomic);
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
-		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
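kvm_vm_dead() splits the "stop this VM" machinery out of kvm_vm_bugged(): any path may
mark the VM dead and kick every vCPU with KVM_REQ_VM_DEAD, after which ioctls are
expected to refuse service. A sketch of the consumer side (the exact placement is an
assumption):

	/* e.g. at the top of a VM ioctl handler */
	if (kvm->vm_dead)
		return -EIO;
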
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2237abb..234eab0 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,13 +53,6 @@
 	struct kvm_memory_slot *memslot;
 };
 
-struct gfn_to_pfn_cache {
-	u64 generation;
-	gfn_t gfn;
-	kvm_pfn_t pfn;
-	bool dirty;
-};
-
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2884383..2a8404b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -394,7 +394,7 @@
 	/* This should match the actual table size of
 	 * ata_eh_cmd_timeout_table in libata-eh.c.
 	 */
-	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
+	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7,
 
 	/* Horkage types. May be set by libata or controller on drives
 	   (some horkage may be drive/controller pair dependent) */
@@ -427,6 +427,7 @@
 	ATA_HORKAGE_MAX_SEC_1024 = (1 << 25),	/* Limit max sects to 1024 */
 	ATA_HORKAGE_MAX_TRIM_128M = (1 << 26),	/* Limit max trim size to 128M */
 	ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27),	/* Disable NCQ on ATI chipset */
+	ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28),	/* Identify device log missing */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 442a611..df8de62 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -335,8 +335,6 @@
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc,
-	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d682321..d45b6f6 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1050,11 +1050,6 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
- * @sctp_assoc_established:
- *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
- *	to the security module.
- *	@asoc pointer to sctp association structure.
- *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 97afcea..8b18fe9 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -145,13 +145,13 @@
 	GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
 		ESW_TUN_OPTS_OFFSET + 1)
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
 struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
 	return MLX5_ESWITCH_NONE;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb8c6f5..c3a6e62 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -105,7 +105,18 @@
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr;
-			atomic_long_t pp_frag_count;
+			union {
+				/**
+				 * dma_addr_upper: might require a 64-bit
+				 * value on 32-bit architectures.
+				 */
+				unsigned long dma_addr_upper;
+				/**
+				 * For frag page support, not supported in
+				 * 32-bit architectures with 64-bit DMA.
+				 */
+				atomic_long_t pp_frag_count;
+			};
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index ae2e75d..4bb7197 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -895,4 +895,18 @@
 	kernel_ulong_t driver_data;
 };
 
+/* ISHTP (Integrated Sensor Hub Transport Protocol) */
+
+#define ISHTP_MODULE_PREFIX	"ishtp:"
+
+/**
+ * struct ishtp_device_id - ISHTP device identifier
+ * @guid: GUID of the device.
+ * @driver_data: pointer to driver specific data
+ */
+struct ishtp_device_id {
+	guid_t guid;
+	kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
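With a real device-ID type, ISHTP client drivers can use standard module alias
generation instead of matching on a bare guid_t; a driver-side table would look like
this (driver name and GUID value are placeholders):

static const struct ishtp_device_id foo_ishtp_id_table[] = {
	{ .guid = GUID_INIT(0x00000000, 0x0000, 0x0000, 0x00, 0x00,
			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00), },
	{ }
};
MODULE_DEVICE_TABLE(ishtp, foo_ishtp_id_table);
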
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 49cf6eb..e616f94 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -148,7 +148,7 @@
 				u8	is_msix		: 1;
 				u8	multiple	: 3;
 				u8	multi_cap	: 3;
-				u8	maskbit		: 1;
+				u8	can_mask	: 1;
 				u8	is_64		: 1;
 				u8	is_virtual	: 1;
 				u16	entry_nr;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 12c4177..ca0683b 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -166,13 +166,13 @@
 	short			error;		/* 0 or error that occurred */
 	loff_t			i_size;		/* Size of the file */
 	loff_t			start;		/* Start position */
-	pgoff_t			no_unlock_page;	/* Don't unlock this page after read */
+	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
 	refcount_t		usage;
 	unsigned long		flags;
 #define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
 #define NETFS_RREQ_WRITE_TO_CACHE	1	/* Need to write to the cache */
-#define NETFS_RREQ_NO_UNLOCK_PAGE	2	/* Don't unlock no_unlock_page on completion */
-#define NETFS_RREQ_DONT_UNLOCK_PAGES	3	/* Don't unlock the pages on completion */
+#define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
+#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
 	const struct netfs_read_request_ops *netfs_ops;
@@ -190,7 +190,7 @@
 	void (*issue_op)(struct netfs_read_subrequest *subreq);
 	bool (*is_still_valid)(struct netfs_read_request *rreq);
 	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-				 struct page *page, void **_fsdata);
+				 struct folio *folio, void **_fsdata);
 	void (*done)(struct netfs_read_request *rreq);
 	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
@@ -240,11 +240,11 @@
 			    const struct netfs_read_request_ops *,
 			    void *);
 extern int netfs_readpage(struct file *,
-			  struct page *,
+			  struct folio *,
 			  const struct netfs_read_request_ops *,
 			  void *);
 extern int netfs_write_begin(struct file *, struct address_space *,
-			     loff_t, unsigned int, unsigned int, struct page **,
+			     loff_t, unsigned int, unsigned int, struct folio **,
 			     void **,
 			     const struct netfs_read_request_ops *,
 			     void *);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 52ec4b5..b5f14d5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -686,13 +686,13 @@
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
 
-/* Whether there are one or multiple pages in a folio */
-static inline bool folio_test_single(struct folio *folio)
-{
-	return !folio_test_head(folio);
-}
-
-static inline bool folio_test_multi(struct folio *folio)
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
+ */
+static inline bool folio_test_large(struct folio *folio)
 {
 	return folio_test_head(folio);
 }
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6a30916..6052464 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -84,7 +84,7 @@
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
-	AS_THP_SUPPORT = 6,	/* THPs supported */
+	AS_LARGE_FOLIO_SUPPORT = 6,
 };
 
 /**
@@ -176,9 +176,25 @@
 	m->gfp_mask = mask;
 }
 
-static inline bool mapping_thp_support(struct address_space *mapping)
+/**
+ * mapping_set_large_folios() - Indicate the file supports large folios.
+ * @mapping: The file.
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate that the VFS can use large folios to cache the contents of
+ * the file.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_large_folios(struct address_space *mapping)
 {
-	return test_bit(AS_THP_SUPPORT, &mapping->flags);
+	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+}
+
+static inline bool mapping_large_folio_support(struct address_space *mapping)
+{
+	return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -193,7 +209,7 @@
 static inline void filemap_nr_thps_inc(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_inc(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
@@ -203,7 +219,7 @@
 static inline void filemap_nr_thps_dec(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_dec(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
@@ -253,6 +269,20 @@
 	return folio_mapping(folio);
 }
 
+/**
+ * folio_inode - Get the host inode for this folio.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the inode that this folio
+ * belongs to.
+ *
+ * Do not call this for folios which aren't in the page cache.
+ */
+static inline struct inode *folio_inode(struct folio *folio)
+{
+	return folio->mapping->host;
+}
+
 static inline bool page_cache_add_speculative(struct page *page, int count)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
@@ -280,6 +310,25 @@
 }
 
 /**
+ * folio_change_private - Change private data on a folio.
+ * @folio: Folio to change the data on.
+ * @data: Data to set on the folio.
+ *
+ * Change the private data attached to a folio and return the old
+ * data.  The folio must previously have had data attached and the data
+ * must be detached before the folio will be freed.
+ *
+ * Return: Data that was previously attached to the folio.
+ */
+static inline void *folio_change_private(struct folio *folio, void *data)
+{
+	void *old = folio_get_private(folio);
+
+	folio->private = data;
+	return old;
+}
+
+/**
  * folio_detach_private - Detach private data from a folio.
  * @folio: Folio to detach data from.
  *
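Following the kerneldoc above, a filesystem opts in from its inode-initialisation
path, before the inode is visible:

	/* e.g. while setting up a freshly allocated inode */
	mapping_set_large_folios(inode->i_mapping);
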
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 138d764..18a75c8e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -233,6 +233,8 @@
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
 	/* Don't use Relaxed Ordering for TLPs directed at this device */
 	PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
+	/* Device does honor MSI masking despite saying otherwise */
+	PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
@@ -1666,6 +1668,7 @@
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
 
+void pci_dev_lock(struct pci_dev *dev);
 int pci_dev_trylock(struct pci_dev *dev);
 void pci_dev_unlock(struct pci_dev *dev);
 
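Exporting pci_dev_lock() gives drivers the blocking counterpart to pci_dev_trylock();
the expected pairing is simply:

	pci_dev_lock(pdev);
	/* device state and config access are stable here */
	pci_dev_unlock(pdev);
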
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 98a9371..ae4004e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
 #include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
-#include <linux/printk.h>
 #include <linux/pfn.h>
 #include <linux/init.h>
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 00fef00..5bbcd28 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -184,8 +184,10 @@
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void clear_posix_cputimers_work(struct task_struct *p);
 void posix_cputimers_init_work(void);
 #else
+static inline void clear_posix_cputimers_work(struct task_struct *p) { }
 static inline void posix_cputimers_init_work(void) { }
 #endif
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 85b656f..9497f6b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -198,6 +198,7 @@
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
+void printk_trigger_flush(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -274,6 +275,9 @@
 static inline void dump_stack(void)
 {
 }
+static inline void printk_trigger_flush(void)
+{
+}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index ae04968..9afd34a 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -37,6 +37,7 @@
 #define PTP_MSGTYPE_PDELAY_RESP 0x3
 
 #define PTP_EV_PORT 319
+#define PTP_GEN_PORT 320
 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */
 
 #define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 725c9b7..e6dac95 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -429,16 +429,19 @@
 #else
 static inline struct pwm_device *pwm_request(int pwm_id, const char *label)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline void pwm_free(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline int pwm_apply_state(struct pwm_device *pwm,
 				  const struct pwm_state *state)
 {
+	might_sleep();
 	return -ENOTSUPP;
 }
 
@@ -450,6 +453,7 @@
 static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
 			     int period_ns)
 {
+	might_sleep();
 	return -EINVAL;
 }
 
@@ -462,11 +466,13 @@
 
 static inline int pwm_enable(struct pwm_device *pwm)
 {
+	might_sleep();
 	return -EINVAL;
 }
 
 static inline void pwm_disable(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
@@ -493,12 +499,14 @@
 						       unsigned int index,
 						       const char *label)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline struct pwm_device *pwm_get(struct device *dev,
 					 const char *consumer)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -506,16 +514,19 @@
 					    struct device_node *np,
 					    const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline void pwm_put(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline struct pwm_device *devm_pwm_get(struct device *dev,
 					      const char *consumer)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -523,6 +534,7 @@
 						 struct device_node *np,
 						 const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -530,6 +542,7 @@
 devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode,
 		    const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 #endif
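The might_sleep() annotations make the CONFIG_PWM=n stubs enforce the same context
requirements as the real implementations, so misuse is caught even on configs built
without PWM support. With CONFIG_DEBUG_ATOMIC_SLEEP, for example:

	spin_lock(&lock);
	pwm_enable(pwm);	/* now splats: sleeping call in atomic context */
	spin_unlock(&lock);
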
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index bd611e2..47fd1c2 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -66,6 +66,8 @@
 	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
 	int (*read_offset)(struct device *, long *offset);
 	int (*set_offset)(struct device *, long offset);
+	int (*param_get)(struct device *, struct rtc_param *param);
+	int (*param_set)(struct device *, struct rtc_param *param);
 };
 
 struct rtc_device;
@@ -80,6 +82,7 @@
 
 /* flags */
 #define RTC_DEV_BUSY 0
+#define RTC_NO_CDEV  1
 
 struct rtc_device {
 	struct device dev;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2350539..33a5064 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -352,6 +352,7 @@
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int);
 extern void force_fatal_sig(int);
+extern void force_exit_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ba88a69..058d7f3 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -158,7 +158,7 @@
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/include/linux/sdb.h b/include/linux/sdb.h
deleted file mode 100644
index a2404a2..0000000
--- a/include/linux/sdb.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the official version 1.1 of sdb.h
- */
-#ifndef __SDB_H__
-#define __SDB_H__
-#ifdef __KERNEL__
-#include <linux/types.h>
-#else
-#include <stdint.h>
-#endif
-
-/*
- * All structures are 64 bytes long and are expected
- * to live in an array, one for each interconnect.
- * Most fields of the structures are shared among the
- * various types, and most-specific fields are at the
- * beginning (for alignment reasons, and to keep the
- * magic number at the head of the interconnect record
- */
-
-/* Product, 40 bytes at offset 24, 8-byte aligned
- *
- * device_id is vendor-assigned; version is device-specific,
- * date is hex (e.g 0x20120501), name is UTF-8, blank-filled
- * and not terminated with a 0 byte.
- */
-struct sdb_product {
-	uint64_t		vendor_id;	/* 0x18..0x1f */
-	uint32_t		device_id;	/* 0x20..0x23 */
-	uint32_t		version;	/* 0x24..0x27 */
-	uint32_t		date;		/* 0x28..0x2b */
-	uint8_t			name[19];	/* 0x2c..0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/*
- * Component, 56 bytes at offset 8, 8-byte aligned
- *
- * The address range is first to last, inclusive
- * (for example 0x100000 - 0x10ffff)
- */
-struct sdb_component {
-	uint64_t		addr_first;	/* 0x08..0x0f */
-	uint64_t		addr_last;	/* 0x10..0x17 */
-	struct sdb_product	product;	/* 0x18..0x3f */
-};
-
-/* Type of the SDB record */
-enum sdb_record_type {
-	sdb_type_interconnect	= 0x00,
-	sdb_type_device		= 0x01,
-	sdb_type_bridge		= 0x02,
-	sdb_type_integration	= 0x80,
-	sdb_type_repo_url	= 0x81,
-	sdb_type_synthesis	= 0x82,
-	sdb_type_empty		= 0xFF,
-};
-
-/* Type 0: interconnect (first of the array)
- *
- * sdb_records is the length of the table including this first
- * record, version is 1. The bus type is enumerated later.
- */
-#define				SDB_MAGIC	0x5344422d /* "SDB-" */
-struct sdb_interconnect {
-	uint32_t		sdb_magic;	/* 0x00-0x03 */
-	uint16_t		sdb_records;	/* 0x04-0x05 */
-	uint8_t			sdb_version;	/* 0x06 */
-	uint8_t			sdb_bus_type;	/* 0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 1: device
- *
- * class is 0 for "custom device", other values are
- * to be standardized; ABI version is for the driver,
- * bus-specific bits are defined by each bus (see below)
- */
-struct sdb_device {
-	uint16_t		abi_class;	/* 0x00-0x01 */
-	uint8_t			abi_ver_major;	/* 0x02 */
-	uint8_t			abi_ver_minor;	/* 0x03 */
-	uint32_t		bus_specific;	/* 0x04-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 2: bridge
- *
- * child is the address of the nested SDB table
- */
-struct sdb_bridge {
-	uint64_t		sdb_child;	/* 0x00-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 0x80: integration
- *
- * all types with bit 7 set are meta-information, so
- * software can ignore the types it doesn't know. Here we
- * just provide product information for an aggregate device
- */
-struct sdb_integration {
-	uint8_t			reserved[24];	/* 0x00-0x17 */
-	struct sdb_product	product;	/* 0x08-0x3f */
-};
-
-/* Type 0x81: Top module repository url
- *
- * again, an informative field that software can ignore
- */
-struct sdb_repo_url {
-	uint8_t			repo_url[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0x82: Synthesis tool information
- *
- * this informative record
- */
-struct sdb_synthesis {
-	uint8_t			syn_name[16];	/* 0x00-0x0f */
-	uint8_t			commit_id[16];	/* 0x10-0x1f */
-	uint8_t			tool_name[8];	/* 0x20-0x27 */
-	uint32_t		tool_version;	/* 0x28-0x2b */
-	uint32_t		date;		/* 0x2c-0x2f */
-	uint8_t			user_name[15];	/* 0x30-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0xff: empty
- *
- * this allows keeping empty slots during development,
- * so they can be filled later with minimal efforts and
- * no misleading description is ever shipped -- hopefully.
- * It can also be used to pad a table to a desired length.
- */
-struct sdb_empty {
-	uint8_t			reserved[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* The type of bus, for bus-specific flags */
-enum sdb_bus_type {
-	sdb_wishbone = 0x00,
-	sdb_data     = 0x01,
-};
-
-#define SDB_WB_WIDTH_MASK	0x0f
-#define SDB_WB_ACCESS8			0x01
-#define SDB_WB_ACCESS16			0x02
-#define SDB_WB_ACCESS32			0x04
-#define SDB_WB_ACCESS64			0x08
-#define SDB_WB_LITTLE_ENDIAN	0x80
-
-#define SDB_DATA_READ		0x04
-#define SDB_DATA_WRITE		0x02
-#define SDB_DATA_EXEC		0x01
-
-#endif /* __SDB_H__ */
diff --git a/include/linux/security.h b/include/linux/security.h
index 06eac4e..bbf44a4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1430,8 +1430,6 @@
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1651,11 +1649,6 @@
 					  struct sock *newsk)
 {
 }
-
-static inline void security_sctp_assoc_established(struct sctp_association *asoc,
-						   struct sk_buff *skb)
-{
-}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666..c8cb7e6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4226,7 +4226,7 @@
 		return;
 	}
 
-	 if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
 		__skb_checksum_complete(skb);
 		skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);
 	}
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 1a5eaef..d424c1a 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -1,17 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* linux/spi/ads7846.h */
 
-/* Touchscreen characteristics vary between boards and models.  The
- * platform_data for the device's "struct device" holds this information.
- *
- * It's OK if the min/max values are zero.
- */
-enum ads7846_filter {
-	ADS7846_FILTER_OK,
-	ADS7846_FILTER_REPEAT,
-	ADS7846_FILTER_IGNORE,
-};
-
 struct ads7846_platform_data {
 	u16	model;			/* 7843, 7845, 7846, 7873. */
 	u16	vref_delay_usecs;	/* 0 for external vref; etc */
@@ -51,10 +40,6 @@
 	int	gpio_pendown_debounce;	/* platform specific debounce time for
 					 * the gpio_pendown */
 	int	(*get_pendown_state)(void);
-	int	(*filter_init)	(const struct ads7846_platform_data *pdata,
-				 void **filter_data);
-	int	(*filter)	(void *filter_data, int data_idx, int *val);
-	void	(*filter_cleanup)(void *filter_data);
 	void	(*wait_for_sync)(void);
 	bool	wakeup;
 	unsigned long irq_flags;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 50453b2..2d167ac 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -673,7 +673,7 @@
 
 #define PERF_MAX_TRACE_SIZE	8192
 
-#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
+#define MAX_FILTER_STR_VAL	256U	/* Should handle KSYM_SYMBOL_LEN */
 
 enum event_trigger_type {
 	ETT_NONE		= (0),
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 1eaaa93..329d63b 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -15,7 +15,7 @@
 #else
 #define MODULE_VERMAGIC_SMP ""
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_BUILD
 #define MODULE_VERMAGIC_PREEMPT "preempt "
 #elif defined(CONFIG_PREEMPT_RT)
 #define MODULE_VERMAGIC_PREEMPT "preempt_rt "
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 44d0e09..41edbc0 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -152,7 +152,6 @@
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
- * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -169,7 +168,6 @@
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
-	bool suppress_used_validation;
 	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index b465f8f..04e87f4b 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -120,10 +120,15 @@
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
+		unsigned int nh_off = p_off;
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
+		/* UFO may not include transport header in gso_size. */
+		if (gso_type & SKB_GSO_UDP)
+			nh_off -= thlen;
+
 		/* Too small packets are not really GSO ones. */
-		if (skb->len - p_off > gso_size) {
+		if (skb->len - nh_off > gso_size) {
 			shinfo->gso_size = gso_size;
 			shinfo->gso_type = gso_type;
 
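Since UFO's gso_size may not cover the transport header, the size sanity check must
exclude it as well. An illustrative case (numbers made up): with thlen = 8 (UDP) and a
packet whose payload after the UDP header is exactly gso_size bytes, the old test
computed skb->len - p_off == gso_size, and the strict '>' wrongly demoted the packet
to non-GSO; with nh_off = p_off - thlen, the comparison becomes gso_size + 8 >
gso_size and the packet keeps its GSO metadata.
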
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index e87f78c..113408e 100644
--- a/include/linux/zstd.h
+++ b/include/linux/zstd.h
@@ -1,138 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
  */
 
-#ifndef ZSTD_H
-#define ZSTD_H
-
-/* ======   Dependency   ======*/
-#include <linux/types.h>   /* size_t */
-
-
-/*-*****************************************************************************
- * Introduction
- *
- * zstd, short for Zstandard, is a fast lossless compression algorithm,
- * targeting real-time compression scenarios at zlib-level and better
- * compression ratios. The zstd compression library provides in-memory
- * compression and decompression functions. The library supports compression
- * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
- * ultra, should be used with caution, as they require more memory.
- * Compression can be done in:
- *  - a single step, reusing a context (described as Explicit memory management)
- *  - unbounded multiple steps (described as Streaming compression)
- * The compression ratio achievable on small data can be highly improved using
- * compression with a dictionary in:
- *  - a single step (described as Simple dictionary API)
- *  - a single step, reusing a dictionary (described as Fast dictionary API)
- ******************************************************************************/
-
-/*======  Helper functions  ======*/
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
 
 /**
- * enum ZSTD_ErrorCode - zstd error codes
- *
- * Functions that return size_t can be checked for errors using ZSTD_isError()
- * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
  */
-typedef enum {
-	ZSTD_error_no_error,
-	ZSTD_error_GENERIC,
-	ZSTD_error_prefix_unknown,
-	ZSTD_error_version_unsupported,
-	ZSTD_error_parameter_unknown,
-	ZSTD_error_frameParameter_unsupported,
-	ZSTD_error_frameParameter_unsupportedBy32bits,
-	ZSTD_error_frameParameter_windowTooLarge,
-	ZSTD_error_compressionParameter_unsupported,
-	ZSTD_error_init_missing,
-	ZSTD_error_memory_allocation,
-	ZSTD_error_stage_wrong,
-	ZSTD_error_dstSize_tooSmall,
-	ZSTD_error_srcSize_wrong,
-	ZSTD_error_corruption_detected,
-	ZSTD_error_checksum_wrong,
-	ZSTD_error_tableLog_tooLarge,
-	ZSTD_error_maxSymbolValue_tooLarge,
-	ZSTD_error_maxSymbolValue_tooSmall,
-	ZSTD_error_dictionary_corrupted,
-	ZSTD_error_dictionary_wrong,
-	ZSTD_error_dictionaryCreation_failed,
-	ZSTD_error_maxCode
-} ZSTD_ErrorCode;
+
+/* ======   Dependency   ====== */
+#include <linux/types.h>
+#include <linux/zstd_errors.h>
+#include <linux/zstd_lib.h>
+
+/* ======   Helper Functions   ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
+ *
+ * Return:    The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
 
 /**
- * ZSTD_maxCLevel() - maximum compression level available
- *
- * Return: Maximum compression level available.
- */
-int ZSTD_maxCLevel(void);
-/**
- * ZSTD_compressBound() - maximum compressed size in worst case scenario
- * @srcSize: The size of the data to compress.
- *
- * Return:   The maximum compressed size in the worst case scenario.
- */
-size_t ZSTD_compressBound(size_t srcSize);
-/**
- * ZSTD_isError() - tells if a size_t function result is an error code
+ * zstd_is_error() - tells if a size_t function result is an error code
  * @code:  The function result to check for error.
  *
  * Return: Non-zero iff the code is an error.
  */
-static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
-{
-	return code > (size_t)-ZSTD_error_maxCode;
-}
-/**
- * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
- * @functionResult: The result of a function for which ZSTD_isError() is true.
- *
- * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
- *                  if the functionResult isn't an error.
- */
-static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
-	size_t functionResult)
-{
-	if (!ZSTD_isError(functionResult))
-		return (ZSTD_ErrorCode)0;
-	return (ZSTD_ErrorCode)(0 - functionResult);
-}
+unsigned int zstd_is_error(size_t code);
 
 /**
- * enum ZSTD_strategy - zstd compression search strategy
- *
- * From faster to stronger.
+ * enum zstd_error_code - zstd error codes
  */
-typedef enum {
-	ZSTD_fast,
-	ZSTD_dfast,
-	ZSTD_greedy,
-	ZSTD_lazy,
-	ZSTD_lazy2,
-	ZSTD_btlazy2,
-	ZSTD_btopt,
-	ZSTD_btopt2
-} ZSTD_strategy;
+typedef ZSTD_ErrorCode zstd_error_code;
 
 /**
- * struct ZSTD_compressionParameters - zstd compression parameters
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code:  The function result for which zstd_is_error(code) is true.
+ *
+ * Return: A unique error code for this error.
+ */
+zstd_error_code zstd_get_error_code(size_t code);
+
+/**
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code:  The function result for which zstd_is_error(code) is true.
+ *
+ * Return: An error string corresponding to the error code.
+ */
+const char *zstd_get_error_name(size_t code);
+
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
+/* ======   Parameter Selection   ====== */
+
+/**
+ * enum zstd_strategy - zstd compression search strategy
+ *
+ * From faster to stronger. See zstd_lib.h.
+ */
+typedef ZSTD_strategy zstd_strategy;
+
+/**
+ * struct zstd_compression_parameters - zstd compression parameters
  * @windowLog:    Log of the largest match distance. Larger means more
  *                compression, and more memory needed during decompression.
- * @chainLog:     Fully searched segment. Larger means more compression, slower,
- *                and more memory (useless for fast).
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
  * @hashLog:      Dispatch table. Larger means more compression,
  *                slower, and more memory.
  * @searchLog:    Number of searches. Larger means more compression and slower.
@@ -141,1017 +100,348 @@
  * @targetLength: Acceptable match size for optimal parser (only). Larger means
  *                more compression, and slower.
  * @strategy:     The zstd compression strategy.
- */
-typedef struct {
-	unsigned int windowLog;
-	unsigned int chainLog;
-	unsigned int hashLog;
-	unsigned int searchLog;
-	unsigned int searchLength;
-	unsigned int targetLength;
-	ZSTD_strategy strategy;
-} ZSTD_compressionParameters;
-
-/**
- * struct ZSTD_frameParameters - zstd frame parameters
- * @contentSizeFlag: Controls whether content size will be present in the frame
- *                   header (when known).
- * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
- *                   of the frame for error detection.
- * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
- *                   when using dictionary compression.
  *
- * The default value is all fields set to 0.
+ * See zstd_lib.h.
  */
-typedef struct {
-	unsigned int contentSizeFlag;
-	unsigned int checksumFlag;
-	unsigned int noDictIDFlag;
-} ZSTD_frameParameters;
+typedef ZSTD_compressionParameters zstd_compression_parameters;
 
 /**
- * struct ZSTD_parameters - zstd parameters
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
+ *
+ * The default value is all fields set to 0. See zstd_lib.h.
+ */
+typedef ZSTD_frameParameters zstd_frame_parameters;
+
+/**
+ * struct zstd_parameters - zstd parameters
  * @cParams: The compression parameters.
  * @fParams: The frame parameters.
  */
-typedef struct {
-	ZSTD_compressionParameters cParams;
-	ZSTD_frameParameters fParams;
-} ZSTD_parameters;
+typedef ZSTD_parameters zstd_parameters;
 
 /**
- * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level:              The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ *                      if unknown.
  *
- * Return:            The selected ZSTD_compressionParameters.
+ * Return:              The selected zstd_parameters.
  */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
+zstd_parameters zstd_get_params(int level,
+	unsigned long long estimated_src_size);
+
+/* ======   Single-pass Compression   ====== */
+
+typedef ZSTD_CCtx zstd_cctx;
 
 /**
- * ZSTD_getParams() - returns ZSTD_parameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
- *
- * The same as ZSTD_getCParams() except also selects the default frame
- * parameters (all zero).
- *
- * Return:            The selected ZSTD_parameters.
- */
-ZSTD_parameters ZSTD_getParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
-
-/*-*************************************
- * Explicit memory management
- **************************************/
-
-/**
- * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
  *
  * If multiple compression parameters might be used, the caller must call
- * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
  * size.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCCtx().
+ * Return:      A lower bound on the size of the workspace that is passed to
+ *              zstd_init_cctx().
  */
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
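+
+/*
+ * Illustrative sketch: if compression levels 1 through 3 might all be used
+ * at runtime, size the workspace for the largest of them. src_size is an
+ * assumed caller-provided estimate, or 0 if unknown:
+ *
+ *	size_t wksp_size = 0;
+ *	int level;
+ *
+ *	for (level = 1; level <= 3; level++) {
+ *		zstd_parameters params = zstd_get_params(level, src_size);
+ *		size_t bound = zstd_cctx_workspace_bound(&params.cParams);
+ *
+ *		if (bound > wksp_size)
+ *			wksp_size = bound;
+ *	}
+ */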
 
 /**
- * struct ZSTD_CCtx - the zstd compression context
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * When compressing many times it is recommended to allocate a context just once
- * and reuse it for each successive compression operation.
+ * Return:          A zstd compression context or NULL on error.
  */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-/**
- * ZSTD_initCCtx() - initialize a zstd compression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A compression context emplaced into workspace.
- */
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_compressCCtx() - compress src into dst
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx:         The context. Must have been initialized with zstd_init_cctx().
+ * @dst:          The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ *                zstd_compress_bound(src_size) is guaranteed to be large enough.
+ * @src:          The data to compress.
+ * @src_size:     The size of the data to compress.
+ * @parameters:   The compression parameters to be used.
  *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * Return:        The compressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, ZSTD_parameters params);
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size, const zstd_parameters *parameters);
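+
+/*
+ * Illustrative single-pass compression sketch. dst, dst_capacity, src and
+ * src_size are assumed caller-provided, kvmalloc()/kvfree() stand in for any
+ * allocator, the err label and workspace freeing are elided:
+ *
+ *	zstd_parameters params = zstd_get_params(3, src_size);
+ *	size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
+ *	void *wksp = kvmalloc(wksp_size, GFP_KERNEL);
+ *	zstd_cctx *cctx = wksp ? zstd_init_cctx(wksp, wksp_size) : NULL;
+ *	size_t csize;
+ *
+ *	if (!cctx)
+ *		goto err;
+ *	csize = zstd_compress_cctx(cctx, dst, dst_capacity,
+ *				   src, src_size, &params);
+ *	if (zstd_is_error(csize))
+ *		goto err;
+ */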
+
+/* ======   Single-pass Decompression   ====== */
+
+typedef ZSTD_DCtx zstd_dctx;
 
 /**
- * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
  *
  * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDCtx().
+ *         zstd_init_dctx().
  */
-size_t ZSTD_DCtxWorkspaceBound(void);
+size_t zstd_dctx_workspace_bound(void);
 
 /**
- * struct ZSTD_DCtx - the zstd decompression context
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * When decompressing many times it is recommended to allocate a context just
- * once and reuse it for each successive decompression operation.
+ * Return:          A zstd decompression context or NULL on error.
  */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-/**
- * ZSTD_initDCtx() - initialize a zstd decompression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A decompression context emplaced into workspace.
- */
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx:         The decompression context.
+ * @dst:          The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ *                as the decompressed size. If the caller cannot upper bound the
+ *                decompressed size, then it's better to use the streaming API.
+ * @src:          The zstd compressed data to decompress. Multiple concatenated
+ *                frames and skippable frames are allowed.
+ * @src_size:     The exact size of the data to decompress.
  *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * Return:        The decompressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size);
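+
+/*
+ * Illustrative single-pass decompression sketch; assumes dst_capacity is a
+ * known upper bound on the decompressed size (e.g. stored out of band), and
+ * that kvmalloc() and the err label are caller-provided:
+ *
+ *	size_t wksp_size = zstd_dctx_workspace_bound();
+ *	void *wksp = kvmalloc(wksp_size, GFP_KERNEL);
+ *	zstd_dctx *dctx = wksp ? zstd_init_dctx(wksp, wksp_size) : NULL;
+ *	size_t dsize;
+ *
+ *	if (!dctx)
+ *		goto err;
+ *	dsize = zstd_decompress_dctx(dctx, dst, dst_capacity, src, src_size);
+ *	if (zstd_is_error(dsize))
+ *		goto err;
+ */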
 
-/*-************************
- * Simple dictionary API
- **************************/
+/* ======   Streaming Buffers   ====== */
 
 /**
- * ZSTD_compress_usingDict() - compress src into dst using a dictionary
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @dict:        The dictionary to use for compression.
- * @dictSize:    The size of the dictionary.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a predefined dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize,
-	ZSTD_parameters params);
-
-/**
- * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @dict:        The dictionary to use for decompression. The same dictionary
- *               must've been used to compress the data.
- * @dictSize:    The size of the dictionary.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize);
-
-/*-**************************
- * Fast dictionary API
- ***************************/
-
-/**
- * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
- * @cParams: The compression parameters to be used for compression.
- *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCDict().
- */
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CDict - a digested dictionary to be used for compression
- */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
-
-/**
- * ZSTD_initCDict() - initialize a digested dictionary for compression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
- * @dictSize:      The size of the dictionary.
- * @params:        The parameters to use for compression. See ZSTD_getParams().
- * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_CDictWorkspaceBound(params.cParams).
- *
- * When compressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_CDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_CDict.
- *
- * Return:         The digested dictionary emplaced into workspace.
- */
-ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
-	ZSTD_parameters params, void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
- *               cParams are the compression parameters used to initialize the
- *               cdict.
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @cdict:       The digested dictionary to use for compression.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a digested dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
-
-
-/**
- * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
- *
- * Return:  A lower bound on the size of the workspace that is passed to
- *          ZSTD_initDDict().
- */
-size_t ZSTD_DDictWorkspaceBound(void);
-
-/**
- * struct ZSTD_DDict - a digested dictionary to be used for decompression
- */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-
-/**
- * ZSTD_initDDict() - initialize a digested dictionary for decompression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
- * @dictSize:      The size of the dictionary.
- * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_DDictWorkspaceBound().
- *
- * When decompressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_DDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_DDict.
- *
- * Return:         The digested dictionary emplaced into workspace.
- */
-ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
-	void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @ddict:       The digested dictionary to use for decompression. The same
- *               dictionary must've been used to compress the data.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
-	size_t dstCapacity, const void *src, size_t srcSize,
-	const ZSTD_DDict *ddict);
-
-
-/*-**************************
- * Streaming
- ***************************/
-
-/**
- * struct ZSTD_inBuffer - input buffer for streaming
+ * struct zstd_in_buffer - input buffer for streaming
  * @src:  Start of the input buffer.
  * @size: Size of the input buffer.
  * @pos:  Position where reading stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_inBuffer_s {
-	const void *src;
-	size_t size;
-	size_t pos;
-} ZSTD_inBuffer;
+typedef ZSTD_inBuffer zstd_in_buffer;
 
 /**
- * struct ZSTD_outBuffer - output buffer for streaming
+ * struct zstd_out_buffer - output buffer for streaming
  * @dst:  Start of the output buffer.
  * @size: Size of the output buffer.
  * @pos:  Position where writing stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_outBuffer_s {
-	void *dst;
-	size_t size;
-	size_t pos;
-} ZSTD_outBuffer;
+typedef ZSTD_outBuffer zstd_out_buffer;
 
+/* ======   Streaming Compression   ====== */
 
-
-/*-*****************************************************************************
- * Streaming compression - HowTo
- *
- * A ZSTD_CStream object is required to track streaming operation.
- * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
- * ZSTD_CStream objects can be reused multiple times on consecutive compression
- * operations. It is recommended to re-use ZSTD_CStream in situations where many
- * streaming operations will be achieved consecutively. Use one separate
- * ZSTD_CStream per thread for parallel execution.
- *
- * Use ZSTD_compressStream() repetitively to consume input stream.
- * The function will automatically update both `pos` fields.
- * Note that it may not consume the entire input, in which case `pos < size`,
- * and it's up to the caller to present again remaining data.
- * It returns a hint for the preferred number of bytes to use as an input for
- * the next function call.
- *
- * At any moment, it's possible to flush whatever data remains within internal
- * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
- * still be some content left within the internal buffer if `output->size` is
- * too small. It returns the number of bytes left in the internal buffer and
- * must be called until it returns 0.
- *
- * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
- * write frame epilogue. The epilogue is required for decoders to consider a
- * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
- * the full content if `output->size` is too small. In which case, call again
- * ZSTD_endStream() to complete the flush. It returns the number of bytes left
- * in the internal buffer and must be called until it returns 0.
- ******************************************************************************/
+typedef ZSTD_CStream zstd_cstream;
 
 /**
- * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
  *
  * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
+ *           zstd_init_cstream().
  */
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
 
 /**
- * struct ZSTD_CStream - the zstd streaming compression context
- */
-typedef struct ZSTD_CStream_s ZSTD_CStream;
-
-/*===== ZSTD_CStream management functions =====*/
-/**
- * ZSTD_initCStream() - initialize a zstd streaming compression context
- * @params:         The zstd compression parameters.
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
- *                  pass the source size (zero means empty source). Otherwise,
- *                  the caller may optionally pass the source size, or zero if
- *                  unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace.
- *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
- *                  how large the workspace must be.
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters:       The zstd parameters to use for compression.
+ * @pledged_src_size: If parameters->fParams.contentSizeFlag == 1 then the
+ *                    caller must pass the source size (zero means empty
+ *                    source). Otherwise, the caller may optionally pass the
+ *                    source size, or zero if unknown.
+ * @workspace:        The workspace to emplace the context into. It must outlive
+ *                    the returned context.
+ * @workspace_size:   The size of workspace.
+ *                    Use zstd_cstream_workspace_bound(&parameters->cParams)
+ *                    to determine how large the workspace must be.
  *
- * Return:          The zstd streaming compression context.
+ * Return:            The zstd streaming compression context or NULL on error.
  */
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+	unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
- * @cdict:          The digested dictionary to use for compression.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
- *                  with the cParams used to initialize the cdict to determine
- *                  how large the workspace must be.
- *
- * Return:          The zstd streaming compression context.
- */
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
-
-/*===== Streaming compression functions =====*/
-/**
- * ZSTD_resetCStream() - reset the context using parameters from creation
- * @zcs:            The zstd streaming compression context to reset.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream:          The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
  *
  * Resets the context using the parameters from creation. Skips dictionary
- * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
  * content size is always written into the frame header.
  *
- * Return:          Zero or an error, which can be checked using ZSTD_isError().
+ * Return:            Zero or an error, which can be checked using
+ *                    zstd_is_error().
  */
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+	unsigned long long pledged_src_size);
+
 /**
- * ZSTD_compressStream() - streaming compress some of input into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- * @input:  Source buffer. `input->pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input->pos < input->size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ * @input:   Source buffer. `input->pos` is updated to indicate how much data
+ *           was read. Note that it may not consume the entire input, in which
+ *           case `input->pos < input->size`, and it's up to the caller to
+ *           present remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
  *
- * Return:  A hint for the number of bytes to use as the input for the next
- *          function call or an error, which can be checked using
- *          ZSTD_isError().
+ * Return:   A hint for the number of bytes to use as the input for the next
+ *           function call or an error, which can be checked using
+ *           zstd_is_error().
  */
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
+
 /**
- * ZSTD_flushStream() - flush internal buffers into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * ZSTD_flushStream() must be called until it returns 0, meaning all the data
- * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
  * calling it too often will degrade the compression ratio.
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
 /**
- * ZSTD_endStream() - flush internal buffers into output and end the frame
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * ZSTD_endStream() must be called until it returns 0, meaning all the data has
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
  * been flushed and the frame epilogue has been written.
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
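+
+/*
+ * Illustrative streaming compression sketch: feed all input through
+ * zstd_compress_stream(), then drain with zstd_end_stream() until it
+ * returns 0. Writing `out` to storage and resetting it between iterations
+ * is elided; the err label is assumed caller-provided:
+ *
+ *	zstd_in_buffer in = { src, src_size, 0 };
+ *	zstd_out_buffer out = { dst, dst_capacity, 0 };
+ *	size_t ret;
+ *
+ *	while (in.pos < in.size) {
+ *		ret = zstd_compress_stream(cstream, &out, &in);
+ *		if (zstd_is_error(ret))
+ *			goto err;
+ *	}
+ *	do {
+ *		ret = zstd_end_stream(cstream, &out);
+ *		if (zstd_is_error(ret))
+ *			goto err;
+ *	} while (ret != 0);
+ */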
+
+/* ======   Streaming Decompression   ====== */
+
+typedef ZSTD_DStream zstd_dstream;
 
 /**
- * ZSTD_CStreamInSize() - recommended size for the input buffer
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
  *
- * Return: The recommended size for the input buffer.
+ * Return:           A lower bound on the size of the workspace that is passed
+ *                   to zstd_init_dstream().
  */
-size_t ZSTD_CStreamInSize(void);
-/**
- * ZSTD_CStreamOutSize() - recommended size for the output buffer
- *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete compressed block.
- *
- * Return: The recommended size for the output buffer.
- */
-size_t ZSTD_CStreamOutSize(void);
-
-
-
-/*-*****************************************************************************
- * Streaming decompression - HowTo
- *
- * A ZSTD_DStream object is required to track streaming operations.
- * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
- * ZSTD_DStream objects can be re-used multiple times.
- *
- * Use ZSTD_decompressStream() repetitively to consume your input.
- * The function will update both `pos` fields.
- * If `input->pos < input->size`, some input has not been consumed.
- * It's up to the caller to present again remaining data.
- * If `output->pos < output->size`, decoder has flushed everything it could.
- * Returns 0 iff a frame is completely decoded and fully flushed.
- * Otherwise it returns a suggested next input size that will never load more
- * than the current frame.
- ******************************************************************************/
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
 
 /**
- * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
- * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace:       The workspace to emplace the context into. It must outlive
+ *                   the returned context.
+ * @workspace_size:  The size of workspace.
+ *                   Use zstd_dstream_workspace_bound(max_window_size) to
+ *                   determine how large the workspace must be.
  *
- * Return:         A lower bound on the size of the workspace that is passed to
- *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+ * Return:           The zstd streaming decompression context or NULL on error.
  */
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+	size_t workspace_size);
 
 /**
- * struct ZSTD_DStream - the zstd streaming decompression context
- */
-typedef struct ZSTD_DStream_s ZSTD_DStream;
-/*===== ZSTD_DStream management functions =====*/
-/**
- * ZSTD_initDStream() - initialize a zstd streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
-	size_t workspaceSize);
-/**
- * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @ddict:         The digested dictionary to use for decompression.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
-	const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
-
-/*===== Streaming decompression functions =====*/
-/**
- * ZSTD_resetDStream() - reset the context using parameters from creation
- * @zds:   The zstd streaming decompression context to reset.
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
  *
  * Resets the context using the parameters from creation. Skips dictionary
  * loading, since it can be reused.
  *
- * Return: Zero or an error, which can be checked using ZSTD_isError().
+ * Return:   Zero or an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_resetDStream(ZSTD_DStream *zds);
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
 /**
- * ZSTD_decompressStream() - streaming decompress some of input into output
- * @zds:    The zstd streaming decompression context.
- * @output: Destination buffer. `output.pos` is updated to indicate how much
- *          decompressed data was written.
- * @input:  Source buffer. `input.pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input.pos < input.size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output:  Destination buffer. `output.pos` is updated to indicate how much
+ *           decompressed data was written.
+ * @input:   Source buffer. `input.pos` is updated to indicate how much data was
+ *           read. Note that it may not consume the entire input, in which case
+ *           `input.pos < input.size`, and it's up to the caller to present
+ *           remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
- * ZSTD_decompressStream() will not consume the last byte of the frame until
+ * zstd_decompress_stream() will not consume the last byte of the frame until
  * the entire frame is flushed.
  *
- * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
- *          Otherwise returns a hint for the number of bytes to use as the input
- *          for the next function call or an error, which can be checked using
- *          ZSTD_isError(). The size hint will never load more than the frame.
+ * Return:   Returns 0 iff a frame is completely decoded and fully flushed.
+ *           Otherwise returns a hint for the number of bytes to use as the
+ *           input for the next function call or an error, which can be checked
+ *           using zstd_is_error(). The size hint will never load more than the
+ *           frame.
  */
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
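+
+/*
+ * Illustrative streaming decompression sketch: zstd_decompress_stream()
+ * returns 0 once a frame is completely decoded and fully flushed. Refilling
+ * `in` and draining `out` between iterations are elided; the err label is
+ * assumed caller-provided:
+ *
+ *	zstd_in_buffer in = { src, src_size, 0 };
+ *	zstd_out_buffer out = { dst, dst_capacity, 0 };
+ *	size_t ret;
+ *
+ *	do {
+ *		ret = zstd_decompress_stream(dstream, &out, &in);
+ *		if (zstd_is_error(ret))
+ *			goto err;
+ *	} while (ret != 0);
+ */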
+
+/* ======   Frame Inspection Functions ====== */
 
 /**
- * ZSTD_DStreamInSize() - recommended size for the input buffer
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src:      Source buffer. It should point to the start of a zstd encoded
+ *            frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ *            size of the frame.
  *
- * Return: The recommended size for the input buffer.
+ * Return:    The compressed size of the frame pointed to by `src` or an error,
+ *            which can be checked with zstd_is_error().
+ *            Suitable to pass to zstd_decompress_dctx() or similar functions.
  */
-size_t ZSTD_DStreamInSize(void);
-/**
- * ZSTD_DStreamOutSize() - recommended size for the output buffer
- *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete decompressed block.
- *
- * Return: The recommended size for the output buffer.
- */
-size_t ZSTD_DStreamOutSize(void);
-
-
-/* --- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
-
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
-
-#define ZSTD_WINDOWLOG_MAX_32  27
-#define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX \
-	((unsigned int)(sizeof(size_t) == 4 \
-		? ZSTD_WINDOWLOG_MAX_32 \
-		: ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN        6
-#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
-#define ZSTD_HASHLOG3_MAX      17
-#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN      1
-/* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_SEARCHLENGTH_MAX   7
-/* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_SEARCHLENGTH_MIN   3
-#define ZSTD_TARGETLENGTH_MIN   4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* for static allocation */
-#define ZSTD_FRAMEHEADERSIZE_MAX 18
-#define ZSTD_FRAMEHEADERSIZE_MIN  6
-#define ZSTD_frameHeaderSize_prefix 5
-#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
-#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
-/* magic number + skippable frame length */
-#define ZSTD_skippableHeaderSize 8
-
-
-/*-*************************************
- * Compressed size functions
- **************************************/
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
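+
+/*
+ * Illustrative sketch: because this reports the size of only the first
+ * frame at `src`, a buffer of concatenated frames can be walked by
+ * advancing by the returned size. The err label is assumed caller-provided:
+ *
+ *	const char *pos = src;
+ *	size_t remaining = src_size;
+ *
+ *	while (remaining > 0) {
+ *		size_t frame_size =
+ *			zstd_find_frame_compressed_size(pos, remaining);
+ *
+ *		if (zstd_is_error(frame_size))
+ *			goto err;
+ *		pos += frame_size;
+ *		remaining -= frame_size;
+ *	}
+ */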
 
 /**
- * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
- * @src:     Source buffer. It should point to the start of a zstd encoded frame
- *           or a skippable frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           size of the frame.
- *
- * Return:   The compressed size of the frame pointed to by `src` or an error,
- *           which can be check with ZSTD_isError().
- *           Suitable to pass to ZSTD_decompress() or similar functions.
- */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Decompressed size functions
- **************************************/
-/**
- * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
- * @src:     It should point to the start of a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The frame content size stored in the frame header if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
- *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
- */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
-
-/**
- * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
- * @src:     It should point to the start of a series of zstd encoded and/or
- *           skippable frames.
- * @srcSize: The exact size of the series of frames.
- *
- * If any zstd encoded frame in the series doesn't have the frame content size
- * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
- * set when using ZSTD_compress(). The decompressed size can be very large.
- * If the source is untrusted, the decompressed size could be wrong or
- * intentionally modified. Always ensure the result fits within the
- * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
- * frames, and so it must traverse the input to read each frame header. This is
- * efficient as most of the data is skipped, however it does mean that all frame
- * data must be present and valid.
- *
- * Return:   Decompressed size of all the data contained in the frames if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
- *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
- */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Advanced compression functions
- **************************************/
-/**
- * ZSTD_checkCParams() - ensure parameter values remain within authorized range
- * @cParams: The zstd compression parameters.
- *
- * Return:   Zero or an error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
-
-/**
- * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
- * @srcSize:  Optionally the estimated source size, or zero if unknown.
- * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
- *
- * Return:    The optimized parameters.
- */
-ZSTD_compressionParameters ZSTD_adjustCParams(
-	ZSTD_compressionParameters cParams, unsigned long long srcSize,
-	size_t dictSize);
-
-/*--- Advanced decompression functions ---*/
-
-/**
- * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
- * @buffer: The source buffer to check.
- * @size:   The size of the source buffer, must be at least 4 bytes.
- *
- * Return: True iff the buffer starts with a zstd or skippable frame identifier.
- */
-unsigned int ZSTD_isFrame(const void *buffer, size_t size);
-
-/**
- * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
- * @dict:     The dictionary buffer.
- * @dictSize: The size of the dictionary buffer.
- *
- * Return:    The dictionary id stored within the dictionary or 0 if the
- *            dictionary is not a zstd dictionary. If it returns 0 the
- *            dictionary can still be loaded as a content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
-
-/**
- * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
- * @ddict: The ddict to find the id of.
- *
- * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
- *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
- *         content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
-
-/**
- * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
- * @src:     Source buffer. It must be a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The dictionary id required to decompress the frame stored within
- *           `src` or 0 if the dictionary id could not be decoded. It can return
- *           0 if the frame does not require a dictionary, the dictionary id
- *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
- *           is too small.
- */
-unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
-
-/**
- * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
- * @frameContentSize: The frame content size, or 0 if not present.
+ * struct zstd_frame_header - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
  * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable).
+ * @headerSize:       The size of the frame header.
  * @dictID:           The dictionary id, or 0 if not present.
  * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
  */
-typedef struct {
-	unsigned long long frameContentSize;
-	unsigned int windowSize;
-	unsigned int dictID;
-	unsigned int checksumFlag;
-} ZSTD_frameParams;
+typedef ZSTD_frameHeader zstd_frame_header;
 
 /**
- * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
- * @fparamsPtr: On success the frame parameters are written here.
- * @src:        The source buffer. It must point to a zstd or skippable frame.
- * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
- *              always large enough to succeed.
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params:   On success the frame parameters are written here.
+ * @src:      The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
  *
- * Return:      0 on success. If more data is required it returns how many bytes
- *              must be provided to make forward progress. Otherwise it returns
- *              an error, which can be checked using ZSTD_isError().
+ * Return:    0 on success. If more data is required it returns how many bytes
+ *            must be provided to make forward progress. Otherwise it returns
+ *            an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
-	size_t srcSize);
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+	size_t src_size);
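+
+/*
+ * Illustrative sketch: read the frame header to size a streaming
+ * decompression workspace. max_window_size is an assumed caller-chosen
+ * limit; an untrusted header's windowSize must be validated against it
+ * before allocating. The err and need_more_input labels are assumed
+ * caller-provided:
+ *
+ *	zstd_frame_header fh;
+ *	size_t wksp_size;
+ *	size_t ret = zstd_get_frame_header(&fh, src, src_size);
+ *
+ *	if (zstd_is_error(ret))
+ *		goto err;
+ *	if (ret != 0)
+ *		goto need_more_input;
+ *	if (fh.windowSize > max_window_size)
+ *		goto err;
+ *	wksp_size = zstd_dstream_workspace_bound(fh.windowSize);
+ */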
 
-/*-*****************************************************************************
- * Buffer-less and synchronous inner streaming functions
- *
- * This is an advanced API, giving full control over buffer management, for
- * users which need direct control over memory.
- * But it's also a complex one, with many restrictions (documented below).
- * Prefer using normal streaming API for an easier experience
- ******************************************************************************/
-
-/*-*****************************************************************************
- * Buffer-less streaming compression (synchronous mode)
- *
- * A ZSTD_CCtx object is required to track streaming operations.
- * Use ZSTD_initCCtx() to initialize a context.
- * ZSTD_CCtx object can be re-used multiple times within successive compression
- * operations.
- *
- * Start by initializing a context.
- * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
- * compression,
- * or ZSTD_compressBegin_advanced(), for finer parameter control.
- * It's also possible to duplicate a reference context which has already been
- * initialized, using ZSTD_copyCCtx()
- *
- * Then, consume your input using ZSTD_compressContinue().
- * There are some important considerations to keep in mind when using this
- * advanced function :
- * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
- *   buffer only.
- * - Interface is synchronous : input is consumed entirely and produce 1+
- *   (or more) compressed blocks.
- * - Caller must ensure there is enough space in `dst` to store compressed data
- *   under worst case scenario. Worst case evaluation is provided by
- *   ZSTD_compressBound().
- *   ZSTD_compressContinue() doesn't guarantee recover after a failed
- *   compression.
- * - ZSTD_compressContinue() presumes prior input ***is still accessible and
- *   unmodified*** (up to maximum distance size, see WindowLog).
- *   It remembers all previous contiguous blocks, plus one separated memory
- *   segment (which can itself consists of multiple contiguous blocks)
- * - ZSTD_compressContinue() detects that prior input has been overwritten when
- *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
- *   section from its history.
- *
- * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
- * and optional checksum. It's possible to use srcSize==0, in which case, it
- * will write a final empty block to end the frame. Without last block mark,
- * frames will be considered unfinished (corrupted) by decoders.
- *
- * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
- * frame.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming compression functions  =====*/
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, ZSTD_parameters params,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-
-
-
-/*-*****************************************************************************
- * Buffer-less streaming decompression (synchronous mode)
- *
- * A ZSTD_DCtx object is required to track streaming operations.
- * Use ZSTD_initDCtx() to initialize a context.
- * A ZSTD_DCtx object can be re-used multiple times.
- *
- * First typical operation is to retrieve frame parameters, using
- * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
- * important information to correctly decode the frame, such as the minimum
- * rolling buffer size to allocate to decompress data (`windowSize`), and the
- * dictionary ID used.
- * Note: content size is optional, it may not be present. 0 means unknown.
- * Note that these values could be wrong, either because of data malformation,
- * or because an attacker is spoofing deliberate false information. As a
- * consequence, check that values remain within valid application range,
- * especially `windowSize`, before allocation. Each application can set its own
- * limit, depending on local restrictions. For extended interoperability, it is
- * recommended to support at least 8 MB.
- * Frame parameters are extracted from the beginning of the compressed frame.
- * Data fragment must be large enough to ensure successful decoding, typically
- * `ZSTD_frameHeaderSize_max` bytes.
- * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
- *        >0: `srcSize` is too small, provide at least this many bytes.
- *        errorCode, which can be tested using ZSTD_isError().
- *
- * Start decompression, with ZSTD_decompressBegin() or
- * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
- * context, using ZSTD_copyDCtx().
- *
- * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
- * alternatively.
- * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
- * to ZSTD_decompressContinue().
- * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
- * fail.
- *
- * The result of ZSTD_decompressContinue() is the number of bytes regenerated
- * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
- * error; it just means ZSTD_decompressContinue() has decoded some metadata
- * item. It can also be an error code, which can be tested with ZSTD_isError().
- *
- * ZSTD_decompressContinue() needs previous data blocks during decompression, up
- * to `windowSize`. They should preferably be located contiguously, prior to
- * current block. Alternatively, a round buffer of sufficient size is also
- * possible. Sufficient size is determined by frame parameters.
- * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
- * follow each other, make sure that either the compressor breaks contiguity at
- * the same place, or that previous contiguous segment is large enough to
- * properly handle maximum back-reference.
- *
- * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- * Context can then be reset to start a new decompression.
- *
- * Note: it's possible to know if next input to present is a header or a block,
- * using ZSTD_nextInputType(). This information is not required to properly
- * decode a frame.
- *
- * == Special case: skippable frames ==
- *
- * Skippable frames allow integration of user-defined data into a flow of
- * concatenated frames. Skippable frames will be ignored (skipped) by a
- * decompressor. The format of skippable frames is as follows:
- * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
- *    0x184D2A50 to 0x184D2A5F
- * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
- * c) Frame Content - any content (User Data) of length equal to Frame Size
- * For skippable frames ZSTD_decompressContinue() always returns 0.
- * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
- * what means that a frame is skippable.
- * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
- *       actually be a zstd encoded frame with no content. For purposes of
- *       decompression, it is valid in both cases to skip the frame using
- *       ZSTD_findFrameCompressedSize() to find its size in bytes.
- * It also returns frame size as fparamsPtr->frameContentSize.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming decompression functions  =====*/
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
-	size_t dictSize);
-void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-typedef enum {
-	ZSTDnit_frameHeader,
-	ZSTDnit_blockHeader,
-	ZSTDnit_block,
-	ZSTDnit_lastBlock,
-	ZSTDnit_checksum,
-	ZSTDnit_skippableFrame
-} ZSTD_nextInputType_e;
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
-
-/*-*****************************************************************************
- * Block functions
- *
- * Block functions produce and decode raw zstd blocks, without frame metadata.
- * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
- * very small blocks (< 100 bytes). User will have to take in charge required
- * information to regenerate data, such as compressed and content sizes.
- *
- * A few rules to respect:
- * - Compressing and decompressing require a context structure
- *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
- * - It is necessary to init context before starting
- *   + compression : ZSTD_compressBegin()
- *   + decompression : ZSTD_decompressBegin()
- *   + variants _usingDict() are also allowed
- *   + copyCCtx() and copyDCtx() work too
- * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- *   + If you need to compress more, cut data into multiple blocks
- *   + Consider using the regular ZSTD_compress() instead, as frame metadata
- *     costs become negligible when source size is large.
- * - When a block is considered not compressible enough, ZSTD_compressBlock()
- *   result will be zero. In which case, nothing is produced into `dst`.
- *   + User must test for such outcome and deal directly with uncompressed data
- *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
- *   + In case of multiple successive blocks, decoder must be informed of
- *     uncompressed block existence to follow proper history. Use
- *     ZSTD_insertBlock() in such a case.
- ******************************************************************************/
-
-/* Define for static allocation */
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
-/*=====   Raw zstd block functions  =====*/
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
-	size_t blockSize);
-
-#endif  /* ZSTD_H */
+#endif  /* LINUX_ZSTD_H */
diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
new file mode 100644
index 0000000..58b6dd4
--- /dev/null
+++ b/include/linux/zstd_errors.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#define ZSTDERRORLIB_VISIBILITY 
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum rather than on its value whenever
+ *           possible. This is the only supported way to use the error list
+ *           < v1.3.1.
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall = 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
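+
+/* A minimal usage sketch of this error API : route a failing zstd result
+ * through ZSTD_getErrorCode() to react to a specific condition.
+ * ZSTD_compress() and ZSTD_isError() are declared in zstd_lib.h; the
+ * buffers and sizes are assumptions of the example.
+ *
+ *     size_t const ret = ZSTD_compress(dst, dstCapacity, src, srcSize, 3);
+ *     if (ZSTD_isError(ret)) {
+ *         ZSTD_ErrorCode const code = ZSTD_getErrorCode(ret);
+ *         if (code == ZSTD_error_dstSize_tooSmall) {
+ *             // grow dst, e.g. to ZSTD_compressBound(srcSize), and retry
+ *         }
+ *     }
+ */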
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
new file mode 100644
index 0000000..b8c7dbf9
--- /dev/null
+++ b/include/linux/zstd_lib.h
@@ -0,0 +1,2432 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
+#include <linux/limits.h>   /* INT_MAX */
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#define ZSTDLIB_VISIBILITY
+#define ZSTDLIB_API ZSTDLIB_VISIBILITY
+
+
+/* *****************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit context)
+    - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+/*------   Version   ------*/
+#define ZSTD_VERSION_MAJOR    1
+#define ZSTD_VERSION_MINOR    4
+#define ZSTD_VERSION_RELEASE  10
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/*! ZSTD_versionNumber() :
+ *  Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);
+
+#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+#define ZSTD_QUOTE(str) #str
+#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+
+/*! ZSTD_versionString() :
+ *  Return runtime library version, like "1.4.5". Requires v1.3.0+. */
+ZSTDLIB_API const char* ZSTD_versionString(void);
+
+/* *************************************
+ *  Default constant
+ ***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+/* *************************************
+ *  Constants
+ ***************************************/
+
+/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
+#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
+#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
+
+#define ZSTD_BLOCKSIZELOG_MAX  17
+#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
+
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTD_compress() :
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity`),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+
+/*! ZSTD_decompress() :
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If the user cannot infer a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
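+
+/* A minimal one-shot round trip with the two functions above; just a sketch,
+ * assuming `cBuf` holds at least ZSTD_compressBound(srcSize) bytes (== cCap)
+ * and `dCap` is an upper bound of the original size.
+ *
+ *     size_t const cSize = ZSTD_compress(cBuf, cCap, src, srcSize, ZSTD_CLEVEL_DEFAULT);
+ *     if (ZSTD_isError(cSize)) return cSize;   // see ZSTD_getErrorName()
+ *     size_t const dSize = ZSTD_decompress(dBuf, dCap, cBuf, cSize);
+ *     // on success, dSize == srcSize and dBuf holds the original bytes
+ */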
+
+/*! ZSTD_getFrameContentSize() : requires v1.3.0+
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of `src` frame content, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
+ *            such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
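+
+/* Sketch of sizing a destination buffer from the frame header, per the notes
+ * above; the local `limit` check stands in for the application's own bound.
+ *
+ *     unsigned long long const rSize = ZSTD_getFrameContentSize(src, srcSize);
+ *     if (rSize == ZSTD_CONTENTSIZE_ERROR) return -1;     // not a zstd frame
+ *     if (rSize == ZSTD_CONTENTSIZE_UNKNOWN) return -1;   // use streaming mode instead
+ *     if (rSize > limit) return -1;                       // note 5 : never trust blindly
+ *     // allocate rSize bytes and pass it as dstCapacity to ZSTD_decompress()
+ */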
+
+/*! ZSTD_getDecompressedSize() :
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results to the same return value (0),
+ *  while ZSTD_getFrameContentSize() gives them separate return values.
+ * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD frame or skippable frame.
+ * `srcSize` must be >= first frame size
+ * @return : the compressed size of the first frame starting at `src`,
+ *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+ *        or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+
+/*======  Helper functions  ======*/
+#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+
+
+/* *************************************
+*  Explicit context
+***************************************/
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use one different context per thread for parallel execution.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
+
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ *  Important : in order to behave similarly to `ZSTD_compress()`,
+ *  this function compresses at requested compression level,
+ *  __ignoring any other parameter__ .
+ *  If any advanced parameter was set using the advanced API,
+ *  they will all be reset. Only `compressionLevel` remains.
+ */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
+
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive decompression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
+
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(),
+ *  requires an allocated ZSTD_DCtx.
+ *  Compatible with sticky parameters.
+ */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
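+
+/* Sketch of context re-use across many inputs, as recommended above; the
+ * msg[] / out[] arrays and their sizes are assumptions of the example.
+ *
+ *     ZSTD_CCtx *const cctx = ZSTD_createCCtx();
+ *     if (cctx == NULL) return -1;             // allocation failure
+ *     for (i = 0; i < nbMessages; i++) {
+ *         size_t const cSize = ZSTD_compressCCtx(cctx, out[i], outCapacity,
+ *                                                msg[i], msgSize[i], 3);
+ *         if (ZSTD_isError(cSize)) break;
+ *     }
+ *     ZSTD_freeCCtx(cctx);
+ */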
+
+
+/* *************************************
+*  Advanced compression API
+***************************************/
+
+/* API design :
+ *   Parameters are pushed one by one into an existing context,
+ *   using ZSTD_CCtx_set*() functions.
+ *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+ *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+ *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *
+ *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
+ *
+ *   This API supersedes all other "advanced" API entry points in the experimental section.
+ *   In the future, we expect to remove, from the experimental API, entry points which are redundant with this API.
+ */
+
+
+/* Compression strategies, listed from fastest to strongest */
+typedef enum { ZSTD_fast=1,
+               ZSTD_dfast=2,
+               ZSTD_greedy=3,
+               ZSTD_lazy=4,
+               ZSTD_lazy2=5,
+               ZSTD_btlazy2=6,
+               ZSTD_btopt=7,
+               ZSTD_btultra=8,
+               ZSTD_btultra2=9
+               /* note : new strategies _might_ be added in the future.
+                         Only the order (from fast to strong) is guaranteed */
+} ZSTD_strategy;
+
+
+typedef enum {
+
+    /* compression parameters
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict.
+     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+                              * Note that exact compression parameters are dynamically determined,
+                              * depending on both compression level and srcSize (when known).
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
+                              * Note 1 : it's possible to pass a negative compression level.
+                              * Note 2 : setting a level does not automatically set all other compression parameters
+                              *   to default. Setting this will however eventually dynamically impact the compression
+                              *   parameters which have not been manually set. The manually set
+                              *   ones will 'stick'. */
+    /* Advanced compression parameters :
+     * It's possible to pin down compression parameters to some specific values.
+     * In which case, these values are no longer dynamically selected by the compressor */
+    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * This will set a memory budget for streaming decompression,
+                              * with larger values requiring more memory
+                              * and typically compressing more.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "use default windowLog".
+                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
+                              *       requires explicitly allowing such size at streaming decompression stage. */
+    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
+                              * Resulting memory usage is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "use default hashLog". */
+    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
+                              * Resulting memory usage is (1 << (chainLog+2)).
+                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless for "fast" strategy.
+                              * It's still useful when using "dfast" strategy,
+                              * in which case it defines a secondary probe table.
+                              * Special: value 0 means "use default chainLog". */
+    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless for "fast" and "dFast" strategies.
+                              * Special: value 0 means "use default searchLog". */
+    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
+                              * Note that Zstandard can still find matches of smaller size,
+                              * it just tweaks its search algorithm to look for this size and larger.
+                              * Larger values increase compression and decompression speed, but decrease ratio.
+                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              * and, for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
+    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
+                              * For strategies btopt, btultra & btultra2:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
+                              * Special: value 0 means "use default targetLength". */
+    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "use default strategy". */
+
+    /* LDM mode parameters */
+    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                     * This parameter is designed to improve compression ratio
+                                     * for large inputs, by finding large matches at long distance.
+                                     * It increases memory usage and window size.
+                                     * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
+                                     * except when expressly set to a different value.
+                                     * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
+                                     * compression strategy >= ZSTD_btopt (== compression level 16+) */
+    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+                              * (default: windowlog - 7).
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashRateLog". */
+
+    /* frame parameters */
+    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
+                              * Content size must be known at the beginning of compression.
+                              * This is automatically the case when using ZSTD_compress2().
+                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize(). */
+    ZSTD_c_checksumFlag=201, /* A 32-bit checksum of content is written at end of frame (default:0) */
+    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+    /* multi-threading parameters */
+    /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+     * Otherwise, trying to set any value other than the default (0) will trigger an error.
+     * In a situation where it's unknown if the linked library supports multi-threading or not,
+     * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
+     */
+    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
+                              * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
+                              * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
+                              * while compression is performed in parallel, within worker thread(s).
+                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+                              * More workers improve speed, but also increase memory usage.
+                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
+                              * compression is performed inside Caller's thread, and all invocations are blocking */
+    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be at least the overlap size, or 1 MB, whichever is larger.
+                              * The minimum size is automatically and transparently enforced. */
+    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
+                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+                              * It helps preserve compression ratio, while each job is compressed in parallel.
+                              * This value is enforced only when nbWorkers >= 1.
+                              * Larger values increase compression ratio, but decrease speed.
+                              * Possible values range from 0 to 9 :
+                              * - 0 means "default" : value will be determined by the library, depending on strategy
+                              * - 1 means "no overlap"
+                              * - 9 means "full overlap", using a full window size.
+                              * Each intermediate rank increases/decreases load size by a factor 2 :
+                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
+                              * default value varies between 6 and 9, depending on strategy */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_c_rsyncable
+     * ZSTD_c_format
+     * ZSTD_c_forceMaxWindow
+     * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
+     * ZSTD_c_enableDedicatedDictSearch
+     * ZSTD_c_stableInBuffer
+     * ZSTD_c_stableOutBuffer
+     * ZSTD_c_blockDelimiters
+     * ZSTD_c_validateSequences
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly;
+     *        also, the enums values themselves are unstable and can still change.
+     */
+     ZSTD_c_experimentalParam1=500,
+     ZSTD_c_experimentalParam2=10,
+     ZSTD_c_experimentalParam3=1000,
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
+     ZSTD_c_experimentalParam8=1005,
+     ZSTD_c_experimentalParam9=1006,
+     ZSTD_c_experimentalParam10=1007,
+     ZSTD_c_experimentalParam11=1008,
+     ZSTD_c_experimentalParam12=1009
+} ZSTD_cParameter;
+
+typedef struct {
+    size_t error;
+    int lowerBound;
+    int upperBound;
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is generally only possible during frame initialization (before starting compression).
+ *  Exception : when using multi-threading mode (nbWorkers >= 1),
+ *              the following parameters can be updated _during_ compression (within same frame):
+ *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ *              new parameters will be active for next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
+ *  This value will also be checked at end of frame, and trigger an error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ *  Note 3 : Whenever all input data is provided and consumed in a single round,
+ *           for example with ZSTD_compress2(),
+ *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
+ *           this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+    ZSTD_reset_session_only = 1,
+    ZSTD_reset_parameters = 2,
+    ZSTD_reset_session_and_parameters = 3
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ *  There are 2 different things that can be reset, independently or jointly :
+ *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ *                  Useful after an error, or to interrupt any ongoing compression.
+ *                  Any internal data not yet flushed is cancelled.
+ *                  Compression parameters and dictionary remain unchanged.
+ *                  They will be used to compress next frame.
+ *                  Resetting session never fails.
+ *  - The parameters : changes all parameters back to "default".
+ *                  This removes any reference to any dictionary too.
+ *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ *  - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  ZSTD_compress2() always starts a new frame.
+ *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - The function is always blocking, returns when compression is completed.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ *           or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
+                                   void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
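+
+/* Sketch of the advanced one-shot flow : push sticky parameters, then call
+ * ZSTD_compress2(). The chosen values (level 19, checksum enabled) are
+ * arbitrary illustrations.
+ *
+ *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ *     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ *     // the parameters remain set ("sticky") for any further frame on this cctx
+ */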
+
+
+/* *************************************
+*  Advanced decompression API
+***************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ *        Therefore, no new decompression function is necessary.
+ */
+
+typedef enum {
+
+    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
+                              * the streaming API will refuse to allocate memory buffer
+                              * in order to protect the host from unreasonable memory requirements.
+                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_d_format
+     * ZSTD_d_stableOutBuffer
+     * ZSTD_d_forceIgnoreChecksum
+     * ZSTD_d_refMultipleDDicts
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly
+     */
+     ZSTD_d_experimentalParam1=1000,
+     ZSTD_d_experimentalParam2=1001,
+     ZSTD_d_experimentalParam3=1002,
+     ZSTD_d_experimentalParam4=1003
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ *  Set one decompression parameter, selected by enum ZSTD_dParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ *  Return a DCtx to clean state.
+ *  Session and parameters can be reset jointly or separately.
+ *  Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
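+
+/* Sketch pairing the two calls above : raise the streaming window limit for
+ * frames known to use a large window, then reset only the session between
+ * frames so the parameter sticks. The value 30 (1 GB window) is arbitrary.
+ *
+ *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 30);
+ *     ... decompress one frame with ZSTD_decompressStream() ...
+ *     ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);   // parameter is kept
+ */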
+
+
+/* **************************
+*  Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+  const void* src;    /*< start of input buffer */
+  size_t size;        /*< size of input buffer */
+  size_t pos;         /*< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+  void*  dst;         /*< start of output buffer */
+  size_t size;        /*< size of output buffer */
+  size_t pos;         /*< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+
+/*-***********************************************************************
+*  Streaming compression - HowTo
+*
+*  A ZSTD_CStream object is required to track streaming operation.
+*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+*  For parallel execution, use one separate ZSTD_CStream per thread.
+*
+*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+*  Parameters are sticky : when starting a new compression on the same context,
+*  it will re-use the same sticky parameters as previous compression session.
+*  When in doubt, it's recommended to fully initialize the context before usage.
+*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+*  set more specific parameters, the pledged source size, or load a dictionary.
+*
+*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+*  consume input stream. The function will automatically update both `pos`
+*  fields within `input` and `output`.
+*  Note that the function may not consume the entire input, for example, because
+*  the output buffer is already full, in which case `input.pos < input.size`.
+*  The caller must check if input has been entirely consumed.
+*  If not, the caller must make some room to receive more compressed data,
+*  and then present again remaining input data.
+*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+*        but doesn't guarantee maximal forward progress. This is especially relevant
+*        when compressing with multiple threads. The call won't block if it can
+*        consume some input, but if it can't it will wait for some, but not all,
+*        output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+*           or an error code, which can be tested using ZSTD_isError().
+*
+*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+*  operation.
+*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if internal buffers are entirely flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  The epilogue is required for decoders to consider a frame completed.
+*  The flush operation is the same, and follows the same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+*  start a new frame.
+*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if frame fully completed and fully flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /*< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - endOp must be a valid directive
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flushes to output whatever is available,
+ *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *                                                  The function nonetheless guarantees forward progress : it will return only after it has read or written at least one byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            Before starting a new compression job, or changing compression parameters,
+ *            it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare for more time to end up being spent on the interface than on compression itself.
+ * In such cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the number of round trips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /*< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
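+
+/* Sketch of the ZSTD_e_end loop from the HowTo above, compressing a single
+ * in-memory buffer; `outBuf` is assumed to hold ZSTD_CStreamOutSize() bytes
+ * and write_out() stands in for the caller's output handling.
+ *
+ *     ZSTD_inBuffer input = { src, srcSize, 0 };
+ *     size_t remaining;
+ *     do {
+ *         ZSTD_outBuffer output = { outBuf, ZSTD_CStreamOutSize(), 0 };
+ *         remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+ *         if (ZSTD_isError(remaining)) return remaining;
+ *         write_out(outBuf, output.pos);
+ *     } while (remaining != 0);   // 0 means the frame is fully flushed
+ */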
+
+
+/* *****************************************************************************
+ * This following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
+ *
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+*  Streaming decompression - HowTo
+*
+*  A ZSTD_DStream object is required to track streaming operations.
+*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+*  ZSTD_DStream objects can be re-used multiple times.
+*
+*  Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+*  Alternatively, use advanced API to set specific properties.
+*
+*  Use ZSTD_decompressStream() repetitively to consume your input.
+*  The function will update both `pos` fields.
+*  If `input.pos < input.size`, some input has not been consumed.
+*  It's up to the caller to present again remaining data.
+*  The function tries to flush all data decoded immediately, respecting output buffer size.
+*  If `output.pos < output.size`, decoder has flushed everything it could.
+*  But if `output.pos == output.size`, there might be some data left within internal buffers.
+*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*        or an error code, which can be tested using ZSTD_isError(),
+*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*                                the return value is a suggested next input size (just a hint for better latency)
+*                                that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /*< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
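+
+/* Sketch of the decompression loop from the HowTo above; read_more() and
+ * consume() stand in for the caller's I/O, and the buffers are assumed to
+ * follow the recommended sizes.
+ *
+ *     ZSTD_initDStream(zds);
+ *     while ((srcSize = read_more(inBuf, ZSTD_DStreamInSize())) != 0) {
+ *         ZSTD_inBuffer input = { inBuf, srcSize, 0 };
+ *         while (input.pos < input.size) {
+ *             ZSTD_outBuffer output = { outBuf, ZSTD_DStreamOutSize(), 0 };
+ *             size_t const ret = ZSTD_decompressStream(zds, &output, &input);
+ *             if (ZSTD_isError(ret)) return ret;
+ *             consume(outBuf, output.pos);
+ *         }
+ *     }
+ */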
+
+
+/* ************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
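+
+/* Sketch of a one-shot dictionary round trip with the two calls above; the
+ * exact same dict bytes must be presented on both sides.
+ *
+ *     size_t const cSize = ZSTD_compress_usingDict(cctx, cBuf, cCap,
+ *                                                  src, srcSize,
+ *                                                  dict, dictSize, 3);
+ *     size_t const dSize = ZSTD_decompress_usingDict(dctx, dBuf, dCap,
+ *                                                    cBuf, cSize,
+ *                                                    dict, dictSize);
+ */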
+
+
+/* *********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
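+
+/* Sketch of the bulk-processing pattern : digest the dictionary once, then
+ * share the read-only CDict across many small messages; the message arrays
+ * are assumptions of the example.
+ *
+ *     ZSTD_CDict *const cdict = ZSTD_createCDict(dict, dictSize, 3);
+ *     for (i = 0; i < nbMessages; i++)
+ *         cSize[i] = ZSTD_compress_usingCDict(cctx, cBuf[i], cCap,
+ *                                             msg[i], msgSize[i], cdict);
+ *     ZSTD_freeCDict(cdict);
+ *     // decompression side : ZSTD_createDDict() + ZSTD_decompress_usingDDict()
+ */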
+
+
+/* ******************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/* *****************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU-consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
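+
+/* Usage sketch (illustrative only) : "load once, compress many times".
+ * Parameters are set before loading, since loading freezes them (Note 2 above).
+ * `dictBuf`/`dictSize` and the I/O buffers are placeholder names :
+ *
+ *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
+ *     ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictSize);
+ *     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ */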
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome to performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary.
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                 const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost.
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ *  will store the DDict references in a table, and the DDict used for decompression
+ *  will be determined at decompression time, as per the dict ID in the frame.
+ *  The memory for the table is allocated on the first call to refDDict, and can be
+ *  freed with ZSTD_freeDCtx().
+ *
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary.
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                 const void* prefix, size_t prefixSize);
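+
+/* Usage sketch (illustrative only) : diff-like (de)compression of a new version
+ * against an old one, referencing the same prefix on both sides.
+ * `oldVersion`/`oldSize` and `newVersion`/`newSize` are placeholder names, and
+ * ZSTD_c_windowLog is assumed large enough to cover the prefix (Note 2 of ZSTD_CCtx_refPrefix) :
+ *
+ *     // compression side
+ *     ZSTD_CCtx_refPrefix(cctx, oldVersion, oldSize);
+ *     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, newVersion, newSize);
+ *
+ *     // decompression side : must reference the exact same prefix
+ *     ZSTD_DCtx_refPrefix(dctx, oldVersion, oldSize);
+ *     size_t const dSize = ZSTD_decompressDCtx(dctx, out, outCapacity, dst, cSize);
+ */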
+
+/* ===   Memory management   === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* **************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
+                               * If offset == 0 and matchLength == 0, this sequence represents the last
+                               * literals in the block of litLength size.
+                               */
+
+    unsigned int litLength;   /* Literal length of the sequence. */
+    unsigned int matchLength; /* Match length of the sequence. */
+
+                              /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+                               * In this case, we will treat the sequence as a marker for a block boundary.
+                               */
+
+    unsigned int rep;         /* Represents which repeat offset is represented by the field 'offset'.
+                               * Ranges from [0, 3].
+                               *
+                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
+                               * recency order. For more detail, see doc/zstd_compression_format.md
+                               *
+                               * If rep == 0, then 'offset' does not contain a repeat offset.
+                               * If rep > 0:
+                               *  If litLength != 0:
+                               *      rep == 1 --> offset == repeat_offset_1
+                               *      rep == 2 --> offset == repeat_offset_2
+                               *      rep == 3 --> offset == repeat_offset_3
+                               *  If litLength == 0:
+                               *      rep == 1 --> offset == repeat_offset_2
+                               *      rep == 2 --> offset == repeat_offset_3
+                               *      rep == 3 --> offset == repeat_offset_1 - 1
+                               *
+                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+                               * use this 'rep' field at all (as of now).
+                               */
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /*< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /*< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /*< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /*< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /*< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /*< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /*< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /*< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+    ZSTD_dlm_byCopy = 0,  /*< Copy dictionary content internally */
+    ZSTD_dlm_byRef = 1    /*< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
+    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
+                                 * Useful to save 4 bytes per generated frame.
+                                 * The decoder cannot recognise this format automatically,
+                                 * so it must be instructed explicitly. */
+} ZSTD_format_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
+    ZSTD_d_validateChecksum = 0,
+    ZSTD_d_ignoreChecksum = 1
+} ZSTD_forceIgnoreChecksum_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_refMultipleDDicts */
+    ZSTD_rmd_refSingleDDict = 0,
+    ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in three ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+  ZSTD_lcm_auto = 0,          /*< Automatically determine the compression mode based on the compression level.
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       /*< Always attempt Huffman compression. Uncompressed literals will still be
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2   /*< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+
+/* *************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - upper-bound for the decompressed size of all data in all successive frames
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
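+
+/* Usage sketch (illustrative only) : sizing an output buffer for a series of frames.
+ * Prefer the exact size when present, fall back to the upper bound, and validate
+ * against an application limit. `MY_APP_MAX_OUTPUT` is a hypothetical limit :
+ *
+ *     unsigned long long size = ZSTD_findDecompressedSize(src, srcSize);
+ *     if (size == ZSTD_CONTENTSIZE_UNKNOWN)
+ *         size = ZSTD_decompressBound(src, srcSize);
+ *     if (size == ZSTD_CONTENTSIZE_ERROR || size > MY_APP_MAX_OUTPUT)
+ *         return failure;   // invalid input, or output too large to accept
+ */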
+
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0; if so, the sequence (of: 0, ml: 0, ll: 0)
+ * simply acts as a block delimiter.
+ *
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2().
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ *
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ *    the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1, and a sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements), then the function will bail out and return an error.
+ *
+ *    In addition to the two adjustable experimental params, there are other important cctx params.
+ *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                  const void* src, size_t srcSize);
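+
+/* Usage sketch (illustrative only) : the generate -> compress pipeline described
+ * above. `seqs`/`seqsCapacity` are a placeholder caller-provided sequence buffer :
+ *
+ *     size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ *     size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                                 seqs, nbSeqs, src, srcSize);
+ */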
+
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize, unsigned magicVariant);
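+
+/* Usage sketch (illustrative only) : embedding application metadata as a skippable
+ * frame, using variant 0 (magic number == ZSTD_MAGIC_SKIPPABLE_START).
+ * `meta`/`metaSize` are placeholder names for the payload :
+ *
+ *     size_t const written = ZSTD_writeSkippableFrame(dst, dstCapacity, meta, metaSize, 0);
+ *     if (ZSTD_isError(written)) { ... }   // e.g. dstCapacity too small
+ */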
+
+
+/* *************************************
+*  Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ *  for any compression level up to selected one.
+ *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ *         does not include space for a window buffer.
+ *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  The estimate will assume the input may be arbitrarily large,
+ *  which is the worst case.
+ *
+ *  When srcSize can be bound by a known and rather "small" value,
+ *  this fact can be used to provide a tighter estimation
+ *  because the CCtx compression context will need less memory.
+ *  This tighter estimation can be provided by more advanced functions
+ *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ *  Note 2 : only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is initialized with ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, whose additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
+ */
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
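+
+/* Usage sketch (illustrative only) : carving a DCtx out of a caller-provided buffer.
+ * `my_alloc_aligned8()` is a hypothetical allocator returning 8-byte aligned memory :
+ *
+ *     size_t const need = ZSTD_estimateDCtxSize();
+ *     void* const wksp = my_alloc_aligned8(need);
+ *     ZSTD_DCtx* const dctx = ZSTD_initStaticDCtx(wksp, need);
+ *     if (dctx == NULL) { ... }   // workspace too small or misaligned
+ */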
+
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static
+__attribute__((__unused__))
+ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /*< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
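+
+/* Usage sketch (illustrative only) : routing all zstd allocations through custom
+ * callbacks. `pool_alloc()`/`pool_free()` and `myPool` are hypothetical :
+ *
+ *     static void* my_malloc(void* opaque, size_t size) { return pool_alloc(opaque, size); }
+ *     static void  my_free(void* opaque, void* address) { pool_free(opaque, address); }
+ *
+ *     ZSTD_customMem const cmem = { my_malloc, my_free, myPool };
+ *     ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
+ */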
+
+/* ! Thread pool :
+ * These prototypes make it possible to share a thread pool among multiple compression contexts.
+ * This can limit resources for applications with multiple threads where each one uses
+ * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ * Note that the pool must remain alive for as long as it is in use.
+ * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ * to use an internal thread pool).
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+ */
+typedef struct POOL_ctx_s ZSTD_threadPool;
+ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
+ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
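+
+/* Usage sketch (illustrative only) : two multi-threaded contexts sharing one pool,
+ * so the total worker count stays bounded. The pool must outlive both contexts :
+ *
+ *     ZSTD_threadPool* const pool = ZSTD_createThreadPool(4);
+ *     ZSTD_CCtx_setParameter(cctx1, ZSTD_c_nbWorkers, 4);
+ *     ZSTD_CCtx_refThreadPool(cctx1, pool);
+ *     ZSTD_CCtx_setParameter(cctx2, ZSTD_c_nbWorkers, 4);
+ *     ZSTD_CCtx_refThreadPool(cctx2, pool);
+ *     ...
+ *     ZSTD_freeThreadPool(pool);   // only after both contexts are done with it
+ */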
+
+
+/*
+ * This API is temporary and is expected to change or disappear in the future!
+ */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    const ZSTD_CCtx_params* cctxParams,
+    ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    ZSTD_customMem customMem);
+
+
+/* *************************************
+*  Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is just referenced, not duplicated.
+ *  As a consequence, `dictBuffer` **must** outlive CDict,
+ *  and its content must remain unmodified throughout the lifetime of CDict.
+ *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+ *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ *  Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ *  optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ *  This function never fails (wide contract) */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+ *  Note : this function is now DEPRECATED.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ *         This prototype will be marked as deprecated and will generate a compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  Note : this function is now REDUNDANT.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ *         This prototype will be marked as deprecated and will generate a compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_CDict* cdict,
+                                              ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* ===   experimental parameters   === */
+/* these parameters can be used with ZSTD_CCtx_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_e enum definition for details.
+ */
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if the guess considerably underestimates it */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/* Controls whether the new and experimental "dedicated dictionary search
+ * structure" can be used. This feature is still rough around the edges, be
+ * prepared for surprising behavior!
+ *
+ * How to use it:
+ *
+ * When using a CDict, whether to use this feature or not is controlled at
+ * CDict creation, and it must be set in a CCtxParams set passed into that
+ * construction (via ZSTD_createCDict_advanced2()). A compression will then
+ * use the feature or not based on how the CDict was constructed; the value of
+ * this param, set in the CCtx, will have no effect.
+ *
+ * However, when a dictionary buffer is passed into a CCtx, such as via
+ * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
+ * whether the CDict that is created internally can use the feature or not.
+ *
+ * What it does:
+ *
+ * Normally, the internal data structures of the CDict are analogous to what
+ * would be stored in a CCtx after compressing the contents of a dictionary.
+ * To an approximation, a compression using a dictionary can then use those
+ * data structures to simply continue what is effectively a streaming
+ * compression where the simulated compression of the dictionary left off.
+ * Which is to say, the search structures in the CDict are normally the same
+ * format as in the CCtx.
+ *
+ * It is possible to do better, since the CDict is not like a CCtx: the search
+ * structures are written once during CDict creation, and then are only read
+ * after that, while the search structures in the CCtx are both read and
+ * written as the compression goes along. This means we can choose a search
+ * structure for the dictionary that is read-optimized.
+ *
+ * This feature enables the use of that different structure.
+ *
+ * Note that some of the members of the ZSTD_compressionParameters struct have
+ * different semantics and constraints in the dedicated search structure. It is
+ * highly recommended that you simply set a compression level in the CCtxParams
+ * you pass into the CDict creation call, and avoid messing with the cParams
+ * directly.
+ *
+ * Effects:
+ *
+ * This will only have any effect when the selected ZSTD_strategy
+ * implementation supports this feature. Currently, that's limited to
+ * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
+ *
+ * Note that this means that the CDict tables can no longer be copied into the
+ * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
+ * usable. The dictionary can only be attached or reloaded.
+ *
+ * In general, you should expect compression to be faster--sometimes very much
+ * so--and CDict creation to be slightly slower. Eventually, we will probably
+ * make this mode the default.
+ */
+#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
+
+/* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the compressor, and
+ * compression will fail if it ever changes. This means the only flush
+ * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+ * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ *
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
+/*! ZSTD_CCtx_getParameter() :
+ *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ *  Quick howto :
+ *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ *                                     an existing ZSTD_CCtx_params structure.
+ *                                     This is similar to
+ *                                     ZSTD_CCtx_setParameter().
+ *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ *                                    an existing CCtx.
+ *                                    These parameters will be applied to
+ *                                    all subsequent frames.
+ *  - ZSTD_compressStream2() : Do compression using the CCtx.
+ *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
+ *
+ *  This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
+ *  for static allocation of CCtx for single-threaded compression.
+ */
+ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
+
+/*! ZSTD_CCtxParams_reset() :
+ *  Reset params to default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_init() :
+ *  Initializes the compression parameters of cctxParams according to
+ *  compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+
+/*! ZSTD_CCtxParams_init_advanced() :
+ *  Initializes the compression and frame parameters of cctxParams according to
+ *  params. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() :
+ *  Similar to ZSTD_CCtx_setParameter.
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Parameters must be applied to a ZSTD_CCtx using
+ *  ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @result : a code representing success or failure (which can be tested with
+ *           ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Apply a set of ZSTD_CCtx_params to the compression context.
+ *  This can be done even after compression is started,
+ *    if nbWorkers==0, this will have no impact until a new compression is started.
+ *    if nbWorkers>=1, new parameters will be picked up at next job,
+ *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
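+
+/* Usage sketch (illustrative only) : the "quick howto" above as code.
+ * Parameters are staged in a ZSTD_CCtx_params, then applied in one call :
+ *
+ *     ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+ *     ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 19);
+ *     ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
+ *     ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
+ *     ZSTD_freeCCtxParams(params);
+ *     // ...then compress with ZSTD_compressStream2() / ZSTD_compress2()
+ */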
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(),
+ *  but using only integral types as arguments.
+ *  This variant might be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/* *************************************
+*  Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives the DDict :
+ *  it must remain read accessible throughout the lifetime of the DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but references `dict` content instead of copying it into `dctx`.
+ *  This saves memory if `dict` remains around.
+ *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but gives direct control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ *  Refuses to allocate internal buffers for frames requiring a window size larger than the provided limit.
+ *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/*! ZSTD_DCtx_getParameter() :
+ *  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled, zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
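+
+/* Example (illustrative sketch, error checks omitted) : decompressing a whole
+ * frame into one stable caller-owned buffer. Assumes `src` holds a complete
+ * frame and `dstCapacity` is at least the decompressed frame size
+ * (`dctx`, `dst`, `src` are placeholders) :
+ *
+ *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
+ *   ZSTD_outBuffer out = { dst, dstCapacity, 0 };   // same buffer on every call
+ *   ZSTD_inBuffer  in  = { src, srcSize, 0 };
+ *   while (ZSTD_decompressStream(dctx, &out, &in) != 0) { }   // 0 == frame fully decoded
+ */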
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
+ */
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers.
+ *
+ * Param has values of type ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict() will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* objects themselves.
+ */
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
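+
+/* Example (illustrative sketch) : registering several dictionaries at once,
+ * assuming a heap-allocated `dctx` and two pre-built DDicts (placeholders) :
+ *
+ *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
+ *   ZSTD_DCtx_refDDict(dctx, ddict1);
+ *   ZSTD_DCtx_refDDict(dctx, ddict2);   // stored in addition to ddict1
+ *   // at decompression time, the DDict whose dictID matches the frame is selected
+ */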
+
+
+/*! ZSTD_DCtx_setFormat() :
+ *  Instruct the decoder context about what kind of data to decode next.
+ *  This instruction is mandatory to decode data without a fully-formed header,
+ *  such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+
+/*! ZSTD_decompressStream_simpleArgs() :
+ *  Same as ZSTD_decompressStream(),
+ *  but using only integral types as arguments.
+ *  This can be helpful for binders from dynamic languages
+ *  which have trouble handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos);
+
+
+/* ******************************************************************
+*  Advanced streaming functions
+*  Warning : most of these functions are now redundant with the Advanced API.
+*  Once Advanced API reaches "stable" status,
+*  redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*=====   Advanced Streaming compression functions  =====*/
+
+/*! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/*! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/*! ZSTD_initCStream_usingCDict_advanced() :
+ *   This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ *  start a new frame, using the same parameters as the previous frame.
+ *  This is typically useful to skip the dictionary loading stage, since it will be re-used in-place.
+ *  Note that zcs must be initialized at least once before using ZSTD_resetCStream().
+ *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ *  but it will change to mean "empty" in a future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+
+
+typedef struct {
+    unsigned long long ingested;   /* nb input bytes read and buffered */
+    unsigned long long consumed;   /* nb input bytes actually compressed */
+    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : latest started job nb */
+    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input),
+ * consumed (input actually compressed) and produced (output) for the current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
+ */
+ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+/*! ZSTD_toFlushNow() :
+ *  Tell how many bytes are ready to be flushed immediately.
+ *  Useful for multithreading scenarios (nbWorkers >= 1).
+ *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
+ *  and check its output buffer.
+ * @return : amount of data stored in oldest job and ready to be flushed immediately.
+ *  if @return == 0, it means either :
+ *  + there is no active job (could be checked with ZSTD_getFrameProgression()), or
+ *  + oldest job is still actively compressing data,
+ *    but everything it has produced has also been flushed so far,
+ *    therefore flush speed is limited by production speed of oldest job
+ *    irrespective of the speed of concurrent (and newer) jobs.
+ */
+ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
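+
+/* Example (illustrative sketch) : probing an ongoing multi-threaded
+ * compression, e.g. from a monitoring loop (`cctx` is a placeholder) :
+ *
+ *   ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
+ *   unsigned long long const buffered = fp.ingested - fp.consumed;  // input not yet compressed
+ *   size_t const flushable = ZSTD_toFlushNow(cctx);                 // bytes flushable right now
+ */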
+
+
+/*=====   Advanced Streaming decompression functions  =====*/
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+ *
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
+ *
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+
+
+/* *******************************************************************
+*  Buffer-less and synchronous inner streaming functions
+*
+*  This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+*  But it's also a complex one, with several restrictions, documented below.
+*  Prefer normal streaming API for an easier experience.
+********************************************************************* */
+
+/*
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  A ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when the `src` buffer overlaps with it.
+    In that case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
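+
+/* Example (illustrative sketch, error checks omitted) : buffer-less
+ * compression of `nbChunks` in-memory pieces. Assumes all chunks remain
+ * accessible and unmodified, and `dstCapacity` covers the worst case
+ * (`chunk`, `nbChunks`, `dst`, `cctx` are placeholders) :
+ *
+ *   size_t pos = 0, n;
+ *   ZSTD_compressBegin(cctx, 3);                     // compression level 3
+ *   for (n = 0; n < nbChunks; n++)
+ *       pos += ZSTD_compressContinue(cctx, dst + pos, dstCapacity - pos,
+ *                                    chunk[n].ptr, chunk[n].size);
+ *   pos += ZSTD_compressEnd(cctx, dst + pos, dstCapacity - pos, NULL, 0);  // last block + epilogue
+ */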
+
+
+/*
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           or an errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory-efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if the required value is too large for the current system (in 32-bit mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to the previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  whose maximum size is provided in the `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At that point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are set up, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by the decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bit value
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+*/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to frameContentSize */
+    unsigned blockSizeMax;
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ *  decode the Frame Header, or require a larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
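+
+/* Example (illustrative sketch, error checks omitted) : decoding one frame,
+ * assuming `src` holds the whole frame and `dst` is one contiguous buffer
+ * large enough for the decompressed content, which keeps the back-reference
+ * window contiguous (`dctx`, `dst`, `src` are placeholders) :
+ *
+ *   size_t srcPos = 0, dstPos = 0, next;
+ *   ZSTD_decompressBegin(dctx);
+ *   while ((next = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
+ *       dstPos += ZSTD_decompressContinue(dctx, dst + dstPos, dstCapacity - dstPos,
+ *                                         src + srcPos, next);   // exactly `next` bytes
+ *       srcPos += next;
+ *   }
+ */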
+
+/* misc */
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/*       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users will have to take charge of the metadata needed to regenerate the data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to initialize the context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if the ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up the statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        the decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/
+
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
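+
+/* Example (illustrative sketch, error checks omitted) : one block, with the
+ * mandatory handling of the "not compressible" outcome
+ * (`cctx`, `dst`, `src` are placeholders) :
+ *
+ *   size_t const cSize = ZSTD_compressBlock(cctx, dst, dstCapacity, src, srcSize);
+ *   if (cSize == 0) {
+ *       // nothing was written into dst : store `src` raw instead,
+ *       // and inform the decoder side via ZSTD_insertBlock()
+ *       memcpy(dst, src, srcSize);
+ *   }
+ */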
+
+
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
+
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c412dde..83b8070 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -485,6 +485,7 @@
 		 struct fib6_config *cfg, gfp_t gfp_flags,
 		 struct netlink_ext_ack *extack);
 void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
 
 int call_fib6_entry_notifiers(struct net *net,
 			      enum fib_event_type event_type,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index afbce90..45e0339 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -47,6 +47,7 @@
 			    struct fib6_config *cfg, gfp_t gfp_flags,
 			    struct netlink_ext_ack *extack);
 	void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+	void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
 	void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
 	int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
 	void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index a964dae..ea85956 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -30,6 +30,7 @@
 	NCI_UP,
 	NCI_DATA_EXCHANGE,
 	NCI_DATA_EXCHANGE_TO,
+	NCI_UNREG,
 };
 
 /* NCI device states */
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index ddcee12..145acb8 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -19,6 +19,8 @@
  *
  */
 
+#include <linux/types.h>
+
 #define NL802154_GENL_NAME "nl802154"
 
 enum nl802154_commands {
@@ -150,10 +152,9 @@
 };
 
 enum nl802154_iftype {
-	/* for backwards compatibility TODO */
-	NL802154_IFTYPE_UNSPEC = -1,
+	NL802154_IFTYPE_UNSPEC = (~(__u32)0),
 
-	NL802154_IFTYPE_NODE,
+	NL802154_IFTYPE_NODE = 0,
 	NL802154_IFTYPE_MONITOR,
 	NL802154_IFTYPE_COORD,
 
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 3855f06..a408240 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -216,14 +216,24 @@
 	page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
+		(sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr;
+
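+	/* dma_addr_upper holds the high 32 bits. Shifting by 16 twice,
+	 * rather than by 32, avoids an invalid shift-count warning when
+	 * dma_addr_t is only 32 bits wide (the branch is then dead code
+	 * but must still compile).
+	 */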
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+	return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
 	page->dma_addr = addr;
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		page->dma_addr_upper = upper_32_bits(addr);
 }
 
 static inline void page_pool_set_frag_count(struct page *page, long nr)
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 2758d9d..c2a79ae 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -30,7 +30,7 @@
  * constant as well and the compiler checks they are the same.
  */
 #define MODULE_ALIAS_RDMA_NETLINK(_index, _val)                                \
-	static inline void __chk_##_index(void)                                \
+	static inline void __maybe_unused __chk_##_index(void)                 \
 	{                                                                      \
 		BUILD_BUG_ON(_index != _val);                                  \
 	}                                                                      \
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index cd9eaa83..477a800 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -73,7 +73,7 @@
 struct scsi_cmnd {
 	struct scsi_request req;
 	struct scsi_device *device;
-	struct list_head eh_entry; /* entry for the host eh_cmd_q */
+	struct list_head eh_entry; /* entry for the host eh_abort_list/eh_cmd_q */
 	struct delayed_work abort_work;
 
 	struct rcu_head rcu;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 83a7890..d1c6fc8 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -226,12 +226,6 @@
 
 	struct device		sdev_gendev,
 				sdev_dev;
-	/*
-	 * The array size 6 provides space for one attribute group for the
-	 * SCSI core, four attribute groups defined by SCSI LLDs and one
-	 * terminating NULL pointer.
-	 */
-	const struct attribute_group *gendev_attr_groups[6];
 
 	struct execute_work	ew; /* used to get process context on put */
 	struct work_struct	requeue_work;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index ae71595..ebe059b 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -551,6 +551,7 @@
 
 	struct mutex		scan_mutex;/* serialize scanning activity */
 
+	struct list_head	eh_abort_list;
 	struct list_head	eh_cmd_q;
 	struct task_struct    * ehandler;  /* Error recovery thread. */
 	struct completion     * eh_action; /* Wait for specific actions on the
diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h
index eeb1142..4d1dfa1 100644
--- a/include/soc/mscc/ocelot_vcap.h
+++ b/include/soc/mscc/ocelot_vcap.h
@@ -703,6 +703,8 @@
 			   struct netlink_ext_ack *extack);
 int ocelot_vcap_filter_del(struct ocelot *ocelot,
 			   struct ocelot_vcap_filter *rule);
+int ocelot_vcap_filter_replace(struct ocelot *ocelot,
+			       struct ocelot_vcap_filter *filter);
 struct ocelot_vcap_filter *
 ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block,
 				    unsigned long cookie, bool tc_offload);
diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index 653dfff..1051b84 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -36,6 +36,13 @@
 #define SNDRV_DMA_TYPE_CONTINUOUS	1	/* continuous no-DMA memory */
 #define SNDRV_DMA_TYPE_DEV		2	/* generic device continuous */
 #define SNDRV_DMA_TYPE_DEV_WC		5	/* continuous write-combined */
+#ifdef CONFIG_SND_DMA_SGBUF
+#define SNDRV_DMA_TYPE_DEV_SG		3	/* generic device SG-buffer */
+#define SNDRV_DMA_TYPE_DEV_WC_SG	6	/* SG write-combined */
+#else
+#define SNDRV_DMA_TYPE_DEV_SG	SNDRV_DMA_TYPE_DEV /* no SG-buf support */
+#define SNDRV_DMA_TYPE_DEV_WC_SG	SNDRV_DMA_TYPE_DEV_WC
+#endif
 #ifdef CONFIG_GENERIC_ALLOCATOR
 #define SNDRV_DMA_TYPE_DEV_IRAM		4	/* generic device iram-buffer */
 #else
@@ -44,13 +51,6 @@
 #define SNDRV_DMA_TYPE_VMALLOC		7	/* vmalloc'ed buffer */
 #define SNDRV_DMA_TYPE_NONCONTIG	8	/* non-coherent SG buffer */
 #define SNDRV_DMA_TYPE_NONCOHERENT	9	/* non-coherent buffer */
-#ifdef CONFIG_SND_DMA_SGBUF
-#define SNDRV_DMA_TYPE_DEV_SG		SNDRV_DMA_TYPE_NONCONTIG
-#define SNDRV_DMA_TYPE_DEV_WC_SG	6	/* SG write-combined */
-#else
-#define SNDRV_DMA_TYPE_DEV_SG	SNDRV_DMA_TYPE_DEV /* no SG-buf support */
-#define SNDRV_DMA_TYPE_DEV_WC_SG	SNDRV_DMA_TYPE_DEV_WC
-#endif
 
 /*
  * info for buffer allocation
diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 31f4c4f..ac0893d 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -147,7 +147,7 @@
  */
 /* Descriptor for SST ASoC machine driver */
 struct snd_soc_acpi_mach {
-	const u8 id[ACPI_ID_LEN];
+	u8 id[ACPI_ID_LEN];
 	const struct snd_soc_acpi_codecs *comp_ids;
 	const u32 link_mask;
 	const struct snd_soc_acpi_link_adr *links;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index bca73e8..499f5fa 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -1016,31 +1016,32 @@
 		      __entry->vnode, __entry->off, __entry->i_size)
 	    );
 
-TRACE_EVENT(afs_page_dirty,
-	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page),
+TRACE_EVENT(afs_folio_dirty,
+	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
 
-	    TP_ARGS(vnode, where, page),
+	    TP_ARGS(vnode, where, folio),
 
 	    TP_STRUCT__entry(
 		    __field(struct afs_vnode *,		vnode		)
 		    __field(const char *,		where		)
-		    __field(pgoff_t,			page		)
+		    __field(pgoff_t,			index		)
 		    __field(unsigned long,		from		)
 		    __field(unsigned long,		to		)
 			     ),
 
 	    TP_fast_assign(
+		    unsigned long priv = (unsigned long)folio_get_private(folio);
 		    __entry->vnode = vnode;
 		    __entry->where = where;
-		    __entry->page = page->index;
-		    __entry->from = afs_page_dirty_from(page, page->private);
-		    __entry->to = afs_page_dirty_to(page, page->private);
-		    __entry->to |= (afs_is_page_dirty_mmapped(page->private) ?
-				    (1UL << (BITS_PER_LONG - 1)) : 0);
+		    __entry->index = folio_index(folio);
+		    __entry->from  = afs_folio_dirty_from(folio, priv);
+		    __entry->to    = afs_folio_dirty_to(folio, priv);
+		    __entry->to   |= (afs_is_folio_dirty_mmapped(priv) ?
+				      (1UL << (BITS_PER_LONG - 1)) : 0);
 			   ),
 
 	    TP_printk("vn=%p %lx %s %lx-%lx%s",
-		      __entry->vnode, __entry->page, __entry->where,
+		      __entry->vnode, __entry->index, __entry->where,
 		      __entry->from,
 		      __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
 		      __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 4e881d9..f8cb916 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -570,9 +570,10 @@
 );
 
 TRACE_EVENT(f2fs_map_blocks,
-	TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret),
+	TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
+				int create, int flag, int ret),
 
-	TP_ARGS(inode, map, ret),
+	TP_ARGS(inode, map, create, flag, ret),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
@@ -583,11 +584,14 @@
 		__field(unsigned int,	m_flags)
 		__field(int,	m_seg_type)
 		__field(bool,	m_may_create)
+		__field(bool,	m_multidev_dio)
+		__field(int,	create)
+		__field(int,	flag)
 		__field(int,	ret)
 	),
 
 	TP_fast_assign(
-		__entry->dev		= inode->i_sb->s_dev;
+		__entry->dev		= map->m_bdev->bd_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->m_lblk		= map->m_lblk;
 		__entry->m_pblk		= map->m_pblk;
@@ -595,12 +599,16 @@
 		__entry->m_flags	= map->m_flags;
 		__entry->m_seg_type	= map->m_seg_type;
 		__entry->m_may_create	= map->m_may_create;
+		__entry->m_multidev_dio	= map->m_multidev_dio;
+		__entry->create		= create;
+		__entry->flag		= flag;
 		__entry->ret		= ret;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
-		"start blkaddr = 0x%llx, len = 0x%llx, flags = %u,"
-		"seg_type = %d, may_create = %d, err = %d",
+		"start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
+		"seg_type = %d, may_create = %d, multidevice = %d, "
+		"create = %d, flag = %d, err = %d",
 		show_dev_ino(__entry),
 		(unsigned long long)__entry->m_lblk,
 		(unsigned long long)__entry->m_pblk,
@@ -608,6 +616,9 @@
 		__entry->m_flags,
 		__entry->m_seg_type,
 		__entry->m_may_create,
+		__entry->m_multidev_dio,
+		__entry->create,
+		__entry->flag,
 		__entry->ret)
 );
 
@@ -807,20 +818,20 @@
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
-		__field(const char *,	name)
+		__string(name,	dentry->d_name.name)
 		__field(unsigned int, flags)
 	),
 
 	TP_fast_assign(
 		__entry->dev	= dir->i_sb->s_dev;
 		__entry->ino	= dir->i_ino;
-		__entry->name	= dentry->d_name.name;
+		__assign_str(name, dentry->d_name.name);
 		__entry->flags	= flags;
 	),
 
 	TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
 		show_dev_ino(__entry),
-		__entry->name,
+		__get_str(name),
 		__entry->flags)
 );
 
@@ -834,7 +845,7 @@
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
-		__field(const char *,	name)
+		__string(name,	dentry->d_name.name)
 		__field(nid_t,	cino)
 		__field(int,	err)
 	),
@@ -842,14 +853,14 @@
 	TP_fast_assign(
 		__entry->dev	= dir->i_sb->s_dev;
 		__entry->ino	= dir->i_ino;
-		__entry->name	= dentry->d_name.name;
+		__assign_str(name, dentry->d_name.name);
 		__entry->cino	= ino;
 		__entry->err	= err;
 	),
 
 	TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
 		show_dev_ino(__entry),
-		__entry->name,
+		__get_str(name),
 		__entry->cino,
 		__entry->err)
 );
diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index 3ba6331..c9048f3e 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -8,7 +8,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM rpcgss
 
-#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_RPCGSS_H
 
 #include <linux/tracepoint.h>
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 78f0719..1daa452 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1130,6 +1130,7 @@
 #define KVM_CAP_BINARY_STATS_FD 203
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
+#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index f950bff..03e5b77 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -14,6 +14,7 @@
 
 #include <linux/const.h>
 #include <linux/ioctl.h>
+#include <linux/types.h>
 
 /*
  * The struct used to pass data via the following ioctl. Similar to the
@@ -66,6 +67,17 @@
 	long pll_clock;     /* base PLL frequency */
 };
 
+struct rtc_param {
+	__u64 param;
+	union {
+		__u64 uvalue;
+		__s64 svalue;
+		__u64 ptr;
+	};
+	__u32 index;
+	__u32 __pad;
+};
+
 /*
  * ioctl calls that are permitted to the /dev/rtc interface, if
  * any of the RTC drivers are enabled.
@@ -95,6 +107,9 @@
 #define RTC_PLL_GET	_IOR('p', 0x11, struct rtc_pll_info)  /* Get PLL correction */
 #define RTC_PLL_SET	_IOW('p', 0x12, struct rtc_pll_info)  /* Set PLL correction */
 
+#define RTC_PARAM_GET	_IOW('p', 0x13, struct rtc_param)  /* Get parameter */
+#define RTC_PARAM_SET	_IOW('p', 0x14, struct rtc_param)  /* Set parameter */
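+
+/*
+ * Example (illustrative sketch, error checks omitted) : querying the feature
+ * bitmask from userspace, assuming an open file descriptor on /dev/rtc0 :
+ *
+ *   struct rtc_param param = { .param = RTC_PARAM_FEATURES };
+ *   ioctl(fd, RTC_PARAM_GET, &param);
+ *   // param.uvalue now carries the RTC_FEATURE_* bitmask
+ */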
+
 #define RTC_VL_DATA_INVALID	_BITUL(0) /* Voltage too low, RTC data is invalid */
 #define RTC_VL_BACKUP_LOW	_BITUL(1) /* Backup voltage is low */
 #define RTC_VL_BACKUP_EMPTY	_BITUL(2) /* Backup empty or not present */
@@ -114,7 +129,21 @@
 #define RTC_FEATURE_ALARM		0
 #define RTC_FEATURE_ALARM_RES_MINUTE	1
 #define RTC_FEATURE_NEED_WEEK_DAY	2
-#define RTC_FEATURE_CNT			3
+#define RTC_FEATURE_ALARM_RES_2S	3
+#define RTC_FEATURE_UPDATE_INTERRUPT	4
+#define RTC_FEATURE_CORRECTION		5
+#define RTC_FEATURE_BACKUP_SWITCH_MODE	6
+#define RTC_FEATURE_CNT			7
+
+/* parameter list */
+#define RTC_PARAM_FEATURES		0
+#define RTC_PARAM_CORRECTION		1
+#define RTC_PARAM_BACKUP_SWITCH_MODE	2
+
+#define RTC_BSM_DISABLED	0
+#define RTC_BSM_DIRECT		1
+#define RTC_BSM_LEVEL		2
+#define RTC_BSM_STANDBY		3
 
 #define RTC_MAX_FREQ	8192
 
diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h
index 70e01c6..e9122f1 100644
--- a/include/uapi/linux/virtio_mem.h
+++ b/include/uapi/linux/virtio_mem.h
@@ -68,9 +68,10 @@
  * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
  *
  * There are no guarantees what will happen if unplugged memory is
- * read/written. Such memory should, in general, not be touched. E.g.,
- * even writing might succeed, but the values will simply be discarded at
- * random points in time.
+ * read/written. In general, unplugged memory should not be touched, because
+ * the resulting action is undefined. There is one exception: without
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
+ * region can be read, to simplify creation of memory dumps.
  *
  * It can happen that the device cannot process a request, because it is
  * busy. The device driver has to retry later.
@@ -87,6 +88,8 @@
 
 /* node_id is an ACPI PXM and is valid */
 #define VIRTIO_MEM_F_ACPI_PXM		0
+/* unplugged memory must not be accessed */
+#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE	1
 
 
 /* --- virtio-mem: guest -> host requests --- */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index b94074c..b13eb86 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -112,6 +112,7 @@
 	const char *name;       /* defaults to ids[0].devicetype */
 	const struct xenbus_device_id *ids;
 	bool allow_rebind; /* avoid setting xenstore closed during remove */
+	bool not_essential;     /* is not mandatory for boot progress */
 	int (*probe)(struct xenbus_device *dev,
 		     const struct xenbus_device_id *id);
 	void (*otherend_changed)(struct xenbus_device *dev,
diff --git a/init/Kconfig b/init/Kconfig
index 45bcaa8..4b7bac1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -885,6 +885,11 @@
 config CC_HAS_INT128
 	def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
 
+config CC_IMPLICIT_FALLTHROUGH
+	string
+	default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
+	default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
+
 #
 # For architectures that know their GCC __int128 support is sound
 #
diff --git a/init/Makefile b/init/Makefile
index 2846113..04eeee12 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -30,7 +30,7 @@
 quiet_cmd_compile.h = CHK     $@
       cmd_compile.h = \
 	$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@	\
-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"	\
+	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)"	\
 	"$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
 
 include/generated/compile.h: FORCE
diff --git a/ipc/shm.c b/ipc/shm.c
index 4942bdd..b3048eb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -62,9 +62,18 @@
 	struct pid		*shm_lprid;
 	struct ucounts		*mlock_ucounts;
 
-	/* The task created the shm object.  NULL if the task is dead. */
+	/*
+	 * The task that created the shm object; relevant for
+	 * task_lock(shp->shm_creator)
+	 */
 	struct task_struct	*shm_creator;
-	struct list_head	shm_clist;	/* list by creator */
+
+	/*
+	 * List by creator. task_lock(->shm_creator) required for read/write.
+	 * If list_empty(), then the creator is dead already.
+	 */
+	struct list_head	shm_clist;
+	struct ipc_namespace	*ns;
 } __randomize_layout;
 
 /* shm_mode upper byte flags */
@@ -115,6 +124,7 @@
 	struct shmid_kernel *shp;
 
 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	WARN_ON(ns != shp->ns);
 
 	if (shp->shm_nattch) {
 		shp->shm_perm.mode |= SHM_DEST;
@@ -225,10 +235,43 @@
 	kfree(shp);
 }
 
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
 {
-	list_del(&s->shm_clist);
-	ipc_rmid(&shm_ids(ns), &s->shm_perm);
+	struct task_struct *creator;
+
+	/* ensure that shm_creator does not disappear */
+	rcu_read_lock();
+
+	/*
+	 * A concurrent exit_shm may do a list_del_init() as well.
+	 * Just do nothing if exit_shm already did the work.
+	 */
+	if (!list_empty(&shp->shm_clist)) {
+		/*
+		 * shp->shm_creator is guaranteed to be valid *only*
+		 * if shp->shm_clist is not empty.
+		 */
+		creator = shp->shm_creator;
+
+		task_lock(creator);
+		/*
+		 * list_del_init() is a nop if the entry was already removed
+		 * from the list.
+		 */
+		list_del_init(&shp->shm_clist);
+		task_unlock(creator);
+	}
+	rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
+{
+	shm_clist_rm(s);
+	ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
 }
 
 
@@ -283,7 +326,7 @@
 	shm_file = shp->shm_file;
 	shp->shm_file = NULL;
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid(ns, shp);
+	shm_rmid(shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -303,10 +346,10 @@
  *
  * 2) sysctl kernel.shm_rmid_forced is set to 1.
  */
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
 {
 	return (shp->shm_nattch == 0) &&
-	       (ns->shm_rmid_forced ||
+	       (shp->ns->shm_rmid_forced ||
 		(shp->shm_perm.mode & SHM_DEST));
 }
 
@@ -337,7 +380,7 @@
 	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
 	shp->shm_dtim = ktime_get_real_seconds();
 	shp->shm_nattch--;
-	if (shm_may_destroy(ns, shp))
+	if (shm_may_destroy(shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
@@ -358,10 +401,10 @@
 	 *
 	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
-	if (shp->shm_creator != NULL)
+	if (!list_empty(&shp->shm_clist))
 		return 0;
 
-	if (shm_may_destroy(ns, shp)) {
+	if (shm_may_destroy(shp)) {
 		shm_lock_by_ptr(shp);
 		shm_destroy(ns, shp);
 	}
@@ -379,48 +422,97 @@
 /* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
-	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
-	struct shmid_kernel *shp, *n;
+	for (;;) {
+		struct shmid_kernel *shp;
+		struct ipc_namespace *ns;
 
-	if (list_empty(&task->sysvshm.shm_clist))
-		return;
+		task_lock(task);
 
-	/*
-	 * If kernel.shm_rmid_forced is not set then only keep track of
-	 * which shmids are orphaned, so that a later set of the sysctl
-	 * can clean them up.
-	 */
-	if (!ns->shm_rmid_forced) {
-		down_read(&shm_ids(ns).rwsem);
-		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
-			shp->shm_creator = NULL;
-		/*
-		 * Only under read lock but we are only called on current
-		 * so no entry on the list will be shared.
-		 */
-		list_del(&task->sysvshm.shm_clist);
-		up_read(&shm_ids(ns).rwsem);
-		return;
-	}
-
-	/*
-	 * Destroy all already created segments, that were not yet mapped,
-	 * and mark any mapped as orphan to cover the sysctl toggling.
-	 * Destroy is skipped if shm_may_destroy() returns false.
-	 */
-	down_write(&shm_ids(ns).rwsem);
-	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
-		shp->shm_creator = NULL;
-
-		if (shm_may_destroy(ns, shp)) {
-			shm_lock_by_ptr(shp);
-			shm_destroy(ns, shp);
+		if (list_empty(&task->sysvshm.shm_clist)) {
+			task_unlock(task);
+			break;
 		}
-	}
 
-	/* Remove the list head from any segments still attached. */
-	list_del(&task->sysvshm.shm_clist);
-	up_write(&shm_ids(ns).rwsem);
+		shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+				shm_clist);
+
+		/*
+		 * 1) Get a pointer to the ipc namespace. This pointer is
+		 * guaranteed to be valid because the shp lifetime is always
+		 * shorter than the lifetime of the namespace in which shp
+		 * lives.
+		 * Since we hold task_lock(), shp won't be freed under us.
+		 */
+		ns = shp->ns;
+
+		/*
+		 * 2) If kernel.shm_rmid_forced is not set then only keep track of
+		 * which shmids are orphaned, so that a later set of the sysctl
+		 * can clean them up.
+		 */
+		if (!ns->shm_rmid_forced)
+			goto unlink_continue;
+
+		/*
+		 * 3) get a reference to the namespace.
+		 *    The refcount could already be 0. If it is 0, then
+		 *    the shm objects will be freed by free_ipc_work().
+		 */
+		ns = get_ipc_ns_not_zero(ns);
+		if (!ns) {
+unlink_continue:
+			list_del_init(&shp->shm_clist);
+			task_unlock(task);
+			continue;
+		}
+
+		/*
+		 * 4) get a reference to shp.
+		 *   This cannot fail: shm_clist_rm() is called before
+		 *   ipc_rmid(), thus the refcount cannot be 0.
+		 */
+		WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
+		/*
+		 * 5) unlink the shm segment from the list of segments
+		 *    created by current.
+		 *    This must be done last. After unlinking,
+		 *    only the refcounts obtained above prevent IPC_RMID
+		 *    from destroying the segment or the namespace.
+		 */
+		list_del_init(&shp->shm_clist);
+
+		task_unlock(task);
+
+		/*
+		 * 6) We now hold all required references.
+		 *    Thus, lock shp and destroy it if needed.
+		 */
+		down_write(&shm_ids(ns).rwsem);
+		shm_lock_by_ptr(shp);
+		/*
+		 * rcu_read_lock was implicitly taken in shm_lock_by_ptr, so
+		 * it's safe to call ipc_rcu_putref here.
+		 */
+		ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+		if (ipc_valid_object(&shp->shm_perm)) {
+			if (shm_may_destroy(shp))
+				shm_destroy(ns, shp);
+			else
+				shm_unlock(shp);
+		} else {
+			/*
+			 * Someone else deleted the shp from namespace
+			 * idr/kht while we were waiting.
+			 * Just unlock and continue.
+			 */
+			shm_unlock(shp);
+		}
+
+		up_write(&shm_ids(ns).rwsem);
+		put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+	}
 }
 
 static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -676,7 +768,11 @@
 	if (error < 0)
 		goto no_id;
 
+	shp->ns = ns;
+
+	task_lock(current);
 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+	task_unlock(current);
 
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
@@ -1567,7 +1663,8 @@
 	down_write(&shm_ids(ns).rwsem);
 	shp = shm_lock(ns, shmid);
 	shp->shm_nattch--;
-	if (shm_may_destroy(ns, shp))
+
+	if (shm_may_destroy(shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
diff --git a/ipc/util.c b/ipc/util.c
index d48d8cf..fa2d86e 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -447,8 +447,8 @@
 static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
 	if (ipcp->key != IPC_PRIVATE)
-		rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
-				       ipc_kht_params);
+		WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+				       ipc_kht_params));
 }
 
 /**
@@ -498,7 +498,7 @@
 {
 	int idx = ipcid_to_idx(ipcp->id);
 
-	idr_remove(&ids->ipcs_idr, idx);
+	WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp);
 	ipc_kht_remove(ids, ipcp);
 	ids->in_use--;
 	ipcp->deleted = true;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 60f1bfc..ce77f02 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+config PREEMPT_NONE_BUILD
+	bool
+
+config PREEMPT_VOLUNTARY_BUILD
+	bool
+
+config PREEMPT_BUILD
+	bool
+	select PREEMPTION
+	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+
 choice
 	prompt "Preemption Model"
-	default PREEMPT_NONE_BEHAVIOUR
+	default PREEMPT_NONE
 
-config PREEMPT_NONE_BEHAVIOUR
+config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
-	select PREEMPT_NONE if !PREEMPT_DYNAMIC
+	select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This is the traditional Linux preemption model, geared towards
 	  throughput. It will still provide good latencies most of the
@@ -18,10 +29,10 @@
 	  raw processing power of the kernel, irrespective of scheduling
 	  latencies.
 
-config PREEMPT_VOLUNTARY_BEHAVIOUR
+config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_VOLUNTARY if !PREEMPT_DYNAMIC
+	select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
@@ -37,10 +48,10 @@
 
 	  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT_BEHAVIOUR
+config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT
+	select PREEMPT_BUILD
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
@@ -58,7 +69,7 @@
 
 config PREEMPT_RT
 	bool "Fully Preemptible Kernel (Real-Time)"
-	depends on EXPERT && ARCH_SUPPORTS_RT && !PREEMPT_DYNAMIC
+	depends on EXPERT && ARCH_SUPPORTS_RT
 	select PREEMPTION
 	help
 	  This option turns the kernel into a real-time kernel by replacing
@@ -75,17 +86,6 @@
 
 endchoice
 
-config PREEMPT_NONE
-	bool
-
-config PREEMPT_VOLUNTARY
-	bool
-
-config PREEMPT
-	bool
-	select PREEMPTION
-	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
-
 config PREEMPT_COUNT
        bool
 
@@ -95,8 +95,8 @@
 
 config PREEMPT_DYNAMIC
 	bool "Preemption behaviour defined on boot"
-	depends on HAVE_PREEMPT_DYNAMIC
-	select PREEMPT
+	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+	select PREEMPT_BUILD
 	default y
 	help
 	  This option allows to define the preemption model on the kernel
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 2ca643a..43eb350 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1809,6 +1809,8 @@
 		return &bpf_sysctl_get_new_value_proto;
 	case BPF_FUNC_sysctl_set_new_value:
 		return &bpf_sysctl_set_new_value_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return cgroup_base_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469..649f076 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1364,8 +1364,6 @@
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
-	case BPF_FUNC_ktime_get_coarse_ns:
-		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 50f96ea..1033ee8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -132,6 +132,21 @@
 	return map;
 }
 
+static void bpf_map_write_active_inc(struct bpf_map *map)
+{
+	atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+	atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+	return atomic64_read(&map->writecnt) != 0;
+}
+
 static u32 bpf_map_value_size(const struct bpf_map *map)
 {
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -601,11 +616,8 @@
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt++;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_inc(map);
 }
 
 /* called for all unmapped memory region (including initial) */
@@ -613,11 +625,8 @@
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt--;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_dec(map);
 }
 
 static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -668,7 +677,7 @@
 		goto out;
 
 	if (vma->vm_flags & VM_MAYWRITE)
-		map->writecnt++;
+		bpf_map_write_active_inc(map);
 out:
 	mutex_unlock(&map->freeze_mutex);
 	return err;
@@ -1139,6 +1148,7 @@
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1174,6 +1184,7 @@
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1196,6 +1207,7 @@
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1226,6 +1238,7 @@
 out:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1533,6 +1546,7 @@
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
 	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
@@ -1597,6 +1611,7 @@
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1624,8 +1639,7 @@
 	}
 
 	mutex_lock(&map->freeze_mutex);
-
-	if (map->writecnt) {
+	if (bpf_map_write_active(map)) {
 		err = -EBUSY;
 		goto err_put;
 	}
@@ -4171,6 +4185,9 @@
 			    union bpf_attr __user *uattr,
 			    int cmd)
 {
+	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
+			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
+	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
 	struct bpf_map *map;
 	int err, ufd;
 	struct fd f;
@@ -4183,16 +4200,13 @@
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if ((cmd == BPF_MAP_LOOKUP_BATCH ||
-	     cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+	if (has_write)
+		bpf_map_write_active_inc(map);
+	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
 		err = -EPERM;
 		goto err_put;
 	}
-
-	if (cmd != BPF_MAP_LOOKUP_BATCH &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -4205,8 +4219,9 @@
 		BPF_DO_BATCH(map->ops->map_update_batch);
 	else
 		BPF_DO_BATCH(map->ops->map_delete_batch);
-
 err_put:
+	if (has_write)
+		bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
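
The mutex-protected writecnt becomes a lock-free atomic64, and every
syscall-side writer (update/delete, lookup-and-delete, the batch ops,
writable mmaps) now brackets its work with the inc/dec pair, so readers can
ask "is any write in flight?" without taking freeze_mutex. The intended
usage, condensed (do_map_write() is a hypothetical stand-in for the real
handlers):

	bpf_map_write_active_inc(map);   /* before touching map contents   */
	err = do_map_write(map, key, value);
	bpf_map_write_active_dec(map);   /* always paired, success or not  */

	/* reader side, e.g. BPF_MAP_FREEZE: */
	if (bpf_map_write_active(map))   /* atomic64_read(&writecnt) != 0  */
		return -EBUSY;           /* writers still in flight        */
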
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 890b3ec..50efda5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1151,7 +1151,8 @@
 			/* transfer reg's id which is unique for every map_lookup_elem
 			 * as UID of the inner map.
 			 */
-			reg->map_uid = reg->id;
+			if (map_value_has_timer(map->inner_map_meta))
+				reg->map_uid = reg->id;
 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 			reg->type = PTR_TO_XDP_SOCK;
 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -4055,7 +4056,22 @@
 
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
-	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+	/* A map is considered read-only if the following conditions are true:
+	 *
+	 * 1) BPF program side cannot change any of the map content. The
+	 *    BPF_F_RDONLY_PROG flag is set at map creation time and holds
+	 *    throughout the lifetime of the map.
+	 * 2) The map value(s) have been initialized from user space by a
+	 *    loader and then "frozen", such that no new map update/delete
+	 *    operations from syscall side are possible for the rest of
+	 *    the map's lifetime from that point onwards.
+	 * 3) Any parallel/pending map update/delete operations from syscall
+	 *    side have been completed. Only after that point, it's safe to
+	 *    assume that map value(s) are immutable.
+	 */
+	return (map->map_flags & BPF_F_RDONLY_PROG) &&
+	       READ_ONCE(map->frozen) &&
+	       !bpf_map_write_active(map);
 }
 
 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
@@ -11631,6 +11647,13 @@
 		}
 	}
 
+	if (map_value_has_timer(map)) {
+		if (is_tracing_prog_type(prog_type)) {
+			verbose(env, "tracing progs cannot use bpf_timer yet\n");
+			return -EINVAL;
+		}
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
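
The tightened bpf_map_is_rdonly() matters for userspace that relies on
constant-folded map values. The intended sequence is: create with
BPF_F_RDONLY_PROG, populate, then BPF_MAP_FREEZE; only once pending writers
have drained may the verifier fold lookups into constants. A hedged libbpf
sketch (assumes a libbpf recent enough to provide bpf_map_create()):

	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "ro_cfg",
				sizeof(__u32), sizeof(__u64), 1, &opts);
	__u32 key = 0;
	__u64 val = 42;

	bpf_map_update_elem(fd, &key, &val, BPF_ANY); /* populate            */
	bpf_map_freeze(fd);  /* no further syscall-side writes are permitted */
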
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 192e43a..407a256 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -31,6 +31,7 @@
 #include <linux/smpboot.h>
 #include <linux/relay.h>
 #include <linux/slab.h>
+#include <linux/scs.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
 
@@ -588,6 +589,12 @@
 	int ret;
 
 	/*
+	 * Reset stale stack state from the last time this CPU was online.
+	 */
+	scs_task_reset(idle);
+	kasan_unpoison_task_stack(idle);
+
+	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
 	 * Prevent irq alloc/free across the bringup.
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 4508201..0b6379a 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -48,7 +48,7 @@
 		 * the selector is loaded by userspace.
 		 */
 		if (unlikely(__get_user(state, sd->selector))) {
-			force_fatal_sig(SIGSEGV);
+			force_exit_sig(SIGSEGV);
 			return true;
 		}
 
@@ -56,7 +56,7 @@
 			return false;
 
 		if (state != SYSCALL_DISPATCH_FILTER_BLOCK) {
-			force_fatal_sig(SIGSYS);
+			force_exit_sig(SIGSYS);
 			return true;
 		}
 	}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f2253ea..30d94f6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7154,7 +7154,6 @@
 static u64 perf_virt_to_phys(u64 virt)
 {
 	u64 phys_addr = 0;
-	struct page *p = NULL;
 
 	if (!virt)
 		return 0;
@@ -7173,14 +7172,15 @@
 		 * If failed, leave phys_addr as 0.
 		 */
 		if (current->mm != NULL) {
+			struct page *p;
+
 			pagefault_disable();
-			if (get_user_page_fast_only(virt, 0, &p))
+			if (get_user_page_fast_only(virt, 0, &p)) {
 				phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+				put_page(p);
+			}
 			pagefault_enable();
 		}
-
-		if (p)
-			put_page(p);
 	}
 
 	return phys_addr;
@@ -9759,6 +9759,9 @@
 				continue;
 			if (event->attr.config != entry->type)
 				continue;
+			/* Cannot deliver synchronous signal to other task. */
+			if (event->attr.sigtrap)
+				continue;
 			if (perf_tp_event_match(event, &data, regs))
 				perf_swevent_event(event, count, &data, regs);
 		}
diff --git a/kernel/fork.c b/kernel/fork.c
index 5de23f3..3244cc5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2277,6 +2277,7 @@
 	p->pdeath_signal = 0;
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
+	clear_posix_cputimers_work(p);
 
 #ifdef CONFIG_KRETPROBES
 	p->kretprobe_instances.first = NULL;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 6a5ecee..7f350ae 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -529,10 +529,10 @@
 
 	/*
 	 * Checking the first MSI descriptor is sufficient. MSIX supports
-	 * masking and MSI does so when the maskbit is set.
+	 * masking and MSI does so when the can_mask attribute is set.
 	 */
 	desc = first_msi_entry(dev);
-	return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+	return desc->msi_attrib.is_msix || desc->msi_attrib.can_mask;
 }
 
 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index c51387a..04a74d0 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -105,9 +105,9 @@
  * atomic_long_cmpxchg() will be used to obtain writer lock.
  *
  * There are three places where the lock handoff bit may be set or cleared.
- * 1) rwsem_mark_wake() for readers.
- * 2) rwsem_try_write_lock() for writers.
- * 3) Error path of rwsem_down_write_slowpath().
+ * 1) rwsem_mark_wake() for readers		-- set, clear
+ * 2) rwsem_try_write_lock() for writers	-- set, clear
+ * 3) rwsem_del_waiter()			-- clear
  *
  * For all the above cases, wait_lock will be held. A writer must also
  * be the first one in the wait_list to be eligible for setting the handoff
@@ -334,6 +334,9 @@
 	struct task_struct *task;
 	enum rwsem_waiter_type type;
 	unsigned long timeout;
+
+	/* Writer only, not initialized in reader */
+	bool handoff_set;
 };
 #define rwsem_first_waiter(sem) \
 	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
@@ -344,12 +347,6 @@
 	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
 };
 
-enum writer_wait_state {
-	WRITER_NOT_FIRST,	/* Writer is not first in wait list */
-	WRITER_FIRST,		/* Writer is first in wait list     */
-	WRITER_HANDOFF		/* Writer is first & handoff needed */
-};
-
 /*
  * The typical HZ value is either 250 or 1000. So set the minimum waiting
  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
@@ -365,6 +362,31 @@
  */
 #define MAX_READERS_WAKEUP	0x100
 
+static inline void
+rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+	lockdep_assert_held(&sem->wait_lock);
+	list_add_tail(&waiter->list, &sem->wait_list);
+	/* caller will set RWSEM_FLAG_WAITERS */
+}
+
+/*
+ * Remove a waiter from the wait_list and clear flags.
+ *
+ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
+ * this function. Modify with care.
+ */
+static inline void
+rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+	lockdep_assert_held(&sem->wait_lock);
+	list_del(&waiter->list);
+	if (likely(!list_empty(&sem->wait_list)))
+		return;
+
+	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+}
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -376,6 +398,8 @@
  *   preferably when the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
+ *
+ * Implies rwsem_del_waiter() for all woken readers.
  */
 static void rwsem_mark_wake(struct rw_semaphore *sem,
 			    enum rwsem_wake_type wake_type,
@@ -490,17 +514,24 @@
 
 	adjustment = woken * RWSEM_READER_BIAS - adjustment;
 	lockevent_cond_inc(rwsem_wake_reader, woken);
-	if (list_empty(&sem->wait_list)) {
-		/* hit end of list above */
-		adjustment -= RWSEM_FLAG_WAITERS;
-	}
 
-	/*
-	 * When we've woken a reader, we no longer need to force writers
-	 * to give up the lock and we can clear HANDOFF.
-	 */
-	if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
-		adjustment -= RWSEM_FLAG_HANDOFF;
+	oldcount = atomic_long_read(&sem->count);
+	if (list_empty(&sem->wait_list)) {
+		/*
+		 * Combined with list_move_tail() above, this implies
+		 * rwsem_del_waiter().
+		 */
+		adjustment -= RWSEM_FLAG_WAITERS;
+		if (oldcount & RWSEM_FLAG_HANDOFF)
+			adjustment -= RWSEM_FLAG_HANDOFF;
+	} else if (woken) {
+		/*
+		 * When we've woken a reader, we no longer need to force
+		 * writers to give up the lock and we can clear HANDOFF.
+		 */
+		if (oldcount & RWSEM_FLAG_HANDOFF)
+			adjustment -= RWSEM_FLAG_HANDOFF;
+	}
 
 	if (adjustment)
 		atomic_long_add(adjustment, &sem->count);
@@ -532,12 +563,12 @@
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
  *
- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
- * bit is set or the lock is acquired with handoff bit cleared.
+ * Implies rwsem_del_waiter() on success.
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
-					enum writer_wait_state wstate)
+					struct rwsem_waiter *waiter)
 {
+	bool first = rwsem_first_waiter(sem) == waiter;
 	long count, new;
 
 	lockdep_assert_held(&sem->wait_lock);
@@ -546,13 +577,19 @@
 	do {
 		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
 
-		if (has_handoff && wstate == WRITER_NOT_FIRST)
-			return false;
+		if (has_handoff) {
+			if (!first)
+				return false;
+
+			/* First waiter inherits a previously set handoff bit */
+			waiter->handoff_set = true;
+		}
 
 		new = count;
 
 		if (count & RWSEM_LOCK_MASK) {
-			if (has_handoff || (wstate != WRITER_HANDOFF))
+			if (has_handoff || (!rt_task(waiter->task) &&
+					    !time_after(jiffies, waiter->timeout)))
 				return false;
 
 			new |= RWSEM_FLAG_HANDOFF;
@@ -569,9 +606,17 @@
 	 * We have either acquired the lock with handoff bit cleared or
 	 * set the handoff bit.
 	 */
-	if (new & RWSEM_FLAG_HANDOFF)
+	if (new & RWSEM_FLAG_HANDOFF) {
+		waiter->handoff_set = true;
+		lockevent_inc(rwsem_wlock_handoff);
 		return false;
+	}
 
+	/*
+	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
+	 * success.
+	 */
+	list_del(&waiter->list);
 	rwsem_set_owner(sem);
 	return true;
 }
@@ -956,7 +1001,7 @@
 		}
 		adjustment += RWSEM_FLAG_WAITERS;
 	}
-	list_add_tail(&waiter.list, &sem->wait_list);
+	rwsem_add_waiter(sem, &waiter);
 
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
@@ -1002,11 +1047,7 @@
 	return sem;
 
 out_nolock:
-	list_del(&waiter.list);
-	if (list_empty(&sem->wait_list)) {
-		atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
-				   &sem->count);
-	}
+	rwsem_del_waiter(sem, &waiter);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	__set_current_state(TASK_RUNNING);
 	lockevent_inc(rwsem_rlock_fail);
@@ -1020,9 +1061,7 @@
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
 	long count;
-	enum writer_wait_state wstate;
 	struct rwsem_waiter waiter;
-	struct rw_semaphore *ret = sem;
 	DEFINE_WAKE_Q(wake_q);
 
 	/* do optimistic spinning and steal lock if possible */
@@ -1038,16 +1077,13 @@
 	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+	waiter.handoff_set = false;
 
 	raw_spin_lock_irq(&sem->wait_lock);
-
-	/* account for this before adding a new element to the list */
-	wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
-
-	list_add_tail(&waiter.list, &sem->wait_list);
+	rwsem_add_waiter(sem, &waiter);
 
 	/* we're now waiting on the lock */
-	if (wstate == WRITER_NOT_FIRST) {
+	if (rwsem_first_waiter(sem) != &waiter) {
 		count = atomic_long_read(&sem->count);
 
 		/*
@@ -1083,13 +1119,16 @@
 	/* wait until we successfully acquire the lock */
 	set_current_state(state);
 	for (;;) {
-		if (rwsem_try_write_lock(sem, wstate)) {
+		if (rwsem_try_write_lock(sem, &waiter)) {
 			/* rwsem_try_write_lock() implies ACQUIRE on success */
 			break;
 		}
 
 		raw_spin_unlock_irq(&sem->wait_lock);
 
+		if (signal_pending_state(state, current))
+			goto out_nolock;
+
 		/*
 		 * After setting the handoff bit and failing to acquire
 		 * the lock, attempt to spin on owner to accelerate lock
@@ -1098,7 +1137,7 @@
 		 * In this case, we attempt to acquire the lock again
 		 * without sleeping.
 		 */
-		if (wstate == WRITER_HANDOFF) {
+		if (waiter.handoff_set) {
 			enum owner_state owner_state;
 
 			preempt_disable();
@@ -1109,66 +1148,26 @@
 				goto trylock_again;
 		}
 
-		/* Block until there are no active lockers. */
-		for (;;) {
-			if (signal_pending_state(state, current))
-				goto out_nolock;
-
-			schedule();
-			lockevent_inc(rwsem_sleep_writer);
-			set_current_state(state);
-			/*
-			 * If HANDOFF bit is set, unconditionally do
-			 * a trylock.
-			 */
-			if (wstate == WRITER_HANDOFF)
-				break;
-
-			if ((wstate == WRITER_NOT_FIRST) &&
-			    (rwsem_first_waiter(sem) == &waiter))
-				wstate = WRITER_FIRST;
-
-			count = atomic_long_read(&sem->count);
-			if (!(count & RWSEM_LOCK_MASK))
-				break;
-
-			/*
-			 * The setting of the handoff bit is deferred
-			 * until rwsem_try_write_lock() is called.
-			 */
-			if ((wstate == WRITER_FIRST) && (rt_task(current) ||
-			    time_after(jiffies, waiter.timeout))) {
-				wstate = WRITER_HANDOFF;
-				lockevent_inc(rwsem_wlock_handoff);
-				break;
-			}
-		}
+		schedule();
+		lockevent_inc(rwsem_sleep_writer);
+		set_current_state(state);
 trylock_again:
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
 	__set_current_state(TASK_RUNNING);
-	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	lockevent_inc(rwsem_wlock);
-
-	return ret;
+	return sem;
 
 out_nolock:
 	__set_current_state(TASK_RUNNING);
 	raw_spin_lock_irq(&sem->wait_lock);
-	list_del(&waiter.list);
-
-	if (unlikely(wstate == WRITER_HANDOFF))
-		atomic_long_add(-RWSEM_FLAG_HANDOFF,  &sem->count);
-
-	if (list_empty(&sem->wait_list))
-		atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
-	else
+	rwsem_del_waiter(sem, &waiter);
+	if (!list_empty(&sem->wait_list))
 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
 	lockevent_inc(rwsem_wlock_fail);
-
 	return ERR_PTR(-EINTR);
 }
 
@@ -1249,17 +1248,14 @@
 
 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
 
-	/*
-	 * Optimize for the case when the rwsem is not locked at all.
-	 */
-	tmp = RWSEM_UNLOCKED_VALUE;
-	do {
+	tmp = atomic_long_read(&sem->count);
+	while (!(tmp & RWSEM_READ_FAILED_MASK)) {
 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
-					tmp + RWSEM_READER_BIAS)) {
+						    tmp + RWSEM_READER_BIAS)) {
 			rwsem_set_reader_owned(sem);
 			return 1;
 		}
-	} while (!(tmp & RWSEM_READ_FAILED_MASK));
+	}
 	return 0;
 }
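
The reshaped __down_read_trylock() above leans on a subtlety of the
try_cmpxchg family worth spelling out: unlike plain cmpxchg, it updates the
expected value in place on failure, so the loop re-tests
RWSEM_READ_FAILED_MASK against exactly the count that made the exchange fail,
with no extra re-read and no guaranteed-stale first guess. A pseudo-model:

	/* atomic_long_try_cmpxchg_acquire(v, &old, new) behaves as:
	 *
	 *	if (*v == old) { *v = new; return true;  }
	 *	else           { old = *v; return false; }   <-- old updated
	 *
	 * performed atomically, with acquire ordering on success.
	 */
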
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 9ed9b74..e6af502 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -693,7 +693,7 @@
 		goto Unlock;
 
 	error = swsusp_read(&flags);
-	swsusp_close(FMODE_READ);
+	swsusp_close(FMODE_READ | FMODE_EXCL);
 	if (!error)
 		error = hibernation_restore(flags & SF_PLATFORM_MODE);
 
@@ -983,7 +983,7 @@
 	/* The snapshot device should not be opened while we're running */
 	if (!hibernate_acquire()) {
 		error = -EBUSY;
-		swsusp_close(FMODE_READ);
+		swsusp_close(FMODE_READ | FMODE_EXCL);
 		goto Unlock;
 	}
 
@@ -1018,7 +1018,7 @@
 	pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
 	return error;
  Close_Finish:
-	swsusp_close(FMODE_READ);
+	swsusp_close(FMODE_READ | FMODE_EXCL);
 	goto Finish;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 740723b..ad241b4 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -177,7 +177,7 @@
 		if (res <= 0)
 			goto unlock;
 	} else {
-		res = PAGE_SIZE - pg_offp;
+		res = PAGE_SIZE;
 	}
 
 	if (!data_of(data->handle)) {
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 013bfd6..57b132b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3253,6 +3253,11 @@
 	preempt_enable();
 }
 
+void printk_trigger_flush(void)
+{
+	defer_console_output();
+}
+
 int vprintk_deferred(const char *fmt, va_list args)
 {
 	int r;
diff --git a/kernel/reboot.c b/kernel/reboot.c
index d6e0f9f..6bcc5d6 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -33,6 +33,7 @@
 #define DEFAULT_REBOOT_MODE
 #endif
 enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
+EXPORT_SYMBOL_GPL(reboot_mode);
 enum reboot_mode panic_reboot_mode = REBOOT_UNDEFINED;
 
 /*
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 2067080..8629b37 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -31,7 +31,7 @@
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
-	sched_offline_group(ag->tg);
+	sched_release_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 523fd60..76f9dee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3726,6 +3726,9 @@
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
 {
+	if (this_cpu == that_cpu)
+		return true;
+
 	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
@@ -6625,13 +6628,13 @@
 static void __init preempt_dynamic_init(void)
 {
 	if (preempt_dynamic_mode == preempt_dynamic_undefined) {
-		if (IS_ENABLED(CONFIG_PREEMPT_NONE_BEHAVIOUR)) {
+		if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
 			sched_dynamic_update(preempt_dynamic_none);
-		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BEHAVIOUR)) {
+		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
 			sched_dynamic_update(preempt_dynamic_voluntary);
 		} else {
 			/* Default static call setting, nothing to do */
-			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_BEHAVIOUR));
+			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
 			preempt_dynamic_mode = preempt_dynamic_full;
 			pr_info("Dynamic Preempt: full\n");
 		}
@@ -8616,9 +8619,6 @@
 	idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY;
 	kthread_set_per_cpu(idle, cpu);
 
-	scs_task_reset(idle);
-	kasan_unpoison_task_stack(idle);
-
 #ifdef CONFIG_SMP
 	/*
 	 * It's possible that init_idle() gets called multiple times on a task,
@@ -8774,7 +8774,6 @@
 		finish_arch_post_lock_switch();
 	}
 
-	scs_task_reset(current);
 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
@@ -9716,6 +9715,22 @@
 	kmem_cache_free(task_group_cache, tg);
 }
 
+static void sched_free_group_rcu(struct rcu_head *rcu)
+{
+	sched_free_group(container_of(rcu, struct task_group, rcu));
+}
+
+static void sched_unregister_group(struct task_group *tg)
+{
+	unregister_fair_sched_group(tg);
+	unregister_rt_sched_group(tg);
+	/*
+	 * We have to wait for yet another RCU grace period to expire, as
+	 * print_cfs_stats() might run concurrently.
+	 */
+	call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -9759,25 +9774,35 @@
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void sched_free_group_rcu(struct rcu_head *rhp)
+static void sched_unregister_group_rcu(struct rcu_head *rhp)
 {
 	/* Now it should be safe to free those cfs_rqs: */
-	sched_free_group(container_of(rhp, struct task_group, rcu));
+	sched_unregister_group(container_of(rhp, struct task_group, rcu));
 }
 
 void sched_destroy_group(struct task_group *tg)
 {
 	/* Wait for possible concurrent references to cfs_rqs complete: */
-	call_rcu(&tg->rcu, sched_free_group_rcu);
+	call_rcu(&tg->rcu, sched_unregister_group_rcu);
 }
 
-void sched_offline_group(struct task_group *tg)
+void sched_release_group(struct task_group *tg)
 {
 	unsigned long flags;
 
-	/* End participation in shares distribution: */
-	unregister_fair_sched_group(tg);
-
+	/*
+	 * Unlink first, to keep walk_tg_tree_from() from finding us (via
+	 * sched_cfs_period_timer()).
+	 *
+	 * For this to be effective, we have to wait for all pending users of
+	 * this task group to leave their RCU critical section to ensure no new
+	 * user will see our dying task group any more. Specifically ensure
+	 * that tg_unthrottle_up() won't add decayed cfs_rq's to it.
+	 *
+	 * We therefore defer calling unregister_fair_sched_group() to
+	 * sched_unregister_group(), which is guaranteed to get called only after the
+	 * current RCU grace period has expired.
+	 */
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
@@ -9896,7 +9921,7 @@
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_offline_group(tg);
+	sched_release_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
@@ -9906,7 +9931,7 @@
 	/*
 	 * Relies on the RCU grace period between css_released() and this.
 	 */
-	sched_free_group(tg);
+	sched_unregister_group(tg);
 }
 
 /*
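
Taken together with the autogroup and fair/rt changes, task_group teardown now
spans two RCU grace periods. A rough timeline (a sketch of the call chain, not
literal code):

	cpu_cgroup_css_released(css)
	  -> sched_release_group(tg)      /* unlink from the tg tree lists   */
	       ... RCU grace period: walk_tg_tree_from() users drain ...
	cpu_cgroup_css_free(css)
	  -> sched_unregister_group(tg)   /* unregister fair/rt sched groups */
	       call_rcu(&tg->rcu, sched_free_group_rcu);
	       ... second grace period: print_cfs_stats() readers drain ...
	sched_free_group_rcu()
	  -> sched_free_group(tg)         /* finally kfree the cfs/rt state  */
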
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 13950be..6e476f6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11456,8 +11456,6 @@
 {
 	int i;
 
-	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
@@ -11534,6 +11532,8 @@
 	struct rq *rq;
 	int cpu;
 
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(cpu) {
 		if (tg->se[cpu])
 			remove_entity_load_avg(tg->se[cpu]);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index bb945f8..b48baab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -137,13 +137,17 @@
 	return rt_rq->rq;
 }
 
+void unregister_rt_sched_group(struct task_group *tg)
+{
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+}
+
 void free_rt_sched_group(struct task_group *tg)
 {
 	int i;
 
-	if (tg->rt_se)
-		destroy_rt_bandwidth(&tg->rt_bandwidth);
-
 	for_each_possible_cpu(i) {
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
@@ -250,6 +254,8 @@
 	return &rq->rt;
 }
 
+void unregister_rt_sched_group(struct task_group *tg) { }
+
 void free_rt_sched_group(struct task_group *tg) { }
 
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7f1612d..0e66749 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -488,6 +488,7 @@
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
 
+extern void unregister_rt_sched_group(struct task_group *tg);
 extern void free_rt_sched_group(struct task_group *tg);
 extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
@@ -503,7 +504,7 @@
 extern void sched_online_group(struct task_group *tg,
 			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
-extern void sched_offline_group(struct task_group *tg);
+extern void sched_release_group(struct task_group *tg);
 
 extern void sched_move_task(struct task_struct *tsk);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 7c4b7ae..a629b11 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1298,6 +1298,12 @@
 	return ret;
 }
 
+enum sig_handler {
+	HANDLER_CURRENT, /* If reachable use the current handler */
+	HANDLER_SIG_DFL, /* Always use SIG_DFL handler semantics */
+	HANDLER_EXIT,	 /* Only visible as the process exit code */
+};
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1310,7 +1316,8 @@
  * that is why we also clear SIGNAL_UNKILLABLE.
  */
 static int
-force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool sigdfl)
+force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
+	enum sig_handler handler)
 {
 	unsigned long int flags;
 	int ret, blocked, ignored;
@@ -1321,9 +1328,10 @@
 	action = &t->sighand->action[sig-1];
 	ignored = action->sa.sa_handler == SIG_IGN;
 	blocked = sigismember(&t->blocked, sig);
-	if (blocked || ignored || sigdfl) {
+	if (blocked || ignored || (handler != HANDLER_CURRENT)) {
 		action->sa.sa_handler = SIG_DFL;
-		action->sa.sa_flags |= SA_IMMUTABLE;
+		if (handler == HANDLER_EXIT)
+			action->sa.sa_flags |= SA_IMMUTABLE;
 		if (blocked) {
 			sigdelset(&t->blocked, sig);
 			recalc_sigpending_and_wake(t);
@@ -1343,7 +1351,7 @@
 
 int force_sig_info(struct kernel_siginfo *info)
 {
-	return force_sig_info_to_task(info, current, false);
+	return force_sig_info_to_task(info, current, HANDLER_CURRENT);
 }
 
 /*
@@ -1660,7 +1668,20 @@
 	info.si_code = SI_KERNEL;
 	info.si_pid = 0;
 	info.si_uid = 0;
-	force_sig_info_to_task(&info, current, true);
+	force_sig_info_to_task(&info, current, HANDLER_SIG_DFL);
+}
+
+void force_exit_sig(int sig)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = 0;
+	info.si_uid = 0;
+	force_sig_info_to_task(&info, current, HANDLER_EXIT);
 }
 
 /*
@@ -1693,7 +1714,7 @@
 	info.si_flags = flags;
 	info.si_isr = isr;
 #endif
-	return force_sig_info_to_task(&info, t, false);
+	return force_sig_info_to_task(&info, t, HANDLER_CURRENT);
 }
 
 int force_sig_fault(int sig, int code, void __user *addr
@@ -1813,7 +1834,8 @@
 	info.si_errno = reason;
 	info.si_arch = syscall_get_arch(current);
 	info.si_syscall = syscall;
-	return force_sig_info_to_task(&info, current, force_coredump);
+	return force_sig_info_to_task(&info, current,
+		force_coredump ? HANDLER_EXIT : HANDLER_CURRENT);
 }
 
 /* For the crazy architectures that include trap information in
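
The enum above is the heart of the fix: HANDLER_SIG_DFL restores the default
disposition but no longer sets SA_IMMUTABLE, so a debugger (or a subsequent
sigaction()) can still intervene; HANDLER_EXIT additionally pins the handler
so the signal is only ever visible in the exit code. From a hypothetical
in-kernel caller's point of view:

	force_sig_info(&info);    /* HANDLER_CURRENT: honour the current    */
	                          /* handler if it is reachable            */
	force_fatal_sig(SIGSEGV); /* HANDLER_SIG_DFL: default action, but   */
	                          /* the disposition remains changeable    */
	force_exit_sig(SIGSEGV);  /* HANDLER_EXIT: SA_IMMUTABLE; the task   */
	                          /* exits, visible only as the exit code  */
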
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 643d412..96b4e78 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1159,13 +1159,28 @@
 }
 
 /*
+ * Clear existing posix CPU timers task work.
+ */
+void clear_posix_cputimers_work(struct task_struct *p)
+{
+	/*
+	 * A copied work entry from the old task is not meaningful; clear it.
+	 * N.B. init_task_work() will not do this.
+	 */
+	memset(&p->posix_cputimers_work.work, 0,
+	       sizeof(p->posix_cputimers_work.work));
+	init_task_work(&p->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+	p->posix_cputimers_work.scheduled = false;
+}
+
+/*
  * Initialize posix CPU timers task work in init task. Out of line to
  * keep the callback static and to avoid header recursion hell.
  */
 void __init posix_cputimers_init_work(void)
 {
-	init_task_work(&current->posix_cputimers_work.work,
-		       posix_cpu_timers_work);
+	clear_posix_cputimers_work(current);
 }
 
 /*
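
The memset() is the point of the new helper: a task_work entry copied from the
parent in dup_task_struct() can carry a stale ->next pointer, which makes
task_work_add() and the pending checks believe the work is already queued, and
init_task_work() deliberately leaves everything but the callback alone. For
reference, it is just:

	static inline void
	init_task_work(struct callback_head *twork, task_work_func_t func)
	{
		twork->func = func;   /* ->next is intentionally untouched */
	}
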
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7396488..ae97550 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1111,8 +1111,6 @@
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
-	case BPF_FUNC_ktime_get_coarse_ns:
-		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f9139dc..88de94d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3812,6 +3812,18 @@
 		iter->fmt[i] = '\0';
 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
 
+		/*
+		 * If iter->seq is full, the above call no longer guarantees
+		 * that ap is in sync with fmt processing, and further calls
+		 * to va_arg() can return wrong positional arguments.
+		 *
+		 * Ensure that ap is no longer used in this case.
+		 */
+		if (iter->seq.full) {
+			p = "";
+			break;
+		}
+
 		if (star)
 			len = va_arg(ap, int);
 
@@ -6706,9 +6718,7 @@
 		cnt = PAGE_SIZE - 1;
 
 	/* reset all but tr, trace, and overruns */
-	memset(&iter->seq, 0,
-	       sizeof(struct trace_iterator) -
-	       offsetof(struct trace_iterator, seq));
+	memset_startat(iter, 0, seq);
 	cpumask_clear(iter->started);
 	trace_seq_init(&iter->seq);
 	iter->pos = -1;
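
memset_startat(obj, v, member) zeroes from 'member' through the end of *obj,
capturing the same arithmetic the old code spelled out by hand with
offsetof(). Roughly (the real helper in include/linux/string.h adds a little
more type hygiene):

	#define memset_startat(obj, v, member)                              \
	({                                                                  \
		u8 *__ptr = (u8 *)(obj);                                    \
		memset(__ptr + offsetof(typeof(*(obj)), member), (v),       \
		       sizeof(*(obj)) - offsetof(typeof(*(obj)), member));  \
	})
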
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6b60ab9..38715aa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1366,14 +1366,26 @@
 	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
 		*tt = event_triggers_call(file, buffer, entry, event);
 
-	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
-	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
-	     !filter_match_preds(file->filter, entry))) {
-		__trace_event_discard_commit(buffer, event);
-		return true;
-	}
+	if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED |
+				    EVENT_FILE_FL_FILTERED |
+				    EVENT_FILE_FL_PID_FILTER))))
+		return false;
+
+	if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+		goto discard;
+
+	if (file->flags & EVENT_FILE_FL_FILTERED &&
+	    !filter_match_preds(file->filter, entry))
+		goto discard;
+
+	if ((file->flags & EVENT_FILE_FL_PID_FILTER) &&
+	    trace_event_ignore_this_pid(file))
+		goto discard;
 
 	return false;
+ discard:
+	__trace_event_discard_commit(buffer, event);
+	return true;
 }
 
 /**
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4021b9a..92be9cb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2678,12 +2678,24 @@
 trace_create_new_event(struct trace_event_call *call,
 		       struct trace_array *tr)
 {
+	struct trace_pid_list *no_pid_list;
+	struct trace_pid_list *pid_list;
 	struct trace_event_file *file;
+	unsigned int first;
 
 	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
 	if (!file)
 		return NULL;
 
+	pid_list = rcu_dereference_protected(tr->filtered_pids,
+					     lockdep_is_held(&event_mutex));
+	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+					     lockdep_is_held(&event_mutex));
+
+	if (!trace_pid_list_first(pid_list, &first) ||
+	    !trace_pid_list_first(no_pid_list, &first))
+		file->flags |= EVENT_FILE_FL_PID_FILTER;
+
 	file->event_call = call;
 	file->tr = tr;
 	atomic_set(&file->sm_ref, 0);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 0abc9a4..9555b8e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1953,9 +1953,10 @@
 		if (!hist_field->type)
 			goto free;
 
-		if (field->filter_type == FILTER_STATIC_STRING)
+		if (field->filter_type == FILTER_STATIC_STRING) {
 			hist_field->fn = hist_field_string;
-		else if (field->filter_type == FILTER_DYN_STRING)
+			hist_field->size = field->size;
+		} else if (field->filter_type == FILTER_DYN_STRING)
 			hist_field->fn = hist_field_dynstring;
 		else
 			hist_field->fn = hist_field_pstring;
@@ -2575,27 +2576,27 @@
 
 	/* Split the expression string at the root operator */
 	if (!sep)
-		goto free;
+		return ERR_PTR(-EINVAL);
+
 	*sep = '\0';
 	operand1_str = str;
 	str = sep+1;
 
-	if (!operand1_str || !str)
-		goto free;
+	/* Binary operator requires both operands */
+	if (*operand1_str == '\0' || *str == '\0')
+		return ERR_PTR(-EINVAL);
 
 	operand_flags = 0;
 
 	/* LHS of string is an expression e.g. a+b in a+b+c */
 	operand1 = parse_expr(hist_data, file, operand1_str, operand_flags, NULL, n_subexprs);
-	if (IS_ERR(operand1)) {
-		ret = PTR_ERR(operand1);
-		operand1 = NULL;
-		goto free;
-	}
+	if (IS_ERR(operand1))
+		return ERR_CAST(operand1);
+
 	if (operand1->flags & HIST_FIELD_FL_STRING) {
 		hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
 		ret = -EINVAL;
-		goto free;
+		goto free_op1;
 	}
 
 	/* RHS of string is another expression e.g. c in a+b+c */
@@ -2603,13 +2604,12 @@
 	operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs);
 	if (IS_ERR(operand2)) {
 		ret = PTR_ERR(operand2);
-		operand2 = NULL;
-		goto free;
+		goto free_op1;
 	}
 	if (operand2->flags & HIST_FIELD_FL_STRING) {
 		hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
 		ret = -EINVAL;
-		goto free;
+		goto free_operands;
 	}
 
 	switch (field_op) {
@@ -2627,12 +2627,12 @@
 		break;
 	default:
 		ret = -EINVAL;
-		goto free;
+		goto free_operands;
 	}
 
 	ret = check_expr_operands(file->tr, operand1, operand2, &var1, &var2);
 	if (ret)
-		goto free;
+		goto free_operands;
 
 	operand_flags = var1 ? var1->flags : operand1->flags;
 	operand2_flags = var2 ? var2->flags : operand2->flags;
@@ -2651,12 +2651,13 @@
 	expr = create_hist_field(hist_data, NULL, flags, var_name);
 	if (!expr) {
 		ret = -ENOMEM;
-		goto free;
+		goto free_operands;
 	}
 
 	operand1->read_once = true;
 	operand2->read_once = true;
 
+	/* The operands are now owned and free'd by 'expr' */
 	expr->operands[0] = operand1;
 	expr->operands[1] = operand2;
 
@@ -2667,7 +2668,7 @@
 		if (!divisor) {
 			hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
 			ret = -EDOM;
-			goto free;
+			goto free_expr;
 		}
 
 		/*
@@ -2707,18 +2708,22 @@
 		expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
 		if (!expr->type) {
 			ret = -ENOMEM;
-			goto free;
+			goto free_expr;
 		}
 
 		expr->name = expr_str(expr, 0);
 	}
 
 	return expr;
-free:
-	destroy_hist_field(operand1, 0);
-	destroy_hist_field(operand2, 0);
-	destroy_hist_field(expr, 0);
 
+free_operands:
+	destroy_hist_field(operand2, 0);
+free_op1:
+	destroy_hist_field(operand1, 0);
+	return ERR_PTR(ret);
+
+free_expr:
+	destroy_hist_field(expr, 0);
 	return ERR_PTR(ret);
 }
 
@@ -3024,8 +3029,10 @@
 		if (val->flags & HIST_FIELD_FL_STRING) {
 			char *str = elt_data->field_var_str[j++];
 			char *val_str = (char *)(uintptr_t)var_val;
+			unsigned int size;
 
-			strscpy(str, val_str, STR_VAR_LEN_MAX);
+			size = min(val->size, STR_VAR_LEN_MAX);
+			strscpy(str, val_str, size);
 			var_val = (u64)(uintptr_t)str;
 		}
 		tracing_map_set_var(elt, var_idx, var_val);
@@ -4912,6 +4919,7 @@
 			if (hist_field->flags & HIST_FIELD_FL_STRING) {
 				unsigned int str_start, var_str_idx, idx;
 				char *str, *val_str;
+				unsigned int size;
 
 				str_start = hist_data->n_field_var_str +
 					hist_data->n_save_var_str;
@@ -4920,7 +4928,9 @@
 
 				str = elt_data->field_var_str[idx];
 				val_str = (char *)(uintptr_t)hist_val;
-				strscpy(str, val_str, STR_VAR_LEN_MAX);
+
+				size = min(hist_field->size, STR_VAR_LEN_MAX);
+				strscpy(str, val_str, size);
 
 				hist_val = (u64)(uintptr_t)str;
 			}
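
Both strscpy() sites had the same shape of bug: STR_VAR_LEN_MAX bounds the
destination, but for a fixed-size char array field the source inside the
ring-buffer event is not guaranteed to be NUL-terminated within the field, so
strscpy() could scan past it. Clamping to the recorded field size keeps the
scan inside the event. For instance (hypothetical event field):

	/* TP_STRUCT__entry:  __array(char, comm, 16)  -- filled by memcpy(),
	 * possibly without a NUL.  strscpy(str, val_str, STR_VAR_LEN_MAX)
	 * may read up to STR_VAR_LEN_MAX bytes of the source looking for the
	 * terminator; min(field->size, STR_VAR_LEN_MAX) caps that scan at
	 * the field's own 16 bytes.
	 */
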
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 3e4a165..7520d43 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -55,7 +55,8 @@
 	struct list_head	list;
 	struct trace_array	*tr;
 };
-struct list_head osnoise_instances;
+
+static struct list_head osnoise_instances;
 
 static bool osnoise_has_registered_instances(void)
 {
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 0a5c0db..f5f0039 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1313,6 +1313,7 @@
 		return 0;
 
 	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+		tu = container_of(pos, struct trace_uprobe, tp);
 		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
 		if (err) {
 			uprobe_perf_close(call, event);
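
The one-liner above is easy to misread: the loop walks struct trace_probe
entries, but the body needs the enclosing struct trace_uprobe, and without the
container_of() it kept applying the stale 'tu' computed before the loop. The
general shape:

	struct trace_probe *pos;
	struct trace_uprobe *tu;   /* previously set for a different probe */

	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
		tu = container_of(pos, struct trace_uprobe, tp); /* the fix */
		err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
		/* ... error handling as in the hunk above ... */
	}
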
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9ef7ce1..5c12bde 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -346,8 +346,9 @@
 	int "Warn for stack frames larger than"
 	range 0 8192
 	default 2048 if GCC_PLUGIN_LATENT_ENTROPY
-	default 1536 if (!64BIT && (PARISC || XTENSA))
-	default 1024 if (!64BIT && !PARISC)
+	default 2048 if PARISC
+	default 1536 if (!64BIT && XTENSA)
+	default 1024 if !64BIT
 	default 2048 if 64BIT
 	help
 	  Tell gcc to warn at build time for stack frames larger than this.
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
index 6b629ab..a512b99 100644
--- a/lib/decompress_unzstd.c
+++ b/lib/decompress_unzstd.c
@@ -68,11 +68,7 @@
 #ifdef STATIC
 # define UNZSTD_PREBOOT
 # include "xxhash.c"
-# include "zstd/entropy_common.c"
-# include "zstd/fse_decompress.c"
-# include "zstd/huf_decompress.c"
-# include "zstd/zstd_common.c"
-# include "zstd/decompress.c"
+# include "zstd/decompress_sources.h"
 #endif
 
 #include <linux/decompress/mm.h>
@@ -91,11 +87,15 @@
 
 static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
 {
-	const int err = ZSTD_getErrorCode(ret);
+	const zstd_error_code err = zstd_get_error_code(ret);
 
-	if (!ZSTD_isError(ret))
+	if (!zstd_is_error(ret))
 		return 0;
 
+	/*
+	 * zstd_get_error_name() cannot be used because error takes a char *
+	 * not a const char *
+	 */
 	switch (err) {
 	case ZSTD_error_memory_allocation:
 		error("ZSTD decompressor ran out of memory");
@@ -124,28 +124,28 @@
 				  long out_len, long *in_pos,
 				  void (*error)(char *x))
 {
-	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	const size_t wksp_size = zstd_dctx_workspace_bound();
 	void *wksp = large_malloc(wksp_size);
-	ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+	zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size);
 	int err;
 	size_t ret;
 
 	if (dctx == NULL) {
-		error("Out of memory while allocating ZSTD_DCtx");
+		error("Out of memory while allocating zstd_dctx");
 		err = -1;
 		goto out;
 	}
 	/*
 	 * Find out how large the frame actually is, there may be junk at
-	 * the end of the frame that ZSTD_decompressDCtx() can't handle.
+	 * the end of the frame that zstd_decompress_dctx() can't handle.
 	 */
-	ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+	ret = zstd_find_frame_compressed_size(in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
 	in_len = (long)ret;
 
-	ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+	ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -167,14 +167,14 @@
 			 long *in_pos,
 			 void (*error)(char *x))
 {
-	ZSTD_inBuffer in;
-	ZSTD_outBuffer out;
-	ZSTD_frameParams params;
+	zstd_in_buffer in;
+	zstd_out_buffer out;
+	zstd_frame_header header;
 	void *in_allocated = NULL;
 	void *out_allocated = NULL;
 	void *wksp = NULL;
 	size_t wksp_size;
-	ZSTD_DStream *dstream;
+	zstd_dstream *dstream;
 	int err;
 	size_t ret;
 
@@ -238,13 +238,13 @@
 	out.size = out_len;
 
 	/*
-	 * We need to know the window size to allocate the ZSTD_DStream.
+	 * We need to know the window size to allocate the zstd_dstream.
 	 * Since we are streaming, we need to allocate a buffer for the sliding
 	 * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
 	 * (8 MB), so it is important to use the actual value so as not to
 	 * waste memory when it is smaller.
 	 */
-	ret = ZSTD_getFrameParams(&params, in.src, in.size);
+	ret = zstd_get_frame_header(&header, in.src, in.size);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -253,19 +253,19 @@
 		err = -1;
 		goto out;
 	}
-	if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+	if (header.windowSize > ZSTD_WINDOWSIZE_MAX) {
 		error("ZSTD-compressed data has too large a window size");
 		err = -1;
 		goto out;
 	}
 
 	/*
-	 * Allocate the ZSTD_DStream now that we know how much memory is
+	 * Allocate the zstd_dstream now that we know how much memory is
 	 * required.
 	 */
-	wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+	wksp_size = zstd_dstream_workspace_bound(header.windowSize);
 	wksp = large_malloc(wksp_size);
-	dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+	dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size);
 	if (dstream == NULL) {
 		error("Out of memory while allocating ZSTD_DStream");
 		err = -1;
@@ -298,7 +298,7 @@
 			in.size = in_len;
 		}
 		/* Returns zero when the frame is complete. */
-		ret = ZSTD_decompressStream(dstream, &out, &in);
+		ret = zstd_decompress_stream(dstream, &out, &in);
 		err = handle_zstd_error(ret, error);
 		if (err)
 			goto out;
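
The call-site churn above is a mechanical rename: the zstd library was updated
to a newer upstream (v1.4.10) wrapped behind kernel-style names. A sampling of
the mapping, all taken from the hunks above (see include/linux/zstd.h):

	/* ZSTD_isError(ret)            ->  zstd_is_error(ret)
	 * ZSTD_getErrorCode(ret)       ->  zstd_get_error_code(ret)
	 * ZSTD_DCtxWorkspaceBound()    ->  zstd_dctx_workspace_bound()
	 * ZSTD_initDCtx(w, sz)         ->  zstd_init_dctx(w, sz)
	 * ZSTD_decompressDCtx(...)     ->  zstd_decompress_dctx(...)
	 * ZSTD_getFrameParams(...)     ->  zstd_get_frame_header(...)
	 * ZSTD_DStreamWorkspaceBound() ->  zstd_dstream_workspace_bound()
	 * ZSTD_initDStream(...)        ->  zstd_init_dstream(...)
	 * ZSTD_decompressStream(...)   ->  zstd_decompress_stream(...)
	 */
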
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index f9e8900..199ab20 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -75,6 +75,12 @@
 		touch_softlockup_watchdog();
 	}
 
+	/*
+	 * Force flush any remote buffers that might be stuck in IRQ context
+	 * and therefore could not run their irq_work.
+	 */
+	printk_trigger_flush();
+
 	clear_bit_unlock(0, &backtrace_flag);
 	put_cpu();
 }
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 67ed689..0643573 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -869,6 +869,7 @@
 	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	OPTIMIZER_HIDE_VAR(size);
 	KUNIT_EXPECT_KASAN_FAIL(test,
 		kasan_ptr_result = memchr(ptr, '1', size + 1));
 
@@ -894,6 +895,7 @@
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 	memset(arr, 0, sizeof(arr));
 
+	OPTIMIZER_HIDE_VAR(size);
 	KUNIT_EXPECT_KASAN_FAIL(test,
 		kasan_int_result = memcmp(ptr, arr, size+1));
 	kfree(ptr);
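
OPTIMIZER_HIDE_VAR() is what keeps these tests honest: without it the compiler
can constant-fold 'size', see the out-of-bounds length at build time, and
either warn or elide the access entirely, so KASAN would never get to trip.
It launders the value through a register with an empty asm; simplified from
include/linux/compiler.h:

	#define OPTIMIZER_HIDE_VAR(var)                         \
		__asm__ ("" : "=r" (var) : "0" (var))
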
diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
index f5d778e..fc45339 100644
--- a/lib/zstd/Makefile
+++ b/lib/zstd/Makefile
@@ -1,10 +1,44 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
-ccflags-y += -O3
+zstd_compress-y := \
+		zstd_compress_module.o \
+		common/debug.o \
+		common/entropy_common.o \
+		common/error_private.o \
+		common/fse_decompress.o \
+		common/zstd_common.o \
+		compress/fse_compress.o \
+		compress/hist.o \
+		compress/huf_compress.o \
+		compress/zstd_compress.o \
+		compress/zstd_compress_literals.o \
+		compress/zstd_compress_sequences.o \
+		compress/zstd_compress_superblock.o \
+		compress/zstd_double_fast.o \
+		compress/zstd_fast.o \
+		compress/zstd_lazy.o \
+		compress/zstd_ldm.o \
+		compress/zstd_opt.o \
 
-zstd_compress-y := fse_compress.o huf_compress.o compress.o \
-		   entropy_common.o fse_decompress.o zstd_common.o
-zstd_decompress-y := huf_decompress.o decompress.o \
-		     entropy_common.o fse_decompress.o zstd_common.o
+zstd_decompress-y := \
+		zstd_decompress_module.o \
+		common/debug.o \
+		common/entropy_common.o \
+		common/error_private.o \
+		common/fse_decompress.o \
+		common/zstd_common.o \
+		decompress/huf_decompress.o \
+		decompress/zstd_ddict.o \
+		decompress/zstd_decompress.o \
+		decompress/zstd_decompress_block.o \
diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h
deleted file mode 100644
index 5d6343c..0000000
--- a/lib/zstd/bitstream.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * bitstream
- * Part of FSE library
- * header file (to include)
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-/*
-*  This API consists of small unitary functions, which must be inlined for best performance.
-*  Since link-time-optimization is not available for all compilers,
-*  these functions are defined into a .h to be included.
-*/
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include "error_private.h" /* error codes and messages */
-#include "mem.h"	   /* unaligned access routines */
-
-/*=========================================
-*  Target specific
-=========================================*/
-#define STREAM_ACCUMULATOR_MIN_32 25
-#define STREAM_ACCUMULATOR_MIN_64 57
-#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
-
-/*-******************************************
-*  bitStream encoding API (write forward)
-********************************************/
-/* bitStream can mix input from multiple sources.
-*  A critical property of these streams is that they encode and decode in **reverse** direction.
-*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
-*/
-typedef struct {
-	size_t bitContainer;
-	int bitPos;
-	char *startPtr;
-	char *ptr;
-	char *endPtr;
-} BIT_CStream_t;
-
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity);
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC);
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC);
-
-/* Start with initCStream, providing the size of buffer to write into.
-*  bitStream will never write outside of this buffer.
-*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
-*
-*  bits are first added to a local register.
-*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-*  Writing data into memory is an explicit operation, performed by the flushBits function.
-*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-*  After a flushBits, a maximum of 7 bits might still be stored into local register.
-*
-*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
-*
-*  Last operation is to close the bitStream.
-*  The function returns the final size of CStream in bytes.
-*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
-*/
-
-/*-********************************************
-*  bitStream decoding API (read backward)
-**********************************************/
-typedef struct {
-	size_t bitContainer;
-	unsigned bitsConsumed;
-	const char *ptr;
-	const char *start;
-} BIT_DStream_t;
-
-typedef enum {
-	BIT_DStream_unfinished = 0,
-	BIT_DStream_endOfBuffer = 1,
-	BIT_DStream_completed = 2,
-	BIT_DStream_overflow = 3
-} BIT_DStream_status; /* result of BIT_reloadDStream() */
-/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize);
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits);
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD);
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD);
-
-/* Start by invoking BIT_initDStream().
-*  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-*  You can then retrieve bitFields stored into the local register, **in reverse order**.
-*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
-*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-*  Otherwise, it can be less than that, so proceed accordingly.
-*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
-*/
-
-/*-****************************************
-*  unsafe API
-******************************************/
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
-
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC);
-/* unsafe version; does not check buffer overflow */
-
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-/*-**************************************************************
-*  Internal functions
-****************************************************************/
-ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); }
-
-/*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = {0,       1,       3,       7,	0xF,      0x1F,     0x3F,     0x7F,      0xFF,
-				    0x1FF,   0x3FF,   0x7FF,   0xFFF,    0x1FFF,   0x3FFF,   0x7FFF,   0xFFFF,    0x1FFFF,
-				    0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */
-
-/*-**************************************************************
-*  bitStream encoding
-****************************************************************/
-/*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(void*)
- *  @return : 0 if success,
-			  otherwise an error code (can be tested using ERR_isError() ) */
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity)
-{
-	bitC->bitContainer = 0;
-	bitC->bitPos = 0;
-	bitC->startPtr = (char *)startPtr;
-	bitC->ptr = bitC->startPtr;
-	bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
-	if (dstCapacity <= sizeof(bitC->ptr))
-		return ERROR(dstSize_tooSmall);
-	return 0;
-}
-
-/*! BIT_addBits() :
-	can add up to 26 bits into `bitC`.
-	Does not check for register overflow ! */
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-	bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
-	bitC->bitPos += nbBits;
-}
-
-/*! BIT_addBitsFast() :
- *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-	bitC->bitContainer |= value << bitC->bitPos;
-	bitC->bitPos += nbBits;
-}
-
-/*! BIT_flushBitsFast() :
- *  unsafe version; does not check buffer overflow */
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC)
-{
-	size_t const nbBytes = bitC->bitPos >> 3;
-	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-	bitC->ptr += nbBytes;
-	bitC->bitPos &= 7;
-	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_flushBits() :
- *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC)
-{
-	size_t const nbBytes = bitC->bitPos >> 3;
-	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-	bitC->ptr += nbBytes;
-	if (bitC->ptr > bitC->endPtr)
-		bitC->ptr = bitC->endPtr;
-	bitC->bitPos &= 7;
-	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_closeCStream() :
- *  @return : size of CStream, in bytes,
-			  or 0 if it could not fit into dstBuffer */
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC)
-{
-	BIT_addBitsFast(bitC, 1, 1); /* endMark */
-	BIT_flushBits(bitC);
-
-	if (bitC->ptr >= bitC->endPtr)
-		return 0; /* doesn't fit within authorized budget : cancel */
-
-	return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
-}
-
-/*-********************************************************
-* bitStream decoding
-**********************************************************/
-/*! BIT_initDStream() :
-*   Initialize a BIT_DStream_t.
-*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
-*   `srcSize` must be the *exact* size of the bitStream, in bytes.
-*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize)
-{
-	if (srcSize < 1) {
-		memset(bitD, 0, sizeof(*bitD));
-		return ERROR(srcSize_wrong);
-	}
-
-	if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
-		bitD->start = (const char *)srcBuffer;
-		bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer);
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-		{
-			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
-			if (lastByte == 0)
-				return ERROR(GENERIC); /* endMark not present */
-		}
-	} else {
-		bitD->start = (const char *)srcBuffer;
-		bitD->ptr = bitD->start;
-		bitD->bitContainer = *(const BYTE *)(bitD->start);
-		switch (srcSize) {
-		case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16);
-			fallthrough;
-		case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24);
-			fallthrough;
-		case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32);
-			fallthrough;
-		case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24;
-			fallthrough;
-		case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16;
-			fallthrough;
-		case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8;
-			fallthrough;
-		default:;
-		}
-		{
-			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-			if (lastByte == 0)
-				return ERROR(GENERIC); /* endMark not present */
-		}
-		bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8;
-	}
-
-	return srcSize;
-}
-
-ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; }
-
-ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; }
-
-ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; }
-
-/*! BIT_lookBits() :
- *  Provides next n bits from local register.
- *  local register is not modified.
- *  On 32-bits, maxNbBits==24.
- *  On 64-bits, maxNbBits==56.
- *  @return : value extracted
- */
-ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits)
-{
-	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-	return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits)
-{
-	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-	return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask);
-}
-
-ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; }
-
-/*! BIT_readBits() :
- *  Read (consume) next n bits from local register and update.
- *  Pay attention to not read more than nbBits contained into local register.
- *  @return : extracted value.
- */
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits)
-{
-	size_t const value = BIT_lookBits(bitD, nbBits);
-	BIT_skipBits(bitD, nbBits);
-	return value;
-}
-
-/*! BIT_readBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits)
-{
-	size_t const value = BIT_lookBitsFast(bitD, nbBits);
-	BIT_skipBits(bitD, nbBits);
-	return value;
-}
-
-/*! BIT_reloadDStream() :
-*   Refill `bitD` from buffer previously set in BIT_initDStream() .
-*   This function is safe, it guarantees it will not read beyond src buffer.
-*   @return : status of `BIT_DStream_t` internal register.
-			  if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD)
-{
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */
-		return BIT_DStream_overflow;
-
-	if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
-		bitD->ptr -= bitD->bitsConsumed >> 3;
-		bitD->bitsConsumed &= 7;
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-		return BIT_DStream_unfinished;
-	}
-	if (bitD->ptr == bitD->start) {
-		if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8)
-			return BIT_DStream_endOfBuffer;
-		return BIT_DStream_completed;
-	}
-	{
-		U32 nbBytes = bitD->bitsConsumed >> 3;
-		BIT_DStream_status result = BIT_DStream_unfinished;
-		if (bitD->ptr - nbBytes < bitD->start) {
-			nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
-			result = BIT_DStream_endOfBuffer;
-		}
-		bitD->ptr -= nbBytes;
-		bitD->bitsConsumed -= nbBytes * 8;
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
-		return result;
-	}
-}
-
-/*! BIT_endOfDStream() :
-*   @return Tells if DStream has exactly reached its end (all bits consumed).
-*/
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream)
-{
-	return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8));
-}
-
-#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
new file mode 100644
index 0000000..28248ab
--- /dev/null
+++ b/lib/zstd/common/bitstream.h
@@ -0,0 +1,437 @@
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined in a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "mem.h"            /* unaligned access routines */
+#include "compiler.h"       /* UNLIKELY() */
+#include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
+#include "error_private.h"  /* error codes and messages */
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct {
+    size_t bitContainer;
+    unsigned bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track of how many bits are potentially stored in the local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
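+
+/* Illustrative usage sketch (not part of the API itself; buffer size and
+ * the field widths written below are arbitrary assumptions):
+ *
+ *     BIT_CStream_t bitC;
+ *     char dst[128];
+ *     size_t streamSize = 0;
+ *     if (!ERR_isError(BIT_initCStream(&bitC, dst, sizeof(dst)))) {
+ *         BIT_addBits(&bitC, 5, 3);              (store value 5 on 3 bits)
+ *         BIT_addBits(&bitC, 1, 1);              (then value 1 on 1 bit)
+ *         BIT_flushBits(&bitC);                  (commit the register to dst)
+ *         streamSize = BIT_closeCStream(&bitC);  (0 means dst was too small)
+ *     }
+ */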
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+    const char* limitPtr;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
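+
+/* Illustrative decoding sketch, reading back the hypothetical stream built in
+ * the encoding example above (fields come out in reverse order of insertion):
+ *
+ *     BIT_DStream_t bitD;
+ *     if (!ERR_isError(BIT_initDStream(&bitD, dst, streamSize))) {
+ *         size_t const b = BIT_readBits(&bitD, 1);   (reads back 1)
+ *         size_t const a = BIT_readBits(&bitD, 3);   (reads back 5)
+ *         (call BIT_reloadDStream() between reads on longer streams)
+ *     }
+ */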
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                                 11, 14, 16, 18, 22, 25,  3, 30,
+                                                  8, 12, 20, 28, 15, 17, 24,  7,
+                                                 19, 27, 23,  6, 26,  5,  4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = {
+    0,          1,         3,         7,         0xF,       0x1F,
+    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
+    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
+    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
+    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
+#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  `dstCapacity` must be > sizeof(size_t)
+ *  @return : 0 if success,
+ *            otherwise an error code (can be tested using ERR_isError()) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! BIT_addBits() :
+ *  can add up to 31 bits into `bitC`.
+ *  Note : does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
+{
+    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+    assert(nbBits < BIT_MASK_SIZE);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ *  works only if `value` is _clean_,
+ *  meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
+{
+    assert((value>>nbBits) == 0);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= value << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  assumption : bitContainer has not overflowed
+ *  unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_flushBits() :
+ *  assumption : bitContainer has not overflowed
+ *  safe version; check for buffer overflow, and prevents it.
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_closeCStream() :
+ *  @return : size of CStream, in bytes,
+ *            or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+    return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+
+/*-********************************************************
+*  bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+ *  Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
+ */
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                ZSTD_FALLTHROUGH;
+
+        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                ZSTD_FALLTHROUGH;
+
+        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                ZSTD_FALLTHROUGH;
+
+        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+                ZSTD_FALLTHROUGH;
+
+        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+                ZSTD_FALLTHROUGH;
+
+        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                ZSTD_FALLTHROUGH;
+
+        default: break;
+        }
+        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
+        }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{
+    return bitContainer >> start;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{
+    U32 const regMask = sizeof(bitContainer)*8 - 1;
+    /* if start > regMask, bitstream is corrupted, and result is undefined */
+    assert(nbBits < BIT_MASK_SIZE);
+    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    assert(nbBits < BIT_MASK_SIZE);
+    return bitContainer & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ * @return : value extracted */
+MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
+{
+    /* arbitrate between double-shift and shift+mask */
+#if 1
+    /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
+     * bitstream is likely corrupted, and result is undefined */
+    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
+#else
+    /* this code path is slower on my os-x laptop */
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+ *  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
+}
+
+MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*! BIT_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value. */
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_readBitsFast() :
+*   unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_reloadDStreamFast() :
+ *  Similar to BIT_reloadDStream(), but with two differences:
+ *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
+ *     point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+    if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+        return BIT_DStream_overflow;
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    bitD->ptr -= bitD->bitsConsumed >> 3;
+    bitD->bitsConsumed &= 7;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
+/*! BIT_reloadDStream() :
+ *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+ *  This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->limitPtr) {
+        return BIT_reloadDStreamFast(bitD);
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    /* start < ptr < limitPtr */
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+        return result;
+    }
+}
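+
+/* Typical refill loop (sketch; decodeOneSymbol() is a hypothetical consumer
+ * that reads fewer than STREAM_ACCUMULATOR_MIN bits per call):
+ *
+ *     while (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) {
+ *         decodeOneSymbol(&bitD);
+ *     }
+ *     (then drain the remaining bits and check BIT_endOfDStream(&bitD) == 1)
+ */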
+
+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
new file mode 100644
index 0000000..f5a9c70
--- /dev/null
+++ b/lib/zstd/common/compiler.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if !defined(ZSTD_NO_INLINE)
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
+#else
+#  define INLINE_KEYWORD
+#endif
+
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+
+#else
+
+#define INLINE_KEYWORD
+#define FORCE_INLINE_ATTR
+
+#endif
+
+/*
+  On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
+  This explicitly marks such functions as __cdecl so that the code will still compile
+  if a CC other than __cdecl has been made the default.
+*/
+#define WIN_CDECL
+
+/*
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/*
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compiler's
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+#  define HINT_INLINE static INLINE_KEYWORD
+#else
+#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#define UNUSED_ATTR __attribute__((unused))
+
+/* force no inlining */
+#define FORCE_NOINLINE static __attribute__((__noinline__))
+
+
+/* target attribute */
+#ifndef __has_attribute
+  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
+#endif
+#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+  #if ((defined(__clang__) && __has_attribute(__target__)) \
+      || (defined(__GNUC__) \
+          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+      && (defined(__x86_64__) || defined(_M_X86)) \
+      && !defined(__BMI2__)
+  #  define DYNAMIC_BMI2 1
+  #else
+  #  define DYNAMIC_BMI2 0
+  #endif
+#endif
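+
+/* The dispatch pattern built on DYNAMIC_BMI2 (sketch; foo_body, foo_default
+ * and foo_bmi2 are hypothetical names; entropy_common.c uses this exact shape
+ * for FSE_readNCount_bmi2() and HUF_readStats_wksp()):
+ *
+ *     static size_t foo_default(args) { return foo_body(args); }
+ *   #if DYNAMIC_BMI2
+ *     TARGET_ATTRIBUTE("bmi2") static size_t foo_bmi2(args) { return foo_body(args); }
+ *   #endif
+ *     size_t foo(args, int bmi2)
+ *     {
+ *   #if DYNAMIC_BMI2
+ *         if (bmi2) return foo_bmi2(args);
+ *   #endif
+ *         (void)bmi2;
+ *         return foo_default(args);
+ *     }
+ */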
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#  define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#elif defined(__aarch64__)
+#  define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#  define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#else
+#  define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#endif  /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s)  {            \
+    const char* const _ptr = (const char*)(p);  \
+    size_t const _size = (size_t)(s);     \
+    size_t _pos;                          \
+    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+        PREFETCH_L2(_ptr + _pos);         \
+    }                                     \
+}
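+
+/* e.g. PREFETCH_AREA(dictPtr, dictSize) issues one PREFETCH_L2() per cache
+ * line of the range (the arguments here are illustrative, not from this patch). */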
+
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
+#else
+#  define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+
+/* disable warnings */
+
+/* Like DYNAMIC_BMI2, but for compile-time determination of BMI2 support */
+
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+#  define __has_feature(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define ZSTD_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+/* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+#define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+
+/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+#define ZSTD_FALLTHROUGH fallthrough
+
+/* detects whether we are being compiled under msan */
+
+
+/* detects whether we are being compiled under asan */
+
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
new file mode 100644
index 0000000..0db7b42
--- /dev/null
+++ b/lib/zstd/common/cpu.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/*
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include "mem.h"
+
+
+typedef struct {
+    U32 f1c;
+    U32 f1d;
+    U32 f7b;
+    U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+    U32 f1c = 0;
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block works like the normal cpuid branch below, but gcc
+     * reserves ebx for use as its PIC register, so we must specially
+     * handle the save and restore to avoid clobbering it.
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "popl %%ebx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1));
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "movl %%ebx, %%eax\n\t"
+          "popl %%ebx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+      U32 f7a;
+      __asm__("cpuid"
+              : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+              : "a"(7), "c"(0)
+              : "edx");
+    }
+#endif
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
+    return ((cpuid.r) & (1U << bit)) != 0;                                     \
+  }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+  C(sse3, 0)
+  C(pclmuldq, 1)
+  C(dtes64, 2)
+  C(monitor, 3)
+  C(dscpl, 4)
+  C(vmx, 5)
+  C(smx, 6)
+  C(eist, 7)
+  C(tm2, 8)
+  C(ssse3, 9)
+  C(cnxtid, 10)
+  C(fma, 12)
+  C(cx16, 13)
+  C(xtpr, 14)
+  C(pdcm, 15)
+  C(pcid, 17)
+  C(dca, 18)
+  C(sse41, 19)
+  C(sse42, 20)
+  C(x2apic, 21)
+  C(movbe, 22)
+  C(popcnt, 23)
+  C(tscdeadline, 24)
+  C(aes, 25)
+  C(xsave, 26)
+  C(osxsave, 27)
+  C(avx, 28)
+  C(f16c, 29)
+  C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+  D(fpu, 0)
+  D(vme, 1)
+  D(de, 2)
+  D(pse, 3)
+  D(tsc, 4)
+  D(msr, 5)
+  D(pae, 6)
+  D(mce, 7)
+  D(cx8, 8)
+  D(apic, 9)
+  D(sep, 11)
+  D(mtrr, 12)
+  D(pge, 13)
+  D(mca, 14)
+  D(cmov, 15)
+  D(pat, 16)
+  D(pse36, 17)
+  D(psn, 18)
+  D(clfsh, 19)
+  D(ds, 21)
+  D(acpi, 22)
+  D(mmx, 23)
+  D(fxsr, 24)
+  D(sse, 25)
+  D(sse2, 26)
+  D(ss, 27)
+  D(htt, 28)
+  D(tm, 29)
+  D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+  B(bmi1, 3)
+  B(hle, 4)
+  B(avx2, 5)
+  B(smep, 7)
+  B(bmi2, 8)
+  B(erms, 9)
+  B(invpcid, 10)
+  B(rtm, 11)
+  B(mpx, 14)
+  B(avx512f, 16)
+  B(avx512dq, 17)
+  B(rdseed, 18)
+  B(adx, 19)
+  B(smap, 20)
+  B(avx512ifma, 21)
+  B(pcommit, 22)
+  B(clflushopt, 23)
+  B(clwb, 24)
+  B(avx512pf, 26)
+  B(avx512er, 27)
+  B(avx512cd, 28)
+  B(sha, 29)
+  B(avx512bw, 30)
+  B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+  C(prefetchwt1, 0)
+  C(avx512vbmi, 1)
+#undef C
+
+#undef X
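+
+/* Illustrative use of the generated predicates (sketch; the BMI2-specific
+ * code path named here is hypothetical):
+ *
+ *     ZSTD_cpuid_t const cpuid = ZSTD_cpuid();
+ *     if (ZSTD_cpuid_bmi2(cpuid))
+ *         use_bmi2_code_path();
+ */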
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
new file mode 100644
index 0000000..bb863c9
--- /dev/null
+++ b/lib/zstd/common/debug.c
@@ -0,0 +1,24 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+#include "debug.h"
+
+int g_debuglevel = DEBUGLEVEL;
diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
new file mode 100644
index 0000000..6dd88d1
--- /dev/null
+++ b/lib/zstd/common/debug.h
@@ -0,0 +1,101 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They group together assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust the level using the variable g_debuglevel,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+#  define DEBUGLEVEL 0
+#endif
+
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In that case, it's possible to selectively trigger high verbosity levels
+ * by modifying g_debuglevel.
+ */
+
+#if (DEBUGLEVEL>=1)
+#  define ZSTD_DEPS_NEED_ASSERT
+#  include "zstd_deps.h"
+#else
+#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
+#    define assert(condition) ((void)0)   /* disable assert (default) */
+#  endif
+#endif
+
+#if (DEBUGLEVEL>=2)
+#  define ZSTD_DEPS_NEED_IO
+#  include "zstd_deps.h"
+extern int g_debuglevel; /* the variable is only declared,
+                            it actually lives in debug.c,
+                            and is shared by the whole process.
+                            It's not thread-safe.
+                            It's useful when enabling very verbose levels
+                            on selective conditions (such as position in src) */
+
+#  define RAWLOG(l, ...) {                                       \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
+            }   }
+#  define DEBUGLOG(l, ...) {                                     \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+                    ZSTD_DEBUG_PRINT(" \n");                     \
+            }   }
+#else
+#  define RAWLOG(l, ...)      {}    /* disabled */
+#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#endif
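+
+/* Illustrative use (the level and message are arbitrary; the trace is only
+ * compiled in when DEBUGLEVEL >= 2):
+ *
+ *     DEBUGLOG(4, "regenerated %u bytes", (unsigned)dstSize);
+ *
+ * prints only when g_debuglevel >= 4 at run time.
+ */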
+
+
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
new file mode 100644
index 0000000..53b47a2
--- /dev/null
+++ b/lib/zstd/common/entropy_common.c
@@ -0,0 +1,357 @@
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h"       /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static U32 FSE_ctz(U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_ctz(val);
+#   else   /* Software version */
+        U32 count = 0;
+        while ((val & 1) == 0) {
+            val >>= 1;
+            ++count;
+        }
+        return count;
+#   endif
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                           const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    unsigned const maxSV1 = *maxSVPtr + 1;
+    int previous0 = 0;
+
+    if (hbSize < 8) {
+        /* This function only works when hbSize >= 8 */
+        char buffer[8] = {0};
+        ZSTD_memcpy(buffer, headerBuffer, hbSize);
+        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+                                                    buffer, sizeof(buffer));
+            if (FSE_isError(countSize)) return countSize;
+            if (countSize > hbSize) return ERROR(corruption_detected);
+            return countSize;
+    }   }
+    assert(hbSize >= 8);
+
+    /* init */
+    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    for (;;) {
+        if (previous0) {
+            /* Count the number of repeats. Each time the
+             * 2-bit repeat code is 0b11 there is another
+             * repeat.
+             * Avoid UB by setting the high bit to 1.
+             */
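+            /* Worked example (hypothetical bit pattern): if the six low bits
+             * of bitStream are 0b01'11'11 (two 0b11 codes, then 0b01),
+             * ~bitStream has 5 trailing zero bits, so repeats = 5>>1 = 2;
+             * the final 0b01 code handled below adds one more repeat.
+             */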
+            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            while (repeats >= 12) {
+                charnum += 3 * 12;
+                if (LIKELY(ip <= iend-7)) {
+                    ip += 3;
+                } else {
+                    bitCount -= (int)(8 * (iend - 7 - ip));
+                    bitCount &= 31;
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> bitCount;
+                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            }
+            charnum += 3 * repeats;
+            bitStream >>= 2 * repeats;
+            bitCount += 2 * repeats;
+
+            /* Add the final repeat which isn't 0b11. */
+            assert((bitStream & 3) < 3);
+            charnum += bitStream & 3;
+            bitCount += 2;
+
+            /* This is an error, but break and return an error
+             * at the end, because returning out of a loop makes
+             * it harder for the compiler to optimize.
+             */
+            if (charnum >= maxSV1) break;
+
+            /* We don't need to set the normalized count to 0
+             * because we already memset the whole buffer to 0.
+             */
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                assert((bitCount >> 3) <= 3); /* For first condition to work */
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+        }
+        {
+            int const max = (2*threshold-1) - remaining;
+            int count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
+            } else {
+                count = bitStream & (2*threshold-1);
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            /* When it matters (small blocks), this is a
+             * predictable branch, because we don't use -1.
+             */
+            if (count >= 0) {
+                remaining -= count;
+            } else {
+                assert(count == -1);
+                remaining += count;
+            }
+            normalizedCounter[charnum++] = (short)count;
+            previous0 = !count;
+
+            assert(threshold > 1);
+            if (remaining < threshold) {
+                /* This branch can be folded into the
+                 * threshold update condition because we
+                 * know that threshold > 1.
+                 */
+                if (remaining <= 1) break;
+                nbBits = BIT_highbit32(remaining) + 1;
+                threshold = 1 << (nbBits - 1);
+            }
+            if (charnum >= maxSV1) break;
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+    }   }
+    if (remaining != 1) return ERROR(corruption_detected);
+    /* Only possible when there are too many zeros. */
+    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
+    if (bitCount > 32) return ERROR(corruption_detected);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    return ip-istart;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_readNCount_body_default(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+#endif
+
+size_t FSE_readNCount_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+size_t FSE_readNCount(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
+}
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
+    `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+    @return : size read from `src`, or an error code.
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?().
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
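+
+/* Illustrative call (sketch; array sizes follow the contract stated above,
+ * and HUF_SYMBOLVALUE_MAX / HUF_TABLELOG_MAX come from huf.h):
+ *
+ *     BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+ *     U32  rankStats[HUF_TABLELOG_MAX + 1];
+ *     U32  nbSymbols, tableLog;
+ *     size_t const hSize = HUF_readStats(huffWeight, sizeof(huffWeight),
+ *                                        rankStats, &nbSymbols, &tableLog,
+ *                                        src, srcSize);
+ *     if (HUF_isError(hSize)) (treat the input as corrupted)
+ */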
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                   U32* nbSymbolsPtr, U32* tableLogPtr,
+                   const void* src, size_t srcSize,
+                   void* workSpace, size_t wkspSize,
+                   int bmi2)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    /* ZSTD_memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzers complain ... */
+
+    if (iSize >= 128) {  /* special header */
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BIT_highbit32(rest);
+            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    return iSize+1;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize,
+                     int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+}
diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
new file mode 100644
index 0000000..6d1135f
--- /dev/null
+++ b/lib/zstd/common/error_private.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+#ifdef ZSTD_STRIP_ERROR_STRINGS
+    (void)code;
+    return "Error strings stripped";
+#else
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+        /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+#endif
+}
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
new file mode 100644
index 0000000..d14e686
--- /dev/null
+++ b/lib/zstd/common/error_private.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t */
+#include <linux/zstd_errors.h>  /* enum list */
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#undef ERROR   /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
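+
+/* Illustrative sketch (not part of the original interface) : how the CHECK
+ * macros are typically used to forward result-or-error codes up the stack.
+ * doStepA() / doStepB() are hypothetical helpers returning a size_t code.
+ *
+ *     static size_t doPipeline(void* dst, size_t dstCapacity)
+ *     {
+ *         CHECK_V_F(producedSize, doStepA(dst, dstCapacity));   // declares producedSize, returns early on error
+ *         CHECK_F(doStepB(dst, producedSize));                  // result checked for error, then discarded
+ *         return producedSize;
+ *     }
+ */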
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
new file mode 100644
index 0000000..0bb174c
--- /dev/null
+++ b/lib/zstd/common/fse.h
@@ -0,0 +1,710 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required, but allows generating better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /*< library version number; to be used when checking dll version */
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+/*! FSE_compress() :
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte value repeated srcSize times; use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= dstCapacity),
+              or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
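+
+/*! Round-trip sketch (illustrative only) :
+    shows how a caller must handle the two special return values of FSE_compress().
+    cBuf/cBufCapacity are a hypothetical scratch buffer sized with FSE_compressBound(srcSize).
+
+    size_t const cSize = FSE_compress(cBuf, cBufCapacity, src, srcSize);
+    if (FSE_isError(cSize)) return cSize;   // compression failed
+    if (cSize == 0) {}        // not compressible : caller stores src raw
+    else if (cSize == 1) {}   // RLE : caller stores src[0] and srcSize
+    else {
+        size_t const dSize = FSE_decompress(dst, srcSize, cBuf, cSize);
+        if (FSE_isError(dSize)) return dSize;
+    }
+*/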
+
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte value repeated srcSize times; use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrences from source[] into table count[] (see hist.h)
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distributions using an external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    useLowProbCount is a boolean parameter which trades off compressed size for
+    faster header decoding. When it is set to 1, the compressed data will be slightly
+    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+    is a good default, since header deserialization makes a big speed difference.
+    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
+
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                                 const short* normalizedCounter,
+                                 unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrences of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within the distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_NCountWriteBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
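+
+/*! Condensed sketch of the tutorial above (illustrative only, single-pass, error
+    handling shortened). The symbol histogram 'count' is assumed to be produced
+    by a counting pass over 'src' (see hist.h); this header does not declare one.
+
+    unsigned count[256];                 // count[s] = number of occurrences of s, filled beforehand
+    unsigned maxSymbolValue = 255;
+    unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
+    short norm[256];
+    {   size_t const nc = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 1);
+        if (FSE_isError(nc)) return nc;   }
+    size_t const hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
+    if (FSE_isError(hSize)) return hSize;
+    FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);
+    {   size_t const bc = FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);
+        if (FSE_isError(bc)) { FSE_freeCTable(ct); return bc; }   }
+    size_t const cSize = FSE_compress_usingCTable((char*)dst + hSize, dstCapacity - hSize, src, srcSize, ct);
+    FSE_freeCTable(ct);
+    return FSE_isError(cSize) ? cSize : hSize + cSize;
+*/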
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize);
+
+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize, int bmi2);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBuffSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
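+
+/*! Condensed sketch of the tutorial above (illustrative only, error handling shortened) :
+
+    short norm[256];                 // sized for the worst case (maxSymbolValue 255)
+    unsigned maxSymbolValue = 255;   // in : table capacity ; out : real max symbol value
+    unsigned tableLog;
+    size_t const hSize = FSE_readNCount(norm, &maxSymbolValue, &tableLog, cSrc, cSrcSize);
+    if (FSE_isError(hSize)) return hSize;
+    {   FSE_DTable* const dt = FSE_createDTable(tableLog);
+        size_t const bd = FSE_buildDTable(dt, norm, maxSymbolValue, tableLog);
+        size_t const dSize = FSE_isError(bd) ? bd
+            : FSE_decompress_usingDTable(dst, dstCapacity, (const char*)cSrc + hSize, cSrcSize - hSize, dt);
+        FSE_freeDTable(dt);
+        return dSize;
+    }
+*/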
+
+#endif  /* FSE_H */
+
+#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+ *  FSE advanced API
+ ***************************************** */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/*< same as FSE_optimalTableLog(), which uses `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/*< build a fake FSE_CTable, designed to always compress the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/*< Same as FSE_buildDTable(), using an externally allocated `workSpace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
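+
+/*! Sizing sketch for the workspace variants above (illustrative only).
+    FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE are defined further down in this header;
+    sizing for those maxima lets a caller accept any conforming stream.
+
+    U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+    size_t const dSize = FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize,
+                                             FSE_MAX_TABLELOG, wksp, sizeof(wksp));
+    if (FSE_isError(dSize)) return dSize;
+*/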
+
+typedef enum {
+   FSE_repeat_none,  /*< Cannot use the previous table */
+   FSE_repeat_check, /*< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their bodies are included in the next section.
+*/
+typedef struct {
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeSymbol(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bit decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory; they just store it into a local register, for speed.
+Local register size is 64 bits on 64-bit systems, 32 bits on 32-bit systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushCState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return 0 (== not compressible).
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
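+
+/*!
+Minimal end-to-end sketch of the stream described above (illustrative only) :
+a single state encoding 'src' backwards per the LIFO convention, flushing after
+every symbol for simplicity (real users batch flushes for speed).
+
+    BIT_CStream_t bitStream;
+    FSE_CState_t  state;
+    size_t n = srcSize;
+    {   size_t const initErr = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+        if (FSE_isError(initErr)) return initErr;   }
+    FSE_initCState(&state, ct);
+    while (n > 0) {
+        FSE_encodeSymbol(&bitStream, &state, src[--n]);   // last byte first
+        BIT_flushBits(&bitStream);                        // commit local register to memory
+    }
+    FSE_flushCState(&bitStream, &state);
+    return BIT_closeCStream(&bitStream);                  // 0 means not compressible
+*/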
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information, the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfields you may have stored in the bitStream (in reverse order).
+Note : maximum allowed nbBits is 25, for 32-bit compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = BIT_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left in the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Also check the states : some symbols may remain in them, if some high-probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
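+
+/*!
+Minimal end-to-end sketch of the stream described above (illustrative only) :
+a single state decoding exactly 'originalSize' symbols (known out of band),
+then verifying that both the bitStream and the state were fully consumed.
+
+    BIT_DStream_t DStream;
+    FSE_DState_t  DState;
+    BYTE* const op = (BYTE*)dst;
+    size_t n;
+    {   size_t const initErr = BIT_initDStream(&DStream, srcBuffer, srcSize);
+        if (FSE_isError(initErr)) return initErr;   }
+    FSE_initDState(&DState, &DStream, DTablePtr);
+    for (n = 0; n < originalSize; n++) {
+        op[n] = FSE_decodeSymbol(&DState, &DStream);
+        BIT_reloadDStream(&DStream);   // refill the local register from memory
+    }
+    if (!BIT_endOfDStream(&DStream) || !FSE_endOfDState(&DState))
+        return ERROR(corruption_detected);   // stream or state not fully consumed
+    return originalSize;
+*/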
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const void* ptr = ct;
+    const U16* u16ptr = (const U16*) ptr;
+    const U32 tableLog = MEM_read16(ptr);
+    statePtr->value = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = u16ptr+2;
+    statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+    statePtr->stateLog = tableLog;
+}
+
+
+/*! FSE_initCState2() :
+*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+*   uses the smallest state value possible, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+    FSE_initCState(statePtr, ct);
+    {   const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+        const U16* stateTable = (const U16*)(statePtr->stateTable);
+        U32 nbBitsOut  = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+        statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+        statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+    }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    const U16* const stateTable = (const U16*)(statePtr->stateTable);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_flushBits(bitC);
+}
+
+
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in bits.
+ * Fractional values are rounded up (e.g. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
+    U32 const threshold = (minNbBits+1) << 16;
+    assert(tableLog < 16);
+    assert(accuracyLog < 31-tableLog);  /* ensure enough room for renormalization double shift */
+    {   U32 const tableSize = 1 << tableLog;
+        U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
+        U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog;   /* linear interpolation (very approximate) */
+        U32 const bitMultiplier = 1 << accuracyLog;
+        assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
+        assert(normalizedDeltaFromThreshold <= bitMultiplier);
+        return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
+    }
+}
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_H_FSE_STATIC_LINKING_ONLY */
+
+
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
new file mode 100644
index 0000000..2c8bbe3
--- /dev/null
+++ b/lib/zstd/common/fse_decompress.c
@@ -0,0 +1,390 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "debug.h"      /* assert */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  The objective is to write these functions only once, for improved maintenance.
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    ZSTD_free(dt);
+}
+
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16* symbolNext = (U16*)workSpace;
+    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable-size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].symbol = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSV1 = tableMask+1;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<maxSV1; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init */
+    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer; ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state1);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state2);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+typedef struct {
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+    FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+        void* dst, size_t dstCapacity,
+        const void* cSrc, size_t cSrcSize,
+        unsigned maxLog, void* workSpace, size_t wkspSize,
+        int bmi2)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+
+    /* normal FSE decoding mode */
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
+
+    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+    {
+        const void* ptr = wksp->dtable;
+        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+        const U32 fastMode = DTableH->fastMode;
+
+        /* select fast mode (static) */
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
new file mode 100644
index 0000000..88c5586
--- /dev/null
+++ b/lib/zstd/common/huf.h
@@ -0,0 +1,356 @@
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include "zstd_deps.h"    /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */
+
+/* HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/* HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'originalSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==originalSize) data,
+ *         because it knows the size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /*< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
+
+/* Error Management */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
+
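+/* Illustrative usage sketch (editor's addition, not part of upstream zstd) :
+ * a minimal compress/decompress round-trip with the two simple functions
+ * above. Buffer names and sizes are hypothetical.
+ */
+static inline size_t HUF_example_roundtrip(void* dst, size_t dstCapacity,
+                                           void* regen, size_t srcSize,
+                                           const void* src)
+{
+    size_t const cSize = HUF_compress(dst, dstCapacity, src, srcSize);
+    if (HUF_isError(cSize)) return cSize;  /* compression failed */
+    if (cSize == 0) return 0;              /* not compressible : nothing was stored in dst */
+    /* HUF_decompress() needs the exact original (uncompressed) size */
+    return HUF_decompress(regen, srcSize, dst, cSize);
+}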
+
+/* ***   Advanced function   *** */
+
+/* HUF_compress2() :
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);
+
+/* HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workSpace` must have a minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);
+
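+/* Illustrative sketch (editor's addition) : calling HUF_compress4X_wksp()
+ * with a local workspace; an unsigned[] provides the required 4-byte
+ * alignment. 255 and 11 correspond to HUF_SYMBOLVALUE_MAX and
+ * HUF_TABLELOG_DEFAULT (defined in the static-linking section below).
+ * For illustration only : ~6 KB is too large for a real kernel stack,
+ * so actual callers would use heap or static storage for the workspace.
+ */
+static inline size_t HUF_example_compress_wksp(void* dst, size_t dstCapacity,
+                                               const void* src, size_t srcSize)
+{
+    unsigned wksp[HUF_WORKSPACE_SIZE / sizeof(unsigned)];
+    return HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
+                               255 /* maxSymbolValue */, 11 /* tableLog */,
+                               wksp, sizeof(wksp));
+}
+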
+#endif   /* HUF_H_298734234 */
+
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+#include "mem.h"   /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_TABLELOG_MAX. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#  error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) ((size) + ((size)>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt */
+typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
+
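+/* Illustrative sketch (editor's addition) : declaring static DTables with the
+ * macros above. The leading U32 acts as a small header recording the table's
+ * log size, which is why HUF_DTABLE_SIZE() adds 1.
+ */
+static inline void HUF_example_declare_dtables(void)
+{
+    HUF_CREATE_STATIC_DTABLEX1(dtableX1, HUF_TABLELOG_MAX);   /* single-symbol */
+    HUF_CREATE_STATIC_DTABLEX2(dtableX2, HUF_TABLELOG_MAX);   /* double-symbols */
+    (void)dtableX1; (void)dtableX2;
+}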
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+
+/* ****************************************
+ *  HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ *  2. (optional) refine tableLog using HUF_optimalTableLog()
+ *  3. build Huffman table from count using HUF_buildCTable()
+ *  4. save Huffman table to memory buffer using HUF_writeCTable()
+ *  5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ *  The following API allows targeting specific sub-functions for advanced tasks.
+ *  For example, it's possible to compress several blocks using the same 'CTable',
+ *  or to save and regenerate 'CTable' using external methods.
+ */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap, in which case CTable will overwrite count's content */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+
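+/* Illustrative sketch (editor's addition) : steps 2-5 of the pipeline
+ * described above, with `count` assumed to have been filled by a histogram
+ * pass (step 1). Error handling is abbreviated.
+ */
+static inline size_t HUF_example_detailed_compress(void* dst, size_t dstCapacity,
+                                                   const void* src, size_t srcSize,
+                                                   const unsigned* count, unsigned maxSymbolValue)
+{
+    HUF_CREATE_STATIC_CTABLE(ctable, HUF_SYMBOLVALUE_MAX);
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstCapacity;
+    unsigned const tableLog = HUF_optimalTableLog(HUF_TABLELOG_DEFAULT, srcSize, maxSymbolValue);  /* step 2 */
+    size_t const maxNbBits = HUF_buildCTable(ctable, count, maxSymbolValue, tableLog);             /* step 3 */
+    size_t hSize, cSize;
+    if (HUF_isError(maxNbBits)) return maxNbBits;
+    hSize = HUF_writeCTable(op, (size_t)(oend - op), ctable, maxSymbolValue, (unsigned)maxNbBits); /* step 4 */
+    if (HUF_isError(hSize)) return hSize;
+    op += hSize;
+    cSize = HUF_compress4X_usingCTable(op, (size_t)(oend - op), src, srcSize, ctable);             /* step 5 */
+    if (HUF_isError(cSize) || cSize == 0) return cSize;   /* 0 => not compressible */
+    return hSize + cSize;
+}
+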
+typedef enum {
+   HUF_repeat_none,  /*< Cannot use the previous table */
+   HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/* HUF_compress4X_repeat() :
+ *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-byte boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
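+/* Illustrative sketch (editor's addition) : compressing a series of blocks
+ * while reusing the previous Huffman table when it still fits. `table` and
+ * `rep` persist across calls (`*rep` starts as HUF_repeat_none).
+ */
+static inline size_t HUF_example_block_repeat(void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              void* wksp, size_t wkspSize,
+                                              HUF_CElt* table, HUF_repeat* rep)
+{
+    return HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
+                                 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
+                                 wksp, wkspSize,
+                                 table, rep, 0 /* preferRepeat */, 0 /* bmi2 */);
+}
+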
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-byte boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+ */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                             void* workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src`, or an error code.
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                          const void* src, size_t srcSize,
+                          void* workspace, size_t wkspSize,
+                          int bmi2);
+
+/* HUF_readCTable() :
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
+
+/* HUF_getNbBits() :
+ *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ *  Note 1 : is not inlined, as HUF_CElt definition is private
+ *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+
+/*
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+ * 2. build Huffman table from the saved representation, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+/*
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
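+/* Illustrative sketch (editor's addition) : manual decode pipeline following
+ * the three steps above; wkspSize is assumed >= HUF_DECOMPRESS_WORKSPACE_SIZE.
+ * Only compiled when neither HUF_FORCE_DECOMPRESS_* macro is defined.
+ */
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+static inline size_t HUF_example_manual_decompress(void* dst, size_t dstSize,
+                                                   const void* cSrc, size_t cSrcSize,
+                                                   void* wksp, size_t wkspSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(dtable, HUF_TABLELOG_MAX);   /* large enough for X1 too */
+    U32 const algo = HUF_selectDecoder(dstSize, cSrcSize);                                       /* step 1 */
+    size_t const hSize = algo ? HUF_readDTableX2_wksp(dtable, cSrc, cSrcSize, wksp, wkspSize)
+                              : HUF_readDTableX1_wksp(dtable, cSrc, cSrcSize, wksp, wkspSize);   /* step 2 */
+    if (HUF_isError(hSize)) return hSize;
+    return HUF_decompress4X_usingDTable(dst, dstSize,
+                                        (const BYTE*)cSrc + hSize, cSrcSize - hSize, dtable);    /* step 3 */
+}
+#endif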
+
+/* ====================== */
+/* single stream variants */
+/* ====================== */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/* HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /*< automatic selection of single or double-symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+
+#endif /* HUF_H_HUF_STATIC_LINKING_ONLY */
+
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
new file mode 100644
index 0000000..dcdd586
--- /dev/null
+++ b/lib/zstd/common/mem.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <asm/unaligned.h>  /* get_unaligned, put_unaligned* */
+#include <linux/compiler.h>  /* inline */
+#include <linux/swab.h>  /* swab32, swab64 */
+#include <linux/types.h>  /* size_t, ptrdiff_t */
+#include "debug.h"  /* DEBUG_STATIC_ASSERT */
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#define MEM_STATIC static inline
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef int16_t  S16;
+typedef uint32_t U32;
+typedef int32_t  S32;
+typedef uint64_t U64;
+typedef int64_t  S64;
+
+/*-**************************************************************
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+/*-**************************************************************
+*  Memory I/O Implementation
+*****************************************************************/
+MEM_STATIC unsigned MEM_32bits(void)
+{
+    return sizeof(size_t) == 4;
+}
+
+MEM_STATIC unsigned MEM_64bits(void)
+{
+    return sizeof(size_t) == 8;
+}
+
+#if defined(__LITTLE_ENDIAN)
+#define MEM_LITTLE_ENDIAN 1
+#else
+#define MEM_LITTLE_ENDIAN 0
+#endif
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    return MEM_LITTLE_ENDIAN;
+}
+
+MEM_STATIC U16 MEM_read16(const void *memPtr)
+{
+    return get_unaligned((const U16 *)memPtr);
+}
+
+MEM_STATIC U32 MEM_read32(const void *memPtr)
+{
+    return get_unaligned((const U32 *)memPtr);
+}
+
+MEM_STATIC U64 MEM_read64(const void *memPtr)
+{
+    return get_unaligned((const U64 *)memPtr);
+}
+
+MEM_STATIC size_t MEM_readST(const void *memPtr)
+{
+    return get_unaligned((const size_t *)memPtr);
+}
+
+MEM_STATIC void MEM_write16(void *memPtr, U16 value)
+{
+    put_unaligned(value, (U16 *)memPtr);
+}
+
+MEM_STATIC void MEM_write32(void *memPtr, U32 value)
+{
+    put_unaligned(value, (U32 *)memPtr);
+}
+
+MEM_STATIC void MEM_write64(void *memPtr, U64 value)
+{
+    put_unaligned(value, (U64 *)memPtr);
+}
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void *memPtr)
+{
+    return get_unaligned_le16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
+{
+    put_unaligned_le16(val, memPtr);
+}
+
+MEM_STATIC U32 MEM_readLE24(const void *memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
+{
+	MEM_writeLE16(memPtr, (U16)val);
+	((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void *memPtr)
+{
+    return get_unaligned_le32(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
+{
+    put_unaligned_le32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void *memPtr)
+{
+    return get_unaligned_le64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
+{
+    put_unaligned_le64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readLEST(const void *memPtr)
+{
+	if (MEM_32bits())
+		return (size_t)MEM_readLE32(memPtr);
+	else
+		return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
+{
+	if (MEM_32bits())
+		MEM_writeLE32(memPtr, (U32)val);
+	else
+		MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void *memPtr)
+{
+    return get_unaligned_be32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
+{
+    put_unaligned_be32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void *memPtr)
+{
+    return get_unaligned_be64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
+{
+    put_unaligned_be64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void *memPtr)
+{
+	if (MEM_32bits())
+		return (size_t)MEM_readBE32(memPtr);
+	else
+		return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
+{
+	if (MEM_32bits())
+		MEM_writeBE32(memPtr, (U32)val);
+	else
+		MEM_writeBE64(memPtr, (U64)val);
+}
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+    return swab32(in);
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+    return swab64(in);
+}
+
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_swap32((U32)in);
+    else
+        return (size_t)MEM_swap64((U64)in);
+}
+
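+/*
+ * Illustrative sketch (editor's addition) : the unaligned helpers round-trip
+ * through an arbitrarily aligned buffer, and a byteswapped big-endian read
+ * matches the little-endian read of the same bytes.
+ */
+MEM_STATIC int MEM_example_roundtrip(void)
+{
+    BYTE buf[8];
+    MEM_writeLE32(buf + 1, 0x01020304U);   /* deliberately misaligned */
+    return (MEM_readLE32(buf + 1) == 0x01020304U)
+        && (MEM_swap32(MEM_readBE32(buf + 1)) == MEM_readLE32(buf + 1));
+}
+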
+#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
new file mode 100644
index 0000000..3d7e35b
--- /dev/null
+++ b/lib/zstd/common/zstd_common.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+#include "error_private.h"
+#include "zstd_internal.h"
+
+
+/*-****************************************
+*  Version
+******************************************/
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+#undef ZSTD_isError   /* defined within zstd_internal.h */
+/*! ZSTD_isError() :
+ *  tells if a return value is an error code
+ *  symbol is required for external callers */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName() :
+ *  provides error code string from function result (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! ZSTD_getError() :
+ *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! ZSTD_getErrorString() :
+ *  provides error code string from enum */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
+
+
+
+/*=**************************************************************
+*  Custom allocator
+****************************************************************/
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc)
+        return customMem.customAlloc(customMem.opaque, size);
+    return ZSTD_malloc(size);
+}
+
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset;
+         * not as efficient as calloc, but next best guess for custom malloc */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        ZSTD_memset(ptr, 0, size);
+        return ptr;
+    }
+    return ZSTD_calloc(1, size);
+}
+
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+    if (ptr!=NULL) {
+        if (customMem.customFree)
+            customMem.customFree(customMem.opaque, ptr);
+        else
+            ZSTD_free(ptr);
+    }
+}
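+
+/*
+ * Illustrative sketch (editor's addition) : in this build ZSTD_malloc() always
+ * fails (see zstd_deps.h), so a caller wanting heap allocation supplies a
+ * ZSTD_customMem. A hypothetical kernel-backed allocator, assuming
+ * kvmalloc()/kvfree() from <linux/mm.h>, might look like:
+ */
+#if 0
+static void* example_zalloc(void* opaque, size_t size)
+{
+    (void)opaque;
+    return kvmalloc(size, GFP_KERNEL);
+}
+static void example_zfree(void* opaque, void* address)
+{
+    (void)opaque;
+    kvfree(address);
+}
+static const ZSTD_customMem example_mem = { example_zalloc, example_zfree, NULL };
+#endif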
diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
new file mode 100644
index 0000000..7a5bf44
--- /dev/null
+++ b/lib/zstd/common/zstd_deps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <linux/limits.h>
+#include <linux/stddef.h>
+
+#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
+#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
+#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/*
+ * Define malloc as always failing. That means the user must
+ * either use ZSTD_customMem or statically allocate memory.
+ * Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#define ZSTD_malloc(s) ({ (void)(s); NULL; })
+#define ZSTD_free(p) ((void)(p))
+#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#include <linux/math64.h>
+
+static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+  return div_u64(dividend, divisor);
+}
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 1, meaning
+ * it is disabled in production.
+ * Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <linux/kernel.h>
+
+#define assert(x) WARN_ON(!(x))   /* warn when the asserted condition is false */
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 2, meaning
+ * it is disabled in production.
+ * Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <linux/printk.h>
+
+#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/*
+ * Only requested when MSAN is enabled.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+/*
+ * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
+ * is an unsigned long.
+ */
+typedef long intptr_t;
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
new file mode 100644
index 0000000..fc6f3a9
--- /dev/null
+++ b/lib/zstd/common/zstd_internal.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "compiler.h"
+#include "mem.h"
+#include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h>
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include <linux/xxhash.h>                /* XXH_reset, update, digest */
+#define ZSTD_TRACE 0
+
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError   /* for inlining */
+#define FSE_isError  ERR_isError
+#define HUF_isError  ERR_isError
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0)
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0)
+
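+/*
+ * Illustrative sketch (editor's addition) : typical combined use of the
+ * macros above. dstSize_tooSmall is one of the ZSTD_ErrorCode names from
+ * error_private.h; the function names are hypothetical.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+size_t example_checked_copy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "need %u bytes", (unsigned)srcSize);
+    ZSTD_memcpy(dst, src, srcSize);
+    return srcSize;
+}
+static INLINE_KEYWORD UNUSED_ATTR
+size_t example_forwarding_caller(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    FORWARD_IF_ERROR(example_checked_copy(dst, dstCapacity, src, srcSize), "copy failed");
+    return 0;
+}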
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTD_OPT_NUM    (1<<12)
+
+#define ZSTD_REP_NUM      3                 /* number of repcodes */
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+
+#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow a `static const` variable to be initialized from another `static const` variable */
+static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML   52
+#define MaxLL   35
+#define DefaultMaxOff 28
+#define MaxOff  31
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
+#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
+
+#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+/* Each table cannot take more than #symbols * FSELog bits */
+#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
+
+static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 6, 7, 8, 9,10,11,12,
+    13,14,15,16
+};
+static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
+     4, 3, 2, 2, 2, 2, 2, 2,
+     2, 2, 2, 2, 2, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     2, 3, 2, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1
+};
+#define LL_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 4, 5, 7, 8, 9,10,11,
+    12,13,14,15,16
+};
+static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
+     1, 4, 3, 2, 2, 2, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1,-1,-1,
+    -1,-1,-1,-1,-1
+};
+#define ML_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
+     1, 1, 1, 1, 1, 1, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1,-1
+};
+#define OF_DEFAULTNORMLOG 5  /* for static allocation */
+static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 8);
+}
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 16);
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
+
+/*! ZSTD_wildcopy() :
+ *  Custom version of ZSTD_memcpy(), which may over-read/over-write up to WILDCOPY_OVERLENGTH bytes past the end of the buffers (even when length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first COPY16() call because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities. Since it is almost certain to be short, only do
+         * one COPY16() in the first call. Then, do two calls per loop since
+         * at that point it is more likely to have a high trip count.
+         */
+#ifdef __aarch64__
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#else
+        ZSTD_copy16(op, ip);
+        if (16 >= length) return;
+        op += 16;
+        ip += 16;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#endif
+    }
+}
+
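+/*
+ * Illustrative sketch (editor's addition) : a typical literal-run copy. The
+ * caller must guarantee WILDCOPY_OVERLENGTH writable bytes past op+litLength,
+ * and, for ZSTD_no_overlap, at least WILDCOPY_VECLEN bytes between ip and op.
+ */
+MEM_STATIC void example_copy_literals(BYTE* op, const BYTE* ip, size_t litLength)
+{
+    ZSTD_wildcopy(op, ip, (ptrdiff_t)litLength, ZSTD_no_overlap);
+}
+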
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const length = MIN(dstCapacity, srcSize);
+    if (length > 0) {
+        ZSTD_memcpy(dst, src, length);
+    }
+    return length;
+}
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when the workspace has been too large
+ * for at least this many consecutive invocations,
+ * the context's memory usage is considered wasteful,
+ * because it's sized to handle a worst-case scenario which rarely happens.
+ * In that case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
+    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
+
+/*-*******************************************
+*  Private declarations
+*********************************************/
+typedef struct seqDef_s {
+    U32 offset;         /* Offset code of the sequence */
+    U16 litLength;
+    U16 matchLength;
+} seqDef;
+
+typedef struct {
+    seqDef* sequencesStart;
+    seqDef* sequences;      /* ptr to end of sequences */
+    BYTE* litStart;
+    BYTE* lit;              /* ptr to end of literals */
+    BYTE* llCode;
+    BYTE* mlCode;
+    BYTE* ofCode;
+    size_t maxNbSeq;
+    size_t maxNbLit;
+
+    /* longLengthPos and longLengthID allow us to represent a single litLength or matchLength
+     * in the seqStore whose value is larger than U16 (if one exists). To do so, we increment
+     * the existing value of the litLength or matchLength by 0x10000.
+     */
+    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
+    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+} seqStore_t;
+
+typedef struct {
+    U32 litLength;
+    U32 matchLength;
+} ZSTD_sequenceLength;
+
+/*
+ * Returns the ZSTD_sequenceLength for the given sequence. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+    ZSTD_sequenceLength seqLen;
+    seqLen.litLength = seq->litLength;
+    seqLen.matchLength = seq->matchLength + MINMATCH;
+    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+        if (seqStore->longLengthID == 1) {
+            seqLen.litLength += 0xFFFF;
+        }
+        if (seqStore->longLengthID == 2) {
+            seqLen.matchLength += 0xFFFF;
+        }
+    }
+    return seqLen;
+}
+
+/*
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ *       Similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+/* custom memory allocation functions */
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
+
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
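+/*
+ * Illustrative examples (editor's addition) :
+ *   ZSTD_highbit32(1) == 0, ZSTD_highbit32(32) == 5,
+ *   ZSTD_highbit32(0x80000000U) == 31,
+ * i.e. the zero-based position of the highest set bit (floor(log2(val))).
+ */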
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t;   /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                       const void* src, size_t srcSize);
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
deleted file mode 100644
index b080264..0000000
--- a/lib/zstd/compress.c
+++ /dev/null
@@ -1,3485 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memset */
-
-/*-*************************************
-*  Constants
-***************************************/
-static const U32 g_searchStrength = 8; /* control skip over incompressible data */
-#define HASH_READ_SIZE 8
-typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
-
-/*-*************************************
-*  Helper functions
-***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
-
-/*-*************************************
-*  Sequence storage
-***************************************/
-static void ZSTD_resetSeqStore(seqStore_t *ssPtr)
-{
-	ssPtr->lit = ssPtr->litStart;
-	ssPtr->sequences = ssPtr->sequencesStart;
-	ssPtr->longLengthID = 0;
-}
-
-/*-*************************************
-*  Context memory management
-***************************************/
-struct ZSTD_CCtx_s {
-	const BYTE *nextSrc;  /* next block here to continue on curr prefix */
-	const BYTE *base;     /* All regular indexes relative to this position */
-	const BYTE *dictBase; /* extDict indexes relative to this position */
-	U32 dictLimit;	/* below that point, need extDict */
-	U32 lowLimit;	 /* below that point, no more data */
-	U32 nextToUpdate;     /* index from which to continue dictionary update */
-	U32 nextToUpdate3;    /* index from which to continue dictionary update */
-	U32 hashLog3;	 /* dispatch table : larger == faster, more memory */
-	U32 loadedDictEnd;    /* index of end of dictionary */
-	U32 forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
-	U32 forceRawDict;     /* Force loading dictionary in "content-only" mode (no header analysis) */
-	ZSTD_compressionStage_e stage;
-	U32 rep[ZSTD_REP_NUM];
-	U32 repToConfirm[ZSTD_REP_NUM];
-	U32 dictID;
-	ZSTD_parameters params;
-	void *workSpace;
-	size_t workSpaceSize;
-	size_t blockSize;
-	U64 frameContentSize;
-	struct xxh64_state xxhState;
-	ZSTD_customMem customMem;
-
-	seqStore_t seqStore; /* sequences storage ptrs */
-	U32 *hashTable;
-	U32 *hashTable3;
-	U32 *chainTable;
-	HUF_CElt *hufTable;
-	U32 flagStaticTables;
-	HUF_repeat flagStaticHufTable;
-	FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-	FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-	FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-	unsigned tmpCounters[HUF_COMPRESS_WORKSPACE_SIZE_U32];
-};
-
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
-	U32 const divider = (cParams.searchLength == 3) ? 3 : 4;
-	size_t const maxNbSeq = blockSize / divider;
-	size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-	size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
-	size_t const hSize = ((size_t)1) << cParams.hashLog;
-	U32 const hashLog3 = (cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
-	size_t const h3Size = ((size_t)1) << hashLog3;
-	size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-	size_t const optSpace =
-	    ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-	size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-				     (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-
-	return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize);
-}
-
-static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_CCtx *cctx;
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-	cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
-	if (!cctx)
-		return NULL;
-	memset(cctx, 0, sizeof(ZSTD_CCtx));
-	cctx->customMem = customMem;
-	return cctx;
-}
-
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem);
-	if (cctx) {
-		cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize);
-	}
-	return cctx;
-}
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx)
-{
-	if (cctx == NULL)
-		return 0; /* support free on NULL */
-	ZSTD_free(cctx->workSpace, cctx->customMem);
-	ZSTD_free(cctx, cctx->customMem);
-	return 0; /* reserved as a potential error code in the future */
-}
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); }
-
-static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; }
-
-/** ZSTD_checkParams() :
-	ensure param values remain within authorized range.
-	@return : 0, or an error code if one value is beyond authorized range */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
-{
-#define CLAMPCHECK(val, min, max)                                       \
-	{                                                               \
-		if ((val < min) | (val > max))                          \
-			return ERROR(compressionParameter_unsupported); \
-	}
-	CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
-	CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
-	CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
-	CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-	CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-	CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-	if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2)
-		return ERROR(compressionParameter_unsupported);
-	return 0;
-}
-
-/** ZSTD_cycleLog() :
- *  condition for correct operation : hashLog > 1 */
-static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
-{
-	U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
-	return hashLog - btScale;
-}
-
-/** ZSTD_adjustCParams() :
-	optimize `cPar` for a given input (`srcSize` and `dictSize`).
-	mostly downsizing to reduce memory consumption and initialization.
-	Both `srcSize` and `dictSize` are optional (use 0 if unknown),
-	but if both are 0, no optimization can be done.
-	Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
-{
-	if (srcSize + dictSize == 0)
-		return cPar; /* no size information available : no adjustment */
-
-	/* resize params, to use less memory when necessary */
-	{
-		U32 const minSrcSize = (srcSize == 0) ? 500 : 0;
-		U64 const rSize = srcSize + dictSize + minSrcSize;
-		if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) {
-			U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
-			if (cPar.windowLog > srcLog)
-				cPar.windowLog = srcLog;
-		}
-	}
-	if (cPar.hashLog > cPar.windowLog)
-		cPar.hashLog = cPar.windowLog;
-	{
-		U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
-		if (cycleLog > cPar.windowLog)
-			cPar.chainLog -= (cycleLog - cPar.windowLog);
-	}
-
-	if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
-		cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
-
-	return cPar;
-}
-
-static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
-{
-	return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) &
-	       (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3));
-}
-
-/*! ZSTD_continueCCtx() :
-	reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize)
-{
-	U32 const end = (U32)(cctx->nextSrc - cctx->base);
-	cctx->params = params;
-	cctx->frameContentSize = frameContentSize;
-	cctx->lowLimit = end;
-	cctx->dictLimit = end;
-	cctx->nextToUpdate = end + 1;
-	cctx->stage = ZSTDcs_init;
-	cctx->dictID = 0;
-	cctx->loadedDictEnd = 0;
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			cctx->rep[i] = repStartValue[i];
-	}
-	cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
-	xxh64_reset(&cctx->xxhState, 0);
-	return 0;
-}
-
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
-
-/*! ZSTD_resetCCtx_advanced() :
-	note : `params` must be validated */
-static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp)
-{
-	if (crp == ZSTDcrp_continue)
-		if (ZSTD_equivalentParams(params, zc->params)) {
-			zc->flagStaticTables = 0;
-			zc->flagStaticHufTable = HUF_repeat_none;
-			return ZSTD_continueCCtx(zc, params, frameContentSize);
-		}
-
-	{
-		size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
-		U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4;
-		size_t const maxNbSeq = blockSize / divider;
-		size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-		size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
-		size_t const hSize = ((size_t)1) << params.cParams.hashLog;
-		U32 const hashLog3 = (params.cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
-		size_t const h3Size = ((size_t)1) << hashLog3;
-		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-		void *ptr;
-
-		/* Check if workSpace is large enough, alloc a new one if needed */
-		{
-			size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) +
-						(ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-			size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-						   (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-			if (zc->workSpaceSize < neededSpace) {
-				ZSTD_free(zc->workSpace, zc->customMem);
-				zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
-				if (zc->workSpace == NULL)
-					return ERROR(memory_allocation);
-				zc->workSpaceSize = neededSpace;
-			}
-		}
-
-		if (crp != ZSTDcrp_noMemset)
-			memset(zc->workSpace, 0, tableSpace); /* reset tables only */
-		xxh64_reset(&zc->xxhState, 0);
-		zc->hashLog3 = hashLog3;
-		zc->hashTable = (U32 *)(zc->workSpace);
-		zc->chainTable = zc->hashTable + hSize;
-		zc->hashTable3 = zc->chainTable + chainSize;
-		ptr = zc->hashTable3 + h3Size;
-		zc->hufTable = (HUF_CElt *)ptr;
-		zc->flagStaticTables = 0;
-		zc->flagStaticHufTable = HUF_repeat_none;
-		ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
-
-		zc->nextToUpdate = 1;
-		zc->nextSrc = NULL;
-		zc->base = NULL;
-		zc->dictBase = NULL;
-		zc->dictLimit = 0;
-		zc->lowLimit = 0;
-		zc->params = params;
-		zc->blockSize = blockSize;
-		zc->frameContentSize = frameContentSize;
-		{
-			int i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				zc->rep[i] = repStartValue[i];
-		}
-
-		if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
-			zc->seqStore.litFreq = (U32 *)ptr;
-			zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits);
-			zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1);
-			zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1);
-			ptr = zc->seqStore.offCodeFreq + (MaxOff + 1);
-			zc->seqStore.matchTable = (ZSTD_match_t *)ptr;
-			ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1;
-			zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr;
-			ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1;
-			zc->seqStore.litLengthSum = 0;
-		}
-		zc->seqStore.sequencesStart = (seqDef *)ptr;
-		ptr = zc->seqStore.sequencesStart + maxNbSeq;
-		zc->seqStore.llCode = (BYTE *)ptr;
-		zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
-		zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
-		zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
-
-		zc->stage = ZSTDcs_init;
-		zc->dictID = 0;
-		zc->loadedDictEnd = 0;
-
-		return 0;
-	}
-}
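-
-/* Sizing example for the workspace above (editorial illustration): with
- * windowLog 20, hashLog 17, chainLog 20, searchLength 5 and strategy
- * ZSTD_lazy2: blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << 20),
- * divider = 4, maxNbSeq = blockSize / 4, tokenSpace = blockSize + 11 * maxNbSeq,
- * and tableSpace = ((1 << 20) + (1 << 17) + 1) * sizeof(U32); optSpace is
- * reserved only for the btopt strategies. */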
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx)
-{
-	int i;
-	for (i = 0; i < ZSTD_REP_NUM; i++)
-		cctx->rep[i] = 0;
-}
-
-/*! ZSTD_copyCCtx() :
-*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize)
-{
-	if (srcCCtx->stage != ZSTDcs_init)
-		return ERROR(stage_wrong);
-
-	memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-	{
-		ZSTD_parameters params = srcCCtx->params;
-		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-		ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
-	}
-
-	/* copy tables */
-	{
-		size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
-		size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
-		size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
-		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-		memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
-	}
-
-	/* copy dictionary offsets */
-	dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
-	dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3;
-	dstCCtx->nextSrc = srcCCtx->nextSrc;
-	dstCCtx->base = srcCCtx->base;
-	dstCCtx->dictBase = srcCCtx->dictBase;
-	dstCCtx->dictLimit = srcCCtx->dictLimit;
-	dstCCtx->lowLimit = srcCCtx->lowLimit;
-	dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd;
-	dstCCtx->dictID = srcCCtx->dictID;
-
-	/* copy entropy tables */
-	dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
-	dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
-	if (srcCCtx->flagStaticTables) {
-		memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
-		memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
-		memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
-	}
-	if (srcCCtx->flagStaticHufTable) {
-		memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4);
-	}
-
-	return 0;
-}
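-
-/* Usage sketch (hypothetical caller, not part of the original source):
- * prime one context with a dictionary, then clone it per frame so the
- * dictionary tables are copied rather than rebuilt:
- *
- *	ZSTD_compressBegin_usingDict(baseCCtx, dict, dictSize, level);
- *	if (!ZSTD_isError(ZSTD_copyCCtx(workCCtx, baseCCtx, pledgedSrcSize)))
- *		compress the frame with workCCtx; baseCCtx stays at ZSTDcs_init
- */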
-
-/*! ZSTD_reduceTable() :
-*   reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue)
-{
-	U32 u;
-	for (u = 0; u < size; u++) {
-		if (table[u] < reducerValue)
-			table[u] = 0;
-		else
-			table[u] -= reducerValue;
-	}
-}
-
-/*! ZSTD_reduceIndex() :
-*   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue)
-{
-	{
-		U32 const hSize = 1 << zc->params.cParams.hashLog;
-		ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
-	}
-
-	{
-		U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
-		ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
-	}
-
-	{
-		U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
-		ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue);
-	}
-}
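-
-/* Example (editorial illustration): after ZSTD_reduceIndex(zc, 0x10000), a
- * table entry of 0x12345 becomes 0x2345 and every entry below 0x10000
- * becomes 0, so all surviving indexes remain valid in the rebased window. */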
-
-/*-*******************************************************
-*  Block entropic compression
-*********************************************************/
-
-/* See doc/zstd_compression_format.md for detailed format description */
-
-size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	if (srcSize + ZSTD_blockHeaderSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize);
-	ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
-	return ZSTD_blockHeaderSize + srcSize;
-}
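-
-/* Header example (derived from the code above): for srcSize == 100 and
- * bt_raw == 0, the header is the little-endian 24-bit encoding of
- * (100 << 2) + 0 == 400, i.e. the bytes 0x90 0x01 0x00. */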
-
-static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	BYTE *const ostart = (BYTE * const)dst;
-	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-	if (srcSize + flSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-
-	switch (flSize) {
-	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break;
-	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break;
-	default: /* note : should not be necessary : flSize is within {1,2,3} */
-	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break;
-	}
-
-	memcpy(ostart + flSize, src, srcSize);
-	return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	BYTE *const ostart = (BYTE * const)dst;
-	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-	(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
-
-	switch (flSize) {
-	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break;
-	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break;
-	default: /* note : should not be necessary : flSize is necessarily within {1,2,3} */
-	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break;
-	}
-
-	ostart[flSize] = *(const BYTE *)src;
-	return flSize + 1;
-}
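-
-/* Example (editorial illustration, with set_rle == 1): 20 repeated bytes
- * give flSize == 1, a header byte of 1 + (20 << 3) == 0xA1, then the single
- * repeated byte, for 2 bytes of output in total. */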
-
-static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-
-static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const minGain = ZSTD_minGain(srcSize);
-	size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-	BYTE *const ostart = (BYTE *)dst;
-	U32 singleStream = srcSize < 256;
-	symbolEncodingType_e hType = set_compressed;
-	size_t cLitSize;
-
-/* small ? don't even attempt compression (speed opt) */
-#define LITERAL_NOENTROPY 63
-	{
-		size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
-		if (srcSize <= minLitSize)
-			return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-	}
-
-	if (dstCapacity < lhSize + 1)
-		return ERROR(dstSize_tooSmall); /* not enough space for compression */
-	{
-		HUF_repeat repeat = zc->flagStaticHufTable;
-		int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-		if (repeat == HUF_repeat_valid && lhSize == 3)
-			singleStream = 1;
-		cLitSize = singleStream ? HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
-					: HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
-		if (repeat != HUF_repeat_none) {
-			hType = set_repeat;
-		} /* reused the existing table */
-		else {
-			zc->flagStaticHufTable = HUF_repeat_check;
-		} /* now have a table to reuse */
-	}
-
-	if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-	}
-	if (cLitSize == 1) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-	}
-
-	/* Build header */
-	switch (lhSize) {
-	case 3: /* 2 - 2 - 10 - 10 */
-	{
-		U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14);
-		ZSTD_writeLE24(ostart, lhc);
-		break;
-	}
-	case 4: /* 2 - 2 - 14 - 14 */
-	{
-		U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18);
-		ZSTD_writeLE32(ostart, lhc);
-		break;
-	}
-	default: /* should not be necessary, lhSize is only {3,4,5} */
-	case 5:  /* 2 - 2 - 18 - 18 */
-	{
-		U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22);
-		ZSTD_writeLE32(ostart, lhc);
-		ostart[4] = (BYTE)(cLitSize >> 10);
-		break;
-	}
-	}
-	return lhSize + cLitSize;
-}
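-
-/* Reading the "2 - 2 - 10 - 10" style notes above: from bit 0 upward, the
- * literals header packs 2 bits of symbolEncodingType_e, 2 bits of size
- * format, then the regenerated size and the compressed size (10, 14 or 18
- * bits each for lhSize 3, 4 or 5 respectively). */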
-
-static const BYTE LL_Code[64] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18,
-				 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
-				 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
-
-static const BYTE ML_Code[128] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-				  26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38,
-				  38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-				  40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42,
-				  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42};
-
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
-{
-	BYTE const LL_deltaCode = 19;
-	BYTE const ML_deltaCode = 36;
-	const seqDef *const sequences = seqStorePtr->sequencesStart;
-	BYTE *const llCodeTable = seqStorePtr->llCode;
-	BYTE *const ofCodeTable = seqStorePtr->ofCode;
-	BYTE *const mlCodeTable = seqStorePtr->mlCode;
-	U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	U32 u;
-	for (u = 0; u < nbSeq; u++) {
-		U32 const llv = sequences[u].litLength;
-		U32 const mlv = sequences[u].matchLength;
-		llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
-		ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
-		mlCodeTable[u] = (mlv > 127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
-	}
-	if (seqStorePtr->longLengthID == 1)
-		llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-	if (seqStorePtr->longLengthID == 2)
-		mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
-}
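-
-/* Worked example (editorial illustration): a literal length of 100 exceeds
- * 63, so its code is ZSTD_highbit32(100) + LL_deltaCode == 6 + 19 == 25;
- * lengths up to 63 map directly, e.g. LL_Code[40] == 23. */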
-
-ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
-{
-	const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-	const seqStore_t *seqStorePtr = &(zc->seqStore);
-	FSE_CTable *CTable_LitLength = zc->litlengthCTable;
-	FSE_CTable *CTable_OffsetBits = zc->offcodeCTable;
-	FSE_CTable *CTable_MatchLength = zc->matchlengthCTable;
-	U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
-	const seqDef *const sequences = seqStorePtr->sequencesStart;
-	const BYTE *const ofCodeTable = seqStorePtr->ofCode;
-	const BYTE *const llCodeTable = seqStorePtr->llCode;
-	const BYTE *const mlCodeTable = seqStorePtr->mlCode;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstCapacity;
-	BYTE *op = ostart;
-	size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-	BYTE *seqHead;
-
-	U32 *count;
-	S16 *norm;
-	U32 *workspace;
-	size_t workspaceSize = sizeof(zc->tmpCounters);
-	{
-		size_t spaceUsed32 = 0;
-		count = (U32 *)zc->tmpCounters + spaceUsed32;
-		spaceUsed32 += MaxSeq + 1;
-		norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32);
-		spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-		workspace = (U32 *)zc->tmpCounters + spaceUsed32;
-		workspaceSize -= (spaceUsed32 << 2);
-	}
-
-	/* Compress literals */
-	{
-		const BYTE *const literals = seqStorePtr->litStart;
-		size_t const litSize = seqStorePtr->lit - literals;
-		size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-		op += cSize;
-	}
-
-	/* Sequences Header */
-	if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */)
-		return ERROR(dstSize_tooSmall);
-	if (nbSeq < 0x7F)
-		*op++ = (BYTE)nbSeq;
-	else if (nbSeq < LONGNBSEQ)
-		op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2;
-	else
-		op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
-	if (nbSeq == 0)
-		return op - ostart;
-
-	/* seqHead : flags for FSE encoding type */
-	seqHead = op++;
-
-#define MIN_SEQ_FOR_DYNAMIC_FSE 64
-#define MAX_SEQ_FOR_STATIC_FSE 1000
-
-	/* convert length/distances into codes */
-	ZSTD_seqToCodes(seqStorePtr);
-
-	/* CTable for Literal Lengths */
-	{
-		U32 max = MaxLL;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = llCodeTable[0];
-			FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-			LLtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			LLtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize);
-			LLtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-			if (count[llCodeTable[nbSeq - 1]] > 1) {
-				count[llCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize);
-			LLtype = set_compressed;
-		}
-	}
-
-	/* CTable for Offsets */
-	{
-		U32 max = MaxOff;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = ofCodeTable[0];
-			FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-			Offtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			Offtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize);
-			Offtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-			if (count[ofCodeTable[nbSeq - 1]] > 1) {
-				count[ofCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize);
-			Offtype = set_compressed;
-		}
-	}
-
-	/* CTable for MatchLengths */
-	{
-		U32 max = MaxML;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = *mlCodeTable;
-			FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-			MLtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			MLtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize);
-			MLtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-			if (count[mlCodeTable[nbSeq - 1]] > 1) {
-				count[mlCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize);
-			MLtype = set_compressed;
-		}
-	}
-
-	*seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2));
-	zc->flagStaticTables = 0;
-
-	/* Encoding Sequences */
-	{
-		BIT_CStream_t blockStream;
-		FSE_CState_t stateMatchLength;
-		FSE_CState_t stateOffsetBits;
-		FSE_CState_t stateLitLength;
-
-		CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */
-
-		/* first symbols */
-		FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]);
-		FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]);
-		FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]);
-		BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]);
-		if (ZSTD_32bits())
-			BIT_flushBits(&blockStream);
-		BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]);
-		if (ZSTD_32bits())
-			BIT_flushBits(&blockStream);
-		if (longOffsets) {
-			U32 const ofBits = ofCodeTable[nbSeq - 1];
-			int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-			if (extraBits) {
-				BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits);
-				BIT_flushBits(&blockStream);
-			}
-			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits);
-		} else {
-			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]);
-		}
-		BIT_flushBits(&blockStream);
-
-		{
-			size_t n;
-			for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */
-				BYTE const llCode = llCodeTable[n];
-				BYTE const ofCode = ofCodeTable[n];
-				BYTE const mlCode = mlCodeTable[n];
-				U32 const llBits = LL_bits[llCode];
-				U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
-				U32 const mlBits = ML_bits[mlCode];
-				/* (7)*/							    /* (7)*/
-				FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */  /* 15 */
-				FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
-				if (ZSTD_32bits())
-					BIT_flushBits(&blockStream);				  /* (7)*/
-				FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
-				if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-					BIT_flushBits(&blockStream); /* (7)*/
-				BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-				if (ZSTD_32bits() && ((llBits + mlBits) > 24))
-					BIT_flushBits(&blockStream);
-				BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-				if (ZSTD_32bits())
-					BIT_flushBits(&blockStream); /* (7)*/
-				if (longOffsets) {
-					int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-					if (extraBits) {
-						BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-						BIT_flushBits(&blockStream); /* (7)*/
-					}
-					BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */
-				} else {
-					BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
-				}
-				BIT_flushBits(&blockStream); /* (7)*/
-			}
-		}
-
-		FSE_flushCState(&blockStream, &stateMatchLength);
-		FSE_flushCState(&blockStream, &stateOffsetBits);
-		FSE_flushCState(&blockStream, &stateLitLength);
-
-		{
-			size_t const streamSize = BIT_closeCStream(&blockStream);
-			if (streamSize == 0)
-				return ERROR(dstSize_tooSmall); /* not enough space */
-			op += streamSize;
-		}
-	}
-	return op - ostart;
-}
-
-ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
-{
-	size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
-	size_t const minGain = ZSTD_minGain(srcSize);
-	size_t const maxCSize = srcSize - minGain;
-	/* If srcSize <= dstCapacity, there is enough space to write a raw
-	 * uncompressed block. So if compression ran out of space anyway, the
-	 * compressed form must be larger than the source, i.e. the block is not
-	 * compressible; fall back to a raw uncompressed block.
-	 */
-	int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
-	int i;
-
-	if (ZSTD_isError(cSize) && !uncompressibleError)
-		return cSize;
-	if (cSize >= maxCSize || uncompressibleError) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return 0;
-	}
-	/* confirm repcodes */
-	for (i = 0; i < ZSTD_REP_NUM; i++)
-		zc->rep[i] = zc->repToConfirm[i];
-	return cSize;
-}
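-
-/* Note (editorial): a return value of 0 above is not an error; it signals
- * that the block did not compress well enough, and the caller is expected to
- * store it raw instead (see ZSTD_noCompressBlock()). */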
-
-/*! ZSTD_storeSeq() :
-	Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
-	`offsetCode` : distance to match, or 0 == repCode.
-	`matchCode` : matchLength - MINMATCH
-*/
-ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode)
-{
-	/* copy Literals */
-	ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
-	seqStorePtr->lit += litLength;
-
-	/* literal Length */
-	if (litLength > 0xFFFF) {
-		seqStorePtr->longLengthID = 1;
-		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	}
-	seqStorePtr->sequences[0].litLength = (U16)litLength;
-
-	/* match offset */
-	seqStorePtr->sequences[0].offset = offsetCode + 1;
-
-	/* match Length */
-	if (matchCode > 0xFFFF) {
-		seqStorePtr->longLengthID = 2;
-		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	}
-	seqStorePtr->sequences[0].matchLength = (U16)matchCode;
-
-	seqStorePtr->sequences++;
-}
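-
-/* Example (editorial illustration): the searchers below pass offsetCode == 0
- * for a repeat-offset match, stored as sequences[].offset == 1, while a
- * regular match at distance D arrives as D + ZSTD_REP_MOVE and is stored as
- * D + ZSTD_REP_MOVE + 1; the decoder removes this bias. */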
-
-/*-*************************************
-*  Match length counter
-***************************************/
-static unsigned ZSTD_NbCommonBytes(register size_t val)
-{
-	if (ZSTD_isLittleEndian()) {
-		if (ZSTD_64bits()) {
-			return (__builtin_ctzll((U64)val) >> 3);
-		} else { /* 32 bits */
-			return (__builtin_ctz((U32)val) >> 3);
-		}
-	} else { /* Big Endian CPU */
-		if (ZSTD_64bits()) {
-			return (__builtin_clzll(val) >> 3);
-		} else { /* 32 bits */
-			return (__builtin_clz((U32)val) >> 3);
-		}
-	}
-}
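-
-/* Worked example (editorial illustration): on a little-endian 64-bit CPU,
- * if two words first differ in byte 2, their XOR might be
- * 0x0000000000FF0000; __builtin_ctzll() then returns 16 and 16 >> 3 == 2
- * common bytes. */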
-
-static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit)
-{
-	const BYTE *const pStart = pIn;
-	const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);
-
-	while (pIn < pInLoopLimit) {
-		size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn);
-		if (!diff) {
-			pIn += sizeof(size_t);
-			pMatch += sizeof(size_t);
-			continue;
-		}
-		pIn += ZSTD_NbCommonBytes(diff);
-		return (size_t)(pIn - pStart);
-	}
-	if (ZSTD_64bits())
-		if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) {
-			pIn += 4;
-			pMatch += 4;
-		}
-	if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) {
-		pIn += 2;
-		pMatch += 2;
-	}
-	if ((pIn < pInLimit) && (*pMatch == *pIn))
-		pIn++;
-	return (size_t)(pIn - pStart);
-}
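-
-/* Example (editorial illustration): comparing "abcdef0." with "abcdef9." on
- * a little-endian 64-bit target, the XOR of the leading words is first
- * non-zero in byte 6, so the word loop reports 6 matching bytes and
- * ZSTD_count() returns 6. */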
-
-/** ZSTD_count_2segments() :
-*   can count match length with `ip` & `match` in 2 different segments.
-*   convention : on reaching mEnd, the match count continues from iStart
-*/
-static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart)
-{
-	const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd);
-	size_t const matchLength = ZSTD_count(ip, match, vEnd);
-	if (match + matchLength != mEnd)
-		return matchLength;
-	return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd);
-}
-
-/*-*************************************
-*  Hashes
-***************************************/
-static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); }
-ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */
-
-static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); }
-static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); }
-
-static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); }
-static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); }
-
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); }
-static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); }
-
-static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); }
-static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); }
-static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); }
-
-static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls)
-{
-	switch (mls) {
-	/* case 3: return ZSTD_hash3Ptr(p, hBits); */
-	default:
-	case 4: return ZSTD_hash4Ptr(p, hBits);
-	case 5: return ZSTD_hash5Ptr(p, hBits);
-	case 6: return ZSTD_hash6Ptr(p, hBits);
-	case 7: return ZSTD_hash7Ptr(p, hBits);
-	case 8: return ZSTD_hash8Ptr(p, hBits);
-	}
-}
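-
-/* Note (editorial): every variant above is a multiply-shift hash: the low
- * mls input bytes are moved to the top of the word (u << (64 - 8 * mls)),
- * multiplied by a large odd constant, and the top hBits bits of the product
- * select the bucket, so exactly mls bytes of input affect the result. */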
-
-/*-*************************************
-*  Fast Scan
-***************************************/
-static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hBits = zc->params.cParams.hashLog;
-	const BYTE *const base = zc->base;
-	const BYTE *ip = base + zc->nextToUpdate;
-	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-	const size_t fastHashFillStep = 3;
-
-	while (ip <= iend) {
-		hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-		ip += fastHashFillStep;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashTable = cctx->hashTable;
-	U32 const hBits = cctx->params.cParams.hashLog;
-	seqStore_t *seqStorePtr = &(cctx->seqStore);
-	const BYTE *const base = cctx->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = cctx->dictLimit;
-	const BYTE *const lowest = base + lowestIndex;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - HASH_READ_SIZE;
-	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-	U32 offsetSaved = 0;
-
-	/* init */
-	ip += (ip == lowest);
-	{
-		U32 const maxRep = (U32)(ip - lowest);
-		if (offset_2 > maxRep)
-			offsetSaved = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			offsetSaved = offset_1, offset_1 = 0;
-	}
-
-	/* Main Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-		size_t mLength;
-		size_t const h = ZSTD_hashPtr(ip, hBits, mls);
-		U32 const curr = (U32)(ip - base);
-		U32 const matchIndex = hashTable[h];
-		const BYTE *match = base + matchIndex;
-		hashTable[h] = curr; /* update hash table */
-
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) {
-			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			U32 offset;
-			if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-			mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-			offset = (U32)(ip - match);
-			while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-				ip--;
-				match--;
-				mLength++;
-			} /* catch up */
-			offset_2 = offset_1;
-			offset_1 = offset;
-
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-		}
-
-		/* match found */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */
-			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-				/* store sequence */
-				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-				{
-					U32 const tmpOff = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOff;
-				} /* swap offset_2 <=> offset_1 */
-				hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-				ip += rLength;
-				anchor = ip;
-				continue; /* faster when present ... (?) */
-			}
-		}
-	}
-
-	/* save reps for next block */
-	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
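-
-/* Note (editorial): final offsets go through repToConfirm rather than
- * cctx->rep directly; ZSTD_compressSequences() copies them into rep only
- * once the block is really emitted in compressed form, so a block stored raw
- * does not advance the repcode history the decoder tracks. */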
-
-static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	const U32 mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *hashTable = ctx->hashTable;
-	const U32 hBits = ctx->params.cParams.hashLog;
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const base = ctx->base;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const dictStart = dictBase + lowestIndex;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const lowPrefixPtr = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-		const size_t h = ZSTD_hashPtr(ip, hBits, mls);
-		const U32 matchIndex = hashTable[h];
-		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-		const BYTE *match = matchBase + matchIndex;
-		const U32 curr = (U32)(ip - base);
-		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-		const BYTE *repMatch = repBase + repIndex;
-		size_t mLength;
-		hashTable[h] = curr; /* update hash table */
-
-		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-			mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-			{
-				const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-				const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-				U32 offset;
-				mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
-				while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-					ip--;
-					match--;
-					mLength++;
-				} /* catch up */
-				offset = curr - matchIndex;
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-			}
-		}
-
-		/* found a match : store it */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2;
-			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while (ip <= ilimit) {
-				U32 const curr2 = (U32)(ip - base);
-				U32 const repIndex2 = curr2 - offset_2;
-				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional underflow */
-				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-					size_t repLength2 =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-					U32 tmpOffset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-					hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2;
-					ip += repLength2;
-					anchor = ip;
-					continue;
-				}
-				break;
-			}
-		}
-	}
-
-	/* save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	U32 const mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-/*-*************************************
-*  Double Fast
-***************************************/
-static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls)
-{
-	U32 *const hashLarge = cctx->hashTable;
-	U32 const hBitsL = cctx->params.cParams.hashLog;
-	U32 *const hashSmall = cctx->chainTable;
-	U32 const hBitsS = cctx->params.cParams.chainLog;
-	const BYTE *const base = cctx->base;
-	const BYTE *ip = base + cctx->nextToUpdate;
-	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-	const size_t fastHashFillStep = 3;
-
-	while (ip <= iend) {
-		hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-		hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-		ip += fastHashFillStep;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashLong = cctx->hashTable;
-	const U32 hBitsL = cctx->params.cParams.hashLog;
-	U32 *const hashSmall = cctx->chainTable;
-	const U32 hBitsS = cctx->params.cParams.chainLog;
-	seqStore_t *seqStorePtr = &(cctx->seqStore);
-	const BYTE *const base = cctx->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = cctx->dictLimit;
-	const BYTE *const lowest = base + lowestIndex;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - HASH_READ_SIZE;
-	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-	U32 offsetSaved = 0;
-
-	/* init */
-	ip += (ip == lowest);
-	{
-		U32 const maxRep = (U32)(ip - lowest);
-		if (offset_2 > maxRep)
-			offsetSaved = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			offsetSaved = offset_1, offset_1 = 0;
-	}
-
-	/* Main Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-		size_t mLength;
-		size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
-		size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-		U32 const curr = (U32)(ip - base);
-		U32 const matchIndexL = hashLong[h2];
-		U32 const matchIndexS = hashSmall[h];
-		const BYTE *matchLong = base + matchIndexL;
-		const BYTE *match = base + matchIndexS;
-		hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
-
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */
-			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			U32 offset;
-			if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-				mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8;
-				offset = (U32)(ip - matchLong);
-				while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) {
-					ip--;
-					matchLong--;
-					mLength++;
-				} /* catch up */
-			} else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-				U32 const matchIndex3 = hashLong[h3];
-				const BYTE *match3 = base + matchIndex3;
-				hashLong[h3] = curr + 1;
-				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-					mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8;
-					ip++;
-					offset = (U32)(ip - match3);
-					while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) {
-						ip--;
-						match3--;
-						mLength++;
-					} /* catch up */
-				} else {
-					mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-					offset = (U32)(ip - match);
-					while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-						ip--;
-						match--;
-						mLength++;
-					} /* catch up */
-				}
-			} else {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-
-			offset_2 = offset_1;
-			offset_1 = offset;
-
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-		}
-
-		/* match found */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] =
-			    curr + 2; /* here because curr+2 could be > iend-8 */
-			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-
-			/* check immediate repcode */
-			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-				/* store sequence */
-				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-				{
-					U32 const tmpOff = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOff;
-				} /* swap offset_2 <=> offset_1 */
-				hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-				hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-				ip += rLength;
-				anchor = ip;
-				continue; /* faster when present ... (?) */
-			}
-		}
-	}
-
-	/* save reps for next block */
-	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	const U32 mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashLong = ctx->hashTable;
-	U32 const hBitsL = ctx->params.cParams.hashLog;
-	U32 *const hashSmall = ctx->chainTable;
-	U32 const hBitsS = ctx->params.cParams.chainLog;
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const base = ctx->base;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const dictStart = dictBase + lowestIndex;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const lowPrefixPtr = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-		const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
-		const U32 matchIndex = hashSmall[hSmall];
-		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-		const BYTE *match = matchBase + matchIndex;
-
-		const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
-		const U32 matchLongIndex = hashLong[hLong];
-		const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
-		const BYTE *matchLong = matchLongBase + matchLongIndex;
-
-		const U32 curr = (U32)(ip - base);
-		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-		const BYTE *repMatch = repBase + repIndex;
-		size_t mLength;
-		hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
-
-		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-			mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-				const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
-				const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
-				U32 offset;
-				mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8;
-				offset = curr - matchLongIndex;
-				while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) {
-					ip--;
-					matchLong--;
-					mLength++;
-				} /* catch up */
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-			} else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-				U32 const matchIndex3 = hashLong[h3];
-				const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base;
-				const BYTE *match3 = match3Base + matchIndex3;
-				U32 offset;
-				hashLong[h3] = curr + 1;
-				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-					const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
-					const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
-					mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8;
-					ip++;
-					offset = curr + 1 - matchIndex3;
-					while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) {
-						ip--;
-						match3--;
-						mLength++;
-					} /* catch up */
-				} else {
-					const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-					const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-					mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4;
-					offset = curr - matchIndex;
-					while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-						ip--;
-						match--;
-						mLength++;
-					} /* catch up */
-				}
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-			} else {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-		}
-
-		/* found a match : store it */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2;
-			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2;
-			hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while (ip <= ilimit) {
-				U32 const curr2 = (U32)(ip - base);
-				U32 const repIndex2 = curr2 - offset_2;
-				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional underflow */
-				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-					size_t const repLength2 =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-					U32 tmpOffset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-					hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2;
-					hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2;
-					ip += repLength2;
-					anchor = ip;
-					continue;
-				}
-				break;
-			}
-		}
-	}
-
-	/* save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	U32 const mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
-*   ip : assumed <= iend-8.
-*   @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hashLog = zc->params.cParams.hashLog;
-	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const bt = zc->chainTable;
-	U32 const btLog = zc->params.cParams.chainLog - 1;
-	U32 const btMask = (1 << btLog) - 1;
-	U32 matchIndex = hashTable[h];
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *match;
-	const U32 curr = (U32)(ip - base);
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = smallerPtr + 1;
-	U32 dummy32; /* to be nullified at the end */
-	U32 const windowLow = zc->lowLimit;
-	U32 matchEndIdx = curr + 8;
-	size_t bestLength = 8;
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength])
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			bestLength = matchLength;
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-		}
-
-		if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-			break;		      /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
-
-		if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-	if (bestLength > 384)
-		return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
-	if (matchEndIdx > curr + 8)
-		return matchEndIdx - curr - 8;
-	return 1;
-}
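-
-/* Layout note (editorial): the tree lives inside chainTable; each position
- * idx owns the pair bt[2 * (idx & btMask)] and bt[2 * (idx & btMask) + 1],
- * the roots of the subtrees of lexicographically smaller and larger
- * suffixes, which is why btLog == chainLog - 1 above. */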
-
-static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls,
-					    U32 extDict)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hashLog = zc->params.cParams.hashLog;
-	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const bt = zc->chainTable;
-	U32 const btLog = zc->params.cParams.chainLog - 1;
-	U32 const btMask = (1 << btLog) - 1;
-	U32 matchIndex = hashTable[h];
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const U32 curr = (U32)(ip - base);
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	const U32 windowLow = zc->lowLimit;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-	U32 matchEndIdx = curr + 8;
-	U32 dummy32; /* to be nullified at the end */
-	size_t bestLength = 0;
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-		const BYTE *match;
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength])
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-			if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1)))
-				bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
-			if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-				break;		      /* drop, to guarantee consistency (miss a little bit of compression) */
-		}
-
-		if (match[matchLength] < ip[matchLength]) {
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-
-	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-	return bestLength;
-}
-
-static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target)
-		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0);
-}
-
-/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-					     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-	case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-	case 7:
-	case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-	}
-}
-
-static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target)
-		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1);
-}
-
-/** Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-					   const U32 mls)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-						     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-						     const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-	case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-	case 7:
-	case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-	}
-}
-
-/* *********************************
-*  Hash Chain
-***********************************/
-#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls)
-{
-	U32 *const hashTable = zc->hashTable;
-	const U32 hashLog = zc->params.cParams.hashLog;
-	U32 *const chainTable = zc->chainTable;
-	const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target) { /* catch up */
-		size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
-		NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-		hashTable[h] = idx;
-		idx++;
-	}
-
-	zc->nextToUpdate = target;
-	return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
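-
-/* Illustrative sketch (hypothetical helper, not used by the code in this file) :
-   how the chain built above is consumed. hashTable[h] holds the most recent
-   position whose mls-byte prefix hashes to h; chainTable[pos & chainMask] links
-   each position to the previous one with the same hash. The table is circular,
-   so links older than chainSize positions have been overwritten and must not
-   be followed. */
-static U32 example_countChainCandidates(ZSTD_CCtx *zc, const BYTE *ip, U32 mls, U32 maxVisits)
-{
-	const U32 *const chainTable = zc->chainTable;
-	const U32 chainSize = 1 << zc->params.cParams.chainLog;
-	const U32 chainMask = chainSize - 1;
-	const U32 curr = (U32)(ip - zc->base);
-	const U32 minChain = curr > chainSize ? curr - chainSize : 0;
-	U32 pos = ZSTD_insertAndFindFirstIndex(zc, ip, mls); /* head of the chain */
-	U32 n = 0;
-
-	while ((pos > zc->lowLimit) && (n < maxVisits)) {
-		n++; /* pos is one candidate match position for ip */
-		if (pos <= minChain)
-			break; /* older links already overwritten (circular buffer) */
-		pos = chainTable[pos & chainMask]; /* previous position with the same hash */
-	}
-	return n;
-}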
-
-/* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE
-size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */
-				    const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls,
-				    const U32 extDict)
-{
-	U32 *const chainTable = zc->chainTable;
-	const U32 chainSize = (1 << zc->params.cParams.chainLog);
-	const U32 chainMask = chainSize - 1;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const U32 lowLimit = zc->lowLimit;
-	const U32 curr = (U32)(ip - base);
-	const U32 minChain = curr > chainSize ? curr - chainSize : 0;
-	int nbAttempts = maxNbAttempts;
-	size_t ml = EQUAL_READ32 - 1;
-
-	/* HC4 match finder */
-	U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls);
-
-	for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) {
-		const BYTE *match;
-		size_t currMl = 0;
-		if ((!extDict) || matchIndex >= dictLimit) {
-			match = base + matchIndex;
-			if (match[ml] == ip[ml]) /* potentially better */
-				currMl = ZSTD_count(ip, match, iLimit);
-		} else {
-			match = dictBase + matchIndex;
-			if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-				currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
-		}
-
-		/* save best solution */
-		if (currMl > ml) {
-			ml = currMl;
-			*offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
-			if (ip + currMl == iLimit)
-				break; /* best possible, and avoid read overflow */
-		}
-
-		if (matchIndex <= minChain)
-			break;
-		matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-	}
-
-	return ml;
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-						   const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
-	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
-	case 7:
-	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
-	}
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-							   const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
-	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
-	case 7:
-	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
-	}
-}
-
-/* *******************************
-*  Common parser - lazy strategy
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base + ctx->dictLimit;
-
-	U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
-	U32 const mls = ctx->params.cParams.searchLength;
-
-	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-	searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0;
-
-	/* init */
-	ip += (ip == base);
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	{
-		U32 const maxRep = (U32)(ip - base);
-		if (offset_2 > maxRep)
-			savedOffset = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			savedOffset = offset_1, offset_1 = 0;
-	}
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		size_t matchLength = 0;
-		size_t offset = 0;
-		const BYTE *start = ip + 1;
-
-		/* check repCode */
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) {
-			/* repcode : we take it */
-			matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-			if (depth == 0)
-				goto _storeSequence;
-		}
-
-		/* first search (depth 0) */
-		{
-			size_t offsetFound = 99999999;
-			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-			if (ml2 > matchLength)
-				matchLength = ml2, start = ip, offset = offsetFound;
-		}
-
-		if (matchLength < EQUAL_READ32) {
-			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-			continue;
-		}
-
-		/* let's try to find a better solution */
-		if (depth >= 1)
-			while (ip < ilimit) {
-				ip++;
-				if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-					size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-					int const gain2 = (int)(mlRep * 3);
-					int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-					if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
-						matchLength = mlRep, offset = 0, start = ip;
-				}
-				{
-					size_t offset2 = 99999999;
-					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-						matchLength = ml2, offset = offset2, start = ip;
-						continue; /* search a better one */
-					}
-				}
-
-				/* let's find an even better one */
-				if ((depth == 2) && (ip < ilimit)) {
-					ip++;
-					if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-						size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-						int const gain2 = (int)(ml2 * 4);
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
-							matchLength = ml2, offset = 0, start = ip;
-					}
-					{
-						size_t offset2 = 99999999;
-						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-							matchLength = ml2, offset = offset2, start = ip;
-							continue;
-						}
-					}
-				}
-				break; /* nothing found : store previous solution */
-			}
-
-		/* NOTE:
-		 * start[-offset+ZSTD_REP_MOVE-1] would be undefined behavior :
-		 * (-offset+ZSTD_REP_MOVE-1) is unsigned, so adding it to start
-		 * overflows the pointer. Hence the (start-offset+ZSTD_REP_MOVE)[-1]
-		 * form used in the catch-up loop below.
-		 */
-		/* catch up */
-		if (offset) {
-			while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) &&
-			       (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */
-			{
-				start--;
-				matchLength++;
-			}
-			offset_2 = offset_1;
-			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-		}
-
-	/* store sequence */
-_storeSequence:
-		{
-			size_t const litLength = start - anchor;
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-			anchor = ip = start + matchLength;
-		}
-
-		/* check immediate repcode */
-		while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-			/* store sequence */
-			matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32;
-			offset = offset_2;
-			offset_2 = offset_1;
-			offset_1 = (U32)offset; /* swap repcodes */
-			ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-			ip += matchLength;
-			anchor = ip;
-			continue; /* faster when present ... (?) */
-		}
-	}
-
-	/* Save reps for next block */
-	ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
-	ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); }
-
-static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); }
-
-static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
-
-static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); }
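-
-/* Mapping used by the four wrappers above : searchMethod selects the match
-   finder (0 = hash chain, 1 = binary tree), depth the number of extra lazy
-   search passes :
-     greedy  -> hash chain, depth 0
-     lazy    -> hash chain, depth 1
-     lazy2   -> hash chain, depth 2
-     btlazy2 -> binary tree, depth 2 */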
-
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const U32 dictLimit = ctx->dictLimit;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const dictStart = dictBase + ctx->lowLimit;
-
-	const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
-	const U32 mls = ctx->params.cParams.searchLength;
-
-	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-	searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* init */
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ip += (ip == prefixStart);
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		size_t matchLength = 0;
-		size_t offset = 0;
-		const BYTE *start = ip + 1;
-		U32 curr = (U32)(ip - base);
-
-		/* check repCode */
-		{
-			const U32 repIndex = (U32)(curr + 1 - offset_1);
-			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-			const BYTE *const repMatch = repBase + repIndex;
-			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) {
-					/* repcode detected, we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					matchLength =
-					    ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-					if (depth == 0)
-						goto _storeSequence;
-				}
-		}
-
-		/* first search (depth 0) */
-		{
-			size_t offsetFound = 99999999;
-			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-			if (ml2 > matchLength)
-				matchLength = ml2, start = ip, offset = offsetFound;
-		}
-
-		if (matchLength < EQUAL_READ32) {
-			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-			continue;
-		}
-
-		/* let's try to find a better solution */
-		if (depth >= 1)
-			while (ip < ilimit) {
-				ip++;
-				curr++;
-				/* check repCode */
-				if (offset) {
-					const U32 repIndex = (U32)(curr - offset_1);
-					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-					const BYTE *const repMatch = repBase + repIndex;
-					if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-						if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-							/* repcode detected */
-							const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-							size_t const repLength =
-							    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) +
-							    EQUAL_READ32;
-							int const gain2 = (int)(repLength * 3);
-							int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-							if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-								matchLength = repLength, offset = 0, start = ip;
-						}
-				}
-
-				/* search match, depth 1 */
-				{
-					size_t offset2 = 99999999;
-					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-						matchLength = ml2, offset = offset2, start = ip;
-						continue; /* search a better one */
-					}
-				}
-
-				/* let's find an even better one */
-				if ((depth == 2) && (ip < ilimit)) {
-					ip++;
-					curr++;
-					/* check repCode */
-					if (offset) {
-						const U32 repIndex = (U32)(curr - offset_1);
-						const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-						const BYTE *const repMatch = repBase + repIndex;
-						if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-							if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-								/* repcode detected */
-								const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-								size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend,
-													repEnd, prefixStart) +
-										   EQUAL_READ32;
-								int gain2 = (int)(repLength * 4);
-								int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-								if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-									matchLength = repLength, offset = 0, start = ip;
-							}
-					}
-
-					/* search match, depth 2 */
-					{
-						size_t offset2 = 99999999;
-						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-							matchLength = ml2, offset = offset2, start = ip;
-							continue;
-						}
-					}
-				}
-				break; /* nothing found : store previous solution */
-			}
-
-		/* catch up */
-		if (offset) {
-			U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE));
-			const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
-			const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
-			while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) {
-				start--;
-				match--;
-				matchLength++;
-			} /* catch up */
-			offset_2 = offset_1;
-			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-		}
-
-	/* store sequence */
-_storeSequence:
-		{
-			size_t const litLength = start - anchor;
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-			anchor = ip = start + matchLength;
-		}
-
-		/* check immediate repcode */
-		while (ip <= ilimit) {
-			const U32 repIndex = (U32)((ip - base) - offset_2);
-			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-			const BYTE *const repMatch = repBase + repIndex;
-			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-					/* repcode detected, we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					matchLength =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-					offset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = (U32)offset; /* swap offset history */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-					ip += matchLength;
-					anchor = ip;
-					continue; /* faster when present ... (?) */
-				}
-			break;
-		}
-	}
-
-	/* Save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); }
-
-static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
-}
-
-static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
-}
-
-/* The optimal parser */
-#include "zstd_opt.h"
-
-static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize);
-
-static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
-{
-	static const ZSTD_blockCompressor blockCompressor[2][8] = {
-	    {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2,
-	     ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2},
-	    {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,
-	     ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}};
-
-	return blockCompressor[extDict][(U32)strat];
-}
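-
-/* Minimal usage sketch (hypothetical helper) : how the dispatch above is meant
-   to be driven. extDict is set whenever part of the window lives in the old
-   segment (lowLimit < dictLimit), exactly as ZSTD_compressBlock_internal()
-   computes it below. */
-static void example_runSelectedCompressor(ZSTD_CCtx *zc, const void *src, size_t srcSize)
-{
-	int const extDict = zc->lowLimit < zc->dictLimit;
-	ZSTD_blockCompressor const bc = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, extDict);
-
-	bc(zc, src, srcSize); /* fills zc->seqStore with literals and sequences */
-}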
-
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
-	const BYTE *const base = zc->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const U32 curr = (U32)(istart - base);
-	if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1)
-		return 0; /* don't even attempt compression below a certain srcSize */
-	ZSTD_resetSeqStore(&(zc->seqStore));
-	if (curr > zc->nextToUpdate + 384)
-		zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* limit catch-up cost : the tree is not updated while skipping very long rep matches */
-	blockCompressor(zc, src, srcSize);
-	return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
-}
-
-/*! ZSTD_compress_generic() :
-*   Compress a chunk of data into one or multiple blocks.
-*   All blocks will be terminated, all input will be consumed.
-*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
-*   Frame is supposed to be already started (header already produced)
-*   @return : compressed size, or an error code
-*/
-static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk)
-{
-	size_t blockSize = cctx->blockSize;
-	size_t remaining = srcSize;
-	const BYTE *ip = (const BYTE *)src;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	U32 const maxDist = 1 << cctx->params.cParams.windowLog;
-
-	if (cctx->params.fParams.checksumFlag && srcSize)
-		xxh64_update(&cctx->xxhState, src, srcSize);
-
-	while (remaining) {
-		U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
-		size_t cSize;
-
-		if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
-			return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
-		if (remaining < blockSize)
-			blockSize = remaining;
-
-		/* preemptive overflow correction */
-		if (cctx->lowLimit > (3U << 29)) {
-			U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
-			U32 const curr = (U32)(ip - cctx->base);
-			U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog);
-			U32 const correction = curr - newCurr;
-			ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
-			ZSTD_reduceIndex(cctx, correction);
-			cctx->base += correction;
-			cctx->dictBase += correction;
-			cctx->lowLimit -= correction;
-			cctx->dictLimit -= correction;
-			if (cctx->nextToUpdate < correction)
-				cctx->nextToUpdate = 0;
-			else
-				cctx->nextToUpdate -= correction;
-		}
-
-		if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
-			/* enforce maxDist */
-			U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist;
-			if (cctx->lowLimit < newLowLimit)
-				cctx->lowLimit = newLowLimit;
-			if (cctx->dictLimit < cctx->lowLimit)
-				cctx->dictLimit = cctx->lowLimit;
-		}
-
-		cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-
-		if (cSize == 0) { /* block is not compressible */
-			U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3);
-			if (blockSize + ZSTD_blockHeaderSize > dstCapacity)
-				return ERROR(dstSize_tooSmall);
-			ZSTD_writeLE32(op, cBlockHeader24); /* no problem : the 4th byte will be overwritten */
-			memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
-			cSize = ZSTD_blockHeaderSize + blockSize;
-		} else {
-			U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3);
-			ZSTD_writeLE24(op, cBlockHeader24);
-			cSize += ZSTD_blockHeaderSize;
-		}
-
-		remaining -= blockSize;
-		dstCapacity -= cSize;
-		ip += blockSize;
-		op += cSize;
-	}
-
-	if (lastFrameChunk && (op > ostart))
-		cctx->stage = ZSTDcs_ending;
-	return op - ostart;
-}
-
-static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
-{
-	BYTE *const op = (BYTE *)dst;
-	U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */
-	U32 const checksumFlag = params.fParams.checksumFlag > 0;
-	U32 const windowSize = 1U << params.cParams.windowLog;
-	U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
-	BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
-	U32 const fcsCode =
-	    params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */
-	BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6));
-	size_t pos;
-
-	if (dstCapacity < ZSTD_frameHeaderSize_max)
-		return ERROR(dstSize_tooSmall);
-
-	ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER);
-	op[4] = frameHeaderDescriptionByte;
-	pos = 5;
-	if (!singleSegment)
-		op[pos++] = windowLogByte;
-	switch (dictIDSizeCode) {
-	default: /* impossible */
-	case 0: break;
-	case 1:
-		op[pos] = (BYTE)(dictID);
-		pos++;
-		break;
-	case 2:
-		ZSTD_writeLE16(op + pos, (U16)dictID);
-		pos += 2;
-		break;
-	case 3:
-		ZSTD_writeLE32(op + pos, dictID);
-		pos += 4;
-		break;
-	}
-	switch (fcsCode) {
-	default: /* impossible */
-	case 0:
-		if (singleSegment)
-			op[pos++] = (BYTE)(pledgedSrcSize);
-		break;
-	case 1:
-		ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256));
-		pos += 2;
-		break;
-	case 2:
-		ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize));
-		pos += 4;
-		break;
-	case 3:
-		ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize));
-		pos += 8;
-		break;
-	}
-	return pos;
-}
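-
-/* Worked example for the frame-header descriptor byte built above (values
-   illustrative) : checksum enabled, no dictID, single-segment frame with a
-   pledged size of 1000 bytes. Then dictIDSizeCode = 0, checksumFlag = 1,
-   singleSegment = 1, and fcsCode = (1000 >= 256) + (1000 >= 65792) + (1000 >= 0xFFFFFFFFU) = 1,
-   so the byte is 0 + (1 << 2) + (1 << 5) + (1 << 6) = 0x64. The windowLog byte
-   is skipped (single segment) and the header continues with a 2-byte
-   little-endian field holding 1000 - 256 = 744 (case 1 of the fcsCode switch). */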
-
-static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk)
-{
-	const BYTE *const ip = (const BYTE *)src;
-	size_t fhSize = 0;
-
-	if (cctx->stage == ZSTDcs_created)
-		return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
-
-	if (frame && (cctx->stage == ZSTDcs_init)) {
-		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
-		if (ZSTD_isError(fhSize))
-			return fhSize;
-		dstCapacity -= fhSize;
-		dst = (char *)dst + fhSize;
-		cctx->stage = ZSTDcs_ongoing;
-	}
-
-	/* Check if blocks follow each other */
-	if (src != cctx->nextSrc) {
-		/* not contiguous */
-		ptrdiff_t const delta = cctx->nextSrc - ip;
-		cctx->lowLimit = cctx->dictLimit;
-		cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
-		cctx->dictBase = cctx->base;
-		cctx->base -= delta;
-		cctx->nextToUpdate = cctx->dictLimit;
-		if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE)
-			cctx->lowLimit = cctx->dictLimit; /* too small extDict */
-	}
-
-	/* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
-	if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
-		ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
-		U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
-		cctx->lowLimit = lowLimitMax;
-	}
-
-	cctx->nextSrc = ip + srcSize;
-
-	if (srcSize) {
-		size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk)
-					   : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-		return cSize + fhSize;
-	} else
-		return fhSize;
-}
-
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
-}
-
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); }
-
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
-	if (srcSize > blockSizeMax)
-		return ERROR(srcSize_wrong);
-	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
-}
-
-/*! ZSTD_loadDictionaryContent() :
- *  @return : 0, or an error code
- */
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize)
-{
-	const BYTE *const ip = (const BYTE *)src;
-	const BYTE *const iend = ip + srcSize;
-
-	/* input becomes the current prefix */
-	zc->lowLimit = zc->dictLimit;
-	zc->dictLimit = (U32)(zc->nextSrc - zc->base);
-	zc->dictBase = zc->base;
-	zc->base += ip - zc->nextSrc;
-	zc->nextToUpdate = zc->dictLimit;
-	zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
-
-	zc->nextSrc = iend;
-	if (srcSize <= HASH_READ_SIZE)
-		return 0;
-
-	switch (zc->params.cParams.strategy) {
-	case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-	case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-	case ZSTD_greedy:
-	case ZSTD_lazy:
-	case ZSTD_lazy2:
-		if (srcSize >= HASH_READ_SIZE)
-			ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength);
-		break;
-
-	case ZSTD_btlazy2:
-	case ZSTD_btopt:
-	case ZSTD_btopt2:
-		if (srcSize >= HASH_READ_SIZE)
-			ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
-		break;
-
-	default:
-		return ERROR(GENERIC); /* strategy doesn't exist; impossible */
-	}
-
-	zc->nextToUpdate = (U32)(iend - zc->base);
-	return 0;
-}
-
-/* Dictionaries that assign zero probability to symbols that do show up cause problems
-   during FSE encoding.  Refuse dictionaries that assign zero probability to symbols
-   that we may encounter during compression.
-   NOTE: This behavior is not standard and could be improved in the future. */
-static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
-{
-	U32 s;
-	if (dictMaxSymbolValue < maxSymbolValue)
-		return ERROR(dictionary_corrupted);
-	for (s = 0; s <= maxSymbolValue; ++s) {
-		if (normalizedCounter[s] == 0)
-			return ERROR(dictionary_corrupted);
-	}
-	return 0;
-}
-
-/* Dictionary format :
- * See :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
- */
-/*! ZSTD_loadZstdDictionary() :
- * @return : 0, or an error code
- *  assumptions : magic number already checked by the caller
- *                dictSize > 8
- */
-static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-	const BYTE *dictPtr = (const BYTE *)dict;
-	const BYTE *const dictEnd = dictPtr + dictSize;
-	short offcodeNCount[MaxOff + 1];
-	unsigned offcodeMaxValue = MaxOff;
-
-	dictPtr += 4; /* skip magic number */
-	cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr);
-	dictPtr += 4;
-
-	{
-		size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters));
-		if (HUF_isError(hufHeaderSize))
-			return ERROR(dictionary_corrupted);
-		dictPtr += hufHeaderSize;
-	}
-
-	{
-		unsigned offcodeLog;
-		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(offcodeHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (offcodeLog > OffFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-		CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-			dictionary_corrupted);
-		dictPtr += offcodeHeaderSize;
-	}
-
-	{
-		short matchlengthNCount[MaxML + 1];
-		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(matchlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (matchlengthLog > MLFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Every match length code must have non-zero probability */
-		CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
-		CHECK_E(
-		    FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-		    dictionary_corrupted);
-		dictPtr += matchlengthHeaderSize;
-	}
-
-	{
-		short litlengthNCount[MaxLL + 1];
-		unsigned litlengthMaxValue = MaxLL, litlengthLog;
-		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(litlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (litlengthLog > LLFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Every literal length code must have non-zero probability */
-		CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
-		CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-			dictionary_corrupted);
-		dictPtr += litlengthHeaderSize;
-	}
-
-	if (dictPtr + 12 > dictEnd)
-		return ERROR(dictionary_corrupted);
-	cctx->rep[0] = ZSTD_readLE32(dictPtr + 0);
-	cctx->rep[1] = ZSTD_readLE32(dictPtr + 4);
-	cctx->rep[2] = ZSTD_readLE32(dictPtr + 8);
-	dictPtr += 12;
-
-	{
-		size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-		U32 offcodeMax = MaxOff;
-		if (dictContentSize <= ((U32)-1) - 128 KB) {
-			U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
-			offcodeMax = ZSTD_highbit32(maxOffset);		     /* Calculate minimum offset code required to represent maxOffset */
-		}
-		/* All offset values <= dictContentSize + 128 KB must be representable */
-		CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
-		/* All repCodes must be <= dictContentSize and != 0*/
-		{
-			U32 u;
-			for (u = 0; u < 3; u++) {
-				if (cctx->rep[u] == 0)
-					return ERROR(dictionary_corrupted);
-				if (cctx->rep[u] > dictContentSize)
-					return ERROR(dictionary_corrupted);
-			}
-		}
-
-		cctx->flagStaticTables = 1;
-		cctx->flagStaticHufTable = HUF_repeat_valid;
-		return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
-	}
-}
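-
-/* Layout parsed by ZSTD_loadZstdDictionary() above (the entropy-table sizes
-   are variable, so only the fixed-size fields have fixed offsets) :
-     bytes 0-3   : magic number (ZSTD_DICT_MAGIC, checked by the caller)
-     bytes 4-7   : dictID, little-endian
-     variable    : Huffman literals table   (HUF_readCTable_wksp)
-     variable    : offset-code FSE table    (FSE_readNCount)
-     variable    : match-length FSE table   (FSE_readNCount)
-     variable    : literal-length FSE table (FSE_readNCount)
-     12 bytes    : initial repcodes rep[0..2], each 4-byte little-endian
-     remainder   : raw content, loaded as the initial window */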
-
-/** ZSTD_compress_insertDictionary() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-	if ((dict == NULL) || (dictSize <= 8))
-		return 0;
-
-	/* dict as pure content */
-	if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
-		return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
-
-	/* dict as zstd dictionary */
-	return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_internal() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize)
-{
-	ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
-	CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
-	return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_advanced() :
-*   @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-	/* compression parameters verification and optimization */
-	CHECK_F(ZSTD_checkCParams(params.cParams));
-	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
-}
-
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel)
-{
-	ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
-}
-
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); }
-
-/*! ZSTD_writeEpilogue() :
-*   Ends a frame.
-*   @return : nb of bytes written into dst (or an error code) */
-static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	size_t fhSize = 0;
-
-	if (cctx->stage == ZSTDcs_created)
-		return ERROR(stage_wrong); /* init missing */
-
-	/* special case : empty frame */
-	if (cctx->stage == ZSTDcs_init) {
-		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
-		if (ZSTD_isError(fhSize))
-			return fhSize;
-		dstCapacity -= fhSize;
-		op += fhSize;
-		cctx->stage = ZSTDcs_ongoing;
-	}
-
-	if (cctx->stage != ZSTDcs_ending) {
-		/* write one last empty block, make it the "last" block */
-		U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0;
-		if (dstCapacity < 4)
-			return ERROR(dstSize_tooSmall);
-		ZSTD_writeLE32(op, cBlockHeader24);
-		op += ZSTD_blockHeaderSize;
-		dstCapacity -= ZSTD_blockHeaderSize;
-	}
-
-	if (cctx->params.fParams.checksumFlag) {
-		U32 const checksum = (U32)xxh64_digest(&cctx->xxhState);
-		if (dstCapacity < 4)
-			return ERROR(dstSize_tooSmall);
-		ZSTD_writeLE32(op, checksum);
-		op += 4;
-	}
-
-	cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
-	return op - ostart;
-}
-
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t endResult;
-	size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
-	if (ZSTD_isError(cSize))
-		return cSize;
-	endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize);
-	if (ZSTD_isError(endResult))
-		return endResult;
-	return cSize + endResult;
-}
-
-static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-				     ZSTD_parameters params)
-{
-	CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
-	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-			       ZSTD_parameters params)
-{
-	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
-}
-
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params)
-{
-	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
-}
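-
-/* One-shot usage sketch (hypothetical kernel caller). ZSTD_CCtxWorkspaceBound()
-   and ZSTD_initCCtx() are assumed from the kernel zstd API; error handling is
-   abbreviated. */
-static size_t example_compressOnce(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	ZSTD_parameters const params = ZSTD_getParams(3, srcSize, 0);
-	size_t const wkspSize = ZSTD_CCtxWorkspaceBound(params.cParams);
-	void *const wksp = kvmalloc(wkspSize, GFP_KERNEL);
-	ZSTD_CCtx *cctx;
-	size_t cSize;
-
-	if (!wksp)
-		return ERROR(memory_allocation);
-	cctx = ZSTD_initCCtx(wksp, wkspSize);
-	cSize = cctx ? ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, params)
-		     : ERROR(memory_allocation);
-	kvfree(wksp);
-	return cSize; /* compressed size, or an error code (test with ZSTD_isError()) */
-}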
-
-/* =====  Dictionary API  ===== */
-
-struct ZSTD_CDict_s {
-	void *dictBuffer;
-	const void *dictContent;
-	size_t dictContentSize;
-	ZSTD_CCtx *refContext;
-}; /* typedef'd to ZSTD_CDict within "zstd.h" */
-
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); }
-
-static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem)
-{
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	{
-		ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-		ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem);
-
-		if (!cdict || !cctx) {
-			ZSTD_free(cdict, customMem);
-			ZSTD_freeCCtx(cctx);
-			return NULL;
-		}
-
-		if ((byReference) || (!dictBuffer) || (!dictSize)) {
-			cdict->dictBuffer = NULL;
-			cdict->dictContent = dictBuffer;
-		} else {
-			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-			if (!internalBuffer) {
-				ZSTD_free(cctx, customMem);
-				ZSTD_free(cdict, customMem);
-				return NULL;
-			}
-			memcpy(internalBuffer, dictBuffer, dictSize);
-			cdict->dictBuffer = internalBuffer;
-			cdict->dictContent = internalBuffer;
-		}
-
-		{
-			size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
-			if (ZSTD_isError(errorCode)) {
-				ZSTD_free(cdict->dictBuffer, customMem);
-				ZSTD_free(cdict, customMem);
-				ZSTD_freeCCtx(cctx);
-				return NULL;
-			}
-		}
-
-		cdict->refContext = cctx;
-		cdict->dictContentSize = dictSize;
-		return cdict;
-	}
-}
-
-ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem);
-}
-
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict)
-{
-	if (cdict == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = cdict->refContext->customMem;
-		ZSTD_freeCCtx(cdict->refContext);
-		ZSTD_free(cdict->dictBuffer, cMem);
-		ZSTD_free(cdict, cMem);
-		return 0;
-	}
-}
-
-static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); }
-
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize)
-{
-	if (cdict->dictContentSize)
-		CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-	else {
-		ZSTD_parameters params = cdict->refContext->params;
-		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-		CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
-	}
-	return 0;
-}
-
-/*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when the same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict)
-{
-	CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
-	if (cdict->refContext->params.fParams.contentSizeFlag == 1) {
-		cctx->params.fParams.contentSizeFlag = 1;
-		cctx->frameContentSize = srcSize;
-	} else {
-		cctx->params.fParams.contentSizeFlag = 0;
-	}
-
-	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
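-
-/* Usage sketch for the digested-dictionary path (hypothetical caller) : digest
-   the dictionary once, then reuse the cdict for many inputs. Note that
-   ZSTD_initCDict() references `dict` rather than copying it, so the buffer
-   must outlive the cdict. cctx setup and error handling are abbreviated. */
-static size_t example_compressWithCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize,
-					const void *dict, size_t dictSize, ZSTD_parameters params)
-{
-	size_t const wkspSize = ZSTD_CDictWorkspaceBound(params.cParams);
-	void *const wksp = kvmalloc(wkspSize, GFP_KERNEL);
-	ZSTD_CDict *cdict;
-	size_t cSize;
-
-	if (!wksp)
-		return ERROR(memory_allocation);
-	cdict = ZSTD_initCDict(dict, dictSize, params, wksp, wkspSize); /* digest once */
-	cSize = cdict ? ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict)
-		      : ERROR(memory_allocation);
-	kvfree(wksp); /* in real use, keep the cdict (and its workspace) alive across calls */
-	return cSize;
-}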
-
-/* ******************************************************************
-*  Streaming
-********************************************************************/
-
-typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
-
-struct ZSTD_CStream_s {
-	ZSTD_CCtx *cctx;
-	ZSTD_CDict *cdictLocal;
-	const ZSTD_CDict *cdict;
-	char *inBuff;
-	size_t inBuffSize;
-	size_t inToCompress;
-	size_t inBuffPos;
-	size_t inBuffTarget;
-	size_t blockSize;
-	char *outBuff;
-	size_t outBuffSize;
-	size_t outBuffContentSize;
-	size_t outBuffFlushedSize;
-	ZSTD_cStreamStage stage;
-	U32 checksum;
-	U32 frameEnded;
-	U64 pledgedSrcSize;
-	U64 inputProcessed;
-	ZSTD_parameters params;
-	ZSTD_customMem customMem;
-}; /* typedef'd to ZSTD_CStream within "zstd.h" */
-
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-	size_t const inBuffSize = (size_t)1 << cParams.windowLog;
-	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize);
-	size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
-
-	return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_CStream *zcs;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
-	if (zcs == NULL)
-		return NULL;
-	memset(zcs, 0, sizeof(ZSTD_CStream));
-	memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
-	zcs->cctx = ZSTD_createCCtx_advanced(customMem);
-	if (zcs->cctx == NULL) {
-		ZSTD_freeCStream(zcs);
-		return NULL;
-	}
-	return zcs;
-}
-
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs)
-{
-	if (zcs == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = zcs->customMem;
-		ZSTD_freeCCtx(zcs->cctx);
-		zcs->cctx = NULL;
-		ZSTD_freeCDict(zcs->cdictLocal);
-		zcs->cdictLocal = NULL;
-		ZSTD_free(zcs->inBuff, cMem);
-		zcs->inBuff = NULL;
-		ZSTD_free(zcs->outBuff, cMem);
-		zcs->outBuff = NULL;
-		ZSTD_free(zcs, cMem);
-		return 0;
-	}
-}
-
-/*======   Initialization   ======*/
-
-size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bit hash */; }
-
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-	if (zcs->inBuffSize == 0)
-		return ERROR(stage_wrong); /* zcs has not been initialized at least once => can't reset */
-
-	if (zcs->cdict)
-		CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
-	else
-		CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
-
-	zcs->inToCompress = 0;
-	zcs->inBuffPos = 0;
-	zcs->inBuffTarget = zcs->blockSize;
-	zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-	zcs->stage = zcss_load;
-	zcs->frameEnded = 0;
-	zcs->pledgedSrcSize = pledgedSrcSize;
-	zcs->inputProcessed = 0;
-	return 0; /* ready to go */
-}
-
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-	zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-
-	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-	/* allocate buffers */
-	{
-		size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-		if (zcs->inBuffSize < neededInBuffSize) {
-			zcs->inBuffSize = neededInBuffSize;
-			ZSTD_free(zcs->inBuff, zcs->customMem);
-			zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem);
-			if (zcs->inBuff == NULL)
-				return ERROR(memory_allocation);
-		}
-		zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-	}
-	if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) {
-		zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1;
-		ZSTD_free(zcs->outBuff, zcs->customMem);
-		zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
-		if (zcs->outBuff == NULL)
-			return ERROR(memory_allocation);
-	}
-
-	if (dict && dictSize >= 8) {
-		ZSTD_freeCDict(zcs->cdictLocal);
-		zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
-		if (zcs->cdictLocal == NULL)
-			return ERROR(memory_allocation);
-		zcs->cdict = zcs->cdictLocal;
-	} else
-		zcs->cdict = NULL;
-
-	zcs->checksum = params.fParams.checksumFlag > 0;
-	zcs->params = params;
-
-	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem);
-	if (zcs) {
-		size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
-		if (ZSTD_isError(code)) {
-			return NULL;
-		}
-	}
-	return zcs;
-}
-
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
-	ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize);
-	if (zcs) {
-		zcs->cdict = cdict;
-		if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) {
-			return NULL;
-		}
-	}
-	return zcs;
-}
-
-/*======   Compression   ======*/
-
-typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const length = MIN(dstCapacity, srcSize);
-	memcpy(dst, src, length);
-	return length;
-}
-
-static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush)
-{
-	U32 someMoreWork = 1;
-	const char *const istart = (const char *)src;
-	const char *const iend = istart + *srcSizePtr;
-	const char *ip = istart;
-	char *const ostart = (char *)dst;
-	char *const oend = ostart + *dstCapacityPtr;
-	char *op = ostart;
-
-	while (someMoreWork) {
-		switch (zcs->stage) {
-		case zcss_init:
-			return ERROR(init_missing); /* call ZSTD_initCStream() first! */
-
-		case zcss_load:
-			/* complete inBuffer */
-			{
-				size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-				size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip);
-				zcs->inBuffPos += loaded;
-				ip += loaded;
-				if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) {
-					someMoreWork = 0;
-					break; /* not enough input to get a full block : stop there, wait for more */
-				}
-			}
-			/* compress current block (note : this stage cannot be stopped in the middle) */
-			{
-				void *cDst;
-				size_t cSize;
-				size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
-				size_t oSize = oend - op;
-				if (oSize >= ZSTD_compressBound(iSize))
-					cDst = op; /* compress directly into output buffer (avoid flush stage) */
-				else
-					cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-				cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize)
-							   : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
-				if (ZSTD_isError(cSize))
-					return cSize;
-				if (flush == zsf_end)
-					zcs->frameEnded = 1;
-				/* prepare next block */
-				zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
-				if (zcs->inBuffTarget > zcs->inBuffSize)
-					zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
-				zcs->inToCompress = zcs->inBuffPos;
-				if (cDst == op) {
-					op += cSize;
-					break;
-				} /* no need to flush */
-				zcs->outBuffContentSize = cSize;
-				zcs->outBuffFlushedSize = 0;
-				zcs->stage = zcss_flush; /* pass-through to flush stage */
-			}
-			fallthrough;
-
-		case zcss_flush: {
-			size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-			size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-			op += flushed;
-			zcs->outBuffFlushedSize += flushed;
-			if (toFlush != flushed) {
-				someMoreWork = 0;
-				break;
-			} /* dst too small to store flushed data : stop there */
-			zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-			zcs->stage = zcss_load;
-			break;
-		}
-
-		case zcss_final:
-			someMoreWork = 0; /* do nothing */
-			break;
-
-		default:
-			return ERROR(GENERIC); /* impossible */
-		}
-	}
-
-	*srcSizePtr = ip - istart;
-	*dstCapacityPtr = op - ostart;
-	zcs->inputProcessed += *srcSizePtr;
-	if (zcs->frameEnded)
-		return 0;
-	{
-		size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
-		if (hintInSize == 0)
-			hintInSize = zcs->blockSize;
-		return hintInSize;
-	}
-}
-
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-	size_t sizeRead = input->size - input->pos;
-	size_t sizeWritten = output->size - output->pos;
-	size_t const result =
-	    ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather);
-	input->pos += sizeRead;
-	output->pos += sizeWritten;
-	return result;
-}
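-
-/* Streaming usage sketch (hypothetical caller) : push one input buffer through
-   the stream, then finish the frame. zcs is assumed to come from
-   ZSTD_initCStream() above, and dst is assumed large enough for the whole
-   compressed output; error handling is abbreviated. */
-static size_t example_streamOneBuffer(ZSTD_CStream *zcs, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	ZSTD_inBuffer in = {src, srcSize, 0};
-	ZSTD_outBuffer out = {dst, dstCapacity, 0};
-
-	while (in.pos < in.size) { /* feed input; compressed bytes accumulate in out.pos */
-		size_t const ret = ZSTD_compressStream(zcs, &out, &in);
-		if (ZSTD_isError(ret))
-			return ret;
-	}
-	{ /* write the epilogue; ZSTD_endStream() returns the number of bytes still to flush, 0 when done */
-		size_t const remaining = ZSTD_endStream(zcs, &out);
-		if (remaining) /* an error code, or dst too small for the epilogue */
-			return remaining;
-	}
-	return out.pos; /* total compressed bytes written */
-}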
-
-/*======   Finalize   ======*/
-
-/*! ZSTD_flushStream() :
-*   @return : amount of data remaining to flush */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-	size_t srcSize = 0;
-	size_t sizeWritten = output->size - output->pos;
-	size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize,
-							  &srcSize, /* use a valid src address instead of NULL */
-							  zsf_flush);
-	output->pos += sizeWritten;
-	if (ZSTD_isError(result))
-		return result;
-	return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
-}
-
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-	BYTE *const ostart = (BYTE *)(output->dst) + output->pos;
-	BYTE *const oend = (BYTE *)(output->dst) + output->size;
-	BYTE *op = ostart;
-
-	if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
-		return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
-
-	if (zcs->stage != zcss_final) {
-		/* flush whatever remains */
-		size_t srcSize = 0;
-		size_t sizeWritten = output->size - output->pos;
-		size_t const notEnded =
-		    ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
-		size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-		op += sizeWritten;
-		if (remainingToFlush) {
-			output->pos += sizeWritten;
-			return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
-		}
-		/* create epilogue */
-		zcs->stage = zcss_final;
-		zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL,
-									   0); /* write epilogue, including final empty block, into outBuff */
-	}
-
-	/* flush epilogue */
-	{
-		size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-		size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-		op += flushed;
-		zcs->outBuffFlushedSize += flushed;
-		output->pos += op - ostart;
-		if (toFlush == flushed)
-			zcs->stage = zcss_init; /* end reached */
-		return toFlush - flushed;
-	}
-}
-
-/*-=====  Pre-defined compression levels  =====-*/
-
-#define ZSTD_DEFAULT_CLEVEL 1
-#define ZSTD_MAX_CLEVEL 22
-int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = {
-    {
-	/* "default" */
-	/* W,  C,  H,  S,  L, TL, strat */
-	{18, 12, 12, 1, 7, 16, ZSTD_fast},    /* level  0 - never used */
-	{19, 13, 14, 1, 7, 16, ZSTD_fast},    /* level  1 */
-	{19, 15, 16, 1, 6, 16, ZSTD_fast},    /* level  2 */
-	{20, 16, 17, 1, 5, 16, ZSTD_dfast},   /* level  3.*/
-	{20, 18, 18, 1, 5, 16, ZSTD_dfast},   /* level  4.*/
-	{20, 15, 18, 3, 5, 16, ZSTD_greedy},  /* level  5 */
-	{21, 16, 19, 2, 5, 16, ZSTD_lazy},    /* level  6 */
-	{21, 17, 20, 3, 5, 16, ZSTD_lazy},    /* level  7 */
-	{21, 18, 20, 3, 5, 16, ZSTD_lazy2},   /* level  8 */
-	{21, 20, 20, 3, 5, 16, ZSTD_lazy2},   /* level  9 */
-	{21, 19, 21, 4, 5, 16, ZSTD_lazy2},   /* level 10 */
-	{22, 20, 22, 4, 5, 16, ZSTD_lazy2},   /* level 11 */
-	{22, 20, 22, 5, 5, 16, ZSTD_lazy2},   /* level 12 */
-	{22, 21, 22, 5, 5, 16, ZSTD_lazy2},   /* level 13 */
-	{22, 21, 22, 6, 5, 16, ZSTD_lazy2},   /* level 14 */
-	{22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */
-	{23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */
-	{23, 21, 22, 4, 5, 24, ZSTD_btopt},   /* level 17 */
-	{23, 23, 22, 6, 5, 32, ZSTD_btopt},   /* level 18 */
-	{23, 23, 22, 6, 3, 48, ZSTD_btopt},   /* level 19 */
-	{25, 25, 23, 7, 3, 64, ZSTD_btopt2},  /* level 20 */
-	{26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */
-	{27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */
-    },
-    {
-	/* for srcSize <= 256 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{0, 0, 0, 0, 0, 0, ZSTD_fast},	 /* level  0 - not used */
-	{18, 13, 14, 1, 6, 8, ZSTD_fast},      /* level  1 */
-	{18, 14, 13, 1, 5, 8, ZSTD_dfast},     /* level  2 */
-	{18, 16, 15, 1, 5, 8, ZSTD_dfast},     /* level  3 */
-	{18, 15, 17, 1, 5, 8, ZSTD_greedy},    /* level  4.*/
-	{18, 16, 17, 4, 5, 8, ZSTD_greedy},    /* level  5.*/
-	{18, 16, 17, 3, 5, 8, ZSTD_lazy},      /* level  6.*/
-	{18, 17, 17, 4, 4, 8, ZSTD_lazy},      /* level  7 */
-	{18, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-	{18, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-	{18, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-	{18, 18, 17, 6, 4, 8, ZSTD_lazy2},     /* level 11.*/
-	{18, 18, 17, 7, 4, 8, ZSTD_lazy2},     /* level 12.*/
-	{18, 19, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13 */
-	{18, 18, 18, 4, 4, 16, ZSTD_btopt},    /* level 14.*/
-	{18, 18, 18, 4, 3, 16, ZSTD_btopt},    /* level 15.*/
-	{18, 19, 18, 6, 3, 32, ZSTD_btopt},    /* level 16.*/
-	{18, 19, 18, 8, 3, 64, ZSTD_btopt},    /* level 17.*/
-	{18, 19, 18, 9, 3, 128, ZSTD_btopt},   /* level 18.*/
-	{18, 19, 18, 10, 3, 256, ZSTD_btopt},  /* level 19.*/
-	{18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/
-	{18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/
-	{18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-	/* for srcSize <= 128 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{17, 12, 12, 1, 7, 8, ZSTD_fast},      /* level  0 - not used */
-	{17, 12, 13, 1, 6, 8, ZSTD_fast},      /* level  1 */
-	{17, 13, 16, 1, 5, 8, ZSTD_fast},      /* level  2 */
-	{17, 16, 16, 2, 5, 8, ZSTD_dfast},     /* level  3 */
-	{17, 13, 15, 3, 4, 8, ZSTD_greedy},    /* level  4 */
-	{17, 15, 17, 4, 4, 8, ZSTD_greedy},    /* level  5 */
-	{17, 16, 17, 3, 4, 8, ZSTD_lazy},      /* level  6 */
-	{17, 15, 17, 4, 4, 8, ZSTD_lazy2},     /* level  7 */
-	{17, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-	{17, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-	{17, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-	{17, 17, 17, 7, 4, 8, ZSTD_lazy2},     /* level 11 */
-	{17, 17, 17, 8, 4, 8, ZSTD_lazy2},     /* level 12 */
-	{17, 18, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13.*/
-	{17, 17, 17, 7, 3, 8, ZSTD_btopt},     /* level 14.*/
-	{17, 17, 17, 7, 3, 16, ZSTD_btopt},    /* level 15.*/
-	{17, 18, 17, 7, 3, 32, ZSTD_btopt},    /* level 16.*/
-	{17, 18, 17, 7, 3, 64, ZSTD_btopt},    /* level 17.*/
-	{17, 18, 17, 7, 3, 256, ZSTD_btopt},   /* level 18.*/
-	{17, 18, 17, 8, 3, 256, ZSTD_btopt},   /* level 19.*/
-	{17, 18, 17, 9, 3, 256, ZSTD_btopt2},  /* level 20.*/
-	{17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/
-	{17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-	/* for srcSize <= 16 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{14, 12, 12, 1, 7, 6, ZSTD_fast},      /* level  0 - not used */
-	{14, 14, 14, 1, 6, 6, ZSTD_fast},      /* level  1 */
-	{14, 14, 14, 1, 4, 6, ZSTD_fast},      /* level  2 */
-	{14, 14, 14, 1, 4, 6, ZSTD_dfast},     /* level  3.*/
-	{14, 14, 14, 4, 4, 6, ZSTD_greedy},    /* level  4.*/
-	{14, 14, 14, 3, 4, 6, ZSTD_lazy},      /* level  5.*/
-	{14, 14, 14, 4, 4, 6, ZSTD_lazy2},     /* level  6 */
-	{14, 14, 14, 5, 4, 6, ZSTD_lazy2},     /* level  7 */
-	{14, 14, 14, 6, 4, 6, ZSTD_lazy2},     /* level  8.*/
-	{14, 15, 14, 6, 4, 6, ZSTD_btlazy2},   /* level  9.*/
-	{14, 15, 14, 3, 3, 6, ZSTD_btopt},     /* level 10.*/
-	{14, 15, 14, 6, 3, 8, ZSTD_btopt},     /* level 11.*/
-	{14, 15, 14, 6, 3, 16, ZSTD_btopt},    /* level 12.*/
-	{14, 15, 14, 6, 3, 24, ZSTD_btopt},    /* level 13.*/
-	{14, 15, 15, 6, 3, 48, ZSTD_btopt},    /* level 14.*/
-	{14, 15, 15, 6, 3, 64, ZSTD_btopt},    /* level 15.*/
-	{14, 15, 15, 6, 3, 96, ZSTD_btopt},    /* level 16.*/
-	{14, 15, 15, 6, 3, 128, ZSTD_btopt},   /* level 17.*/
-	{14, 15, 15, 6, 3, 256, ZSTD_btopt},   /* level 18.*/
-	{14, 15, 15, 7, 3, 256, ZSTD_btopt},   /* level 19.*/
-	{14, 15, 15, 8, 3, 256, ZSTD_btopt2},  /* level 20.*/
-	{14, 15, 15, 9, 3, 256, ZSTD_btopt2},  /* level 21.*/
-	{14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/
-    },
-};
-
-/*! ZSTD_getCParams() :
-*   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
-*   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_compressionParameters cp;
-	size_t const addedSize = srcSize ? 0 : 500;
-	U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1;
-	U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-	if (compressionLevel <= 0)
-		compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
-	if (compressionLevel > ZSTD_MAX_CLEVEL)
-		compressionLevel = ZSTD_MAX_CLEVEL;
-	cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-	if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */
-		if (cp.windowLog > ZSTD_WINDOWLOG_MAX)
-			cp.windowLog = ZSTD_WINDOWLOG_MAX;
-		if (cp.chainLog > ZSTD_CHAINLOG_MAX)
-			cp.chainLog = ZSTD_CHAINLOG_MAX;
-		if (cp.hashLog > ZSTD_HASHLOG_MAX)
-			cp.hashLog = ZSTD_HASHLOG_MAX;
-	}
-	cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-	return cp;
-}
-
-/*! ZSTD_getParams() :
-*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
-*   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_parameters params;
-	ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
-	memset(&params, 0, sizeof(params));
-	params.cParams = cParams;
-	return params;
-}
-
-EXPORT_SYMBOL(ZSTD_maxCLevel);
-EXPORT_SYMBOL(ZSTD_compressBound);
-
-EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCCtx);
-EXPORT_SYMBOL(ZSTD_compressCCtx);
-EXPORT_SYMBOL(ZSTD_compress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCDict);
-EXPORT_SYMBOL(ZSTD_compress_usingCDict);
-
-EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCStream);
-EXPORT_SYMBOL(ZSTD_initCStream_usingCDict);
-EXPORT_SYMBOL(ZSTD_resetCStream);
-EXPORT_SYMBOL(ZSTD_compressStream);
-EXPORT_SYMBOL(ZSTD_flushStream);
-EXPORT_SYMBOL(ZSTD_endStream);
-EXPORT_SYMBOL(ZSTD_CStreamInSize);
-EXPORT_SYMBOL(ZSTD_CStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_getCParams);
-EXPORT_SYMBOL(ZSTD_getParams);
-EXPORT_SYMBOL(ZSTD_checkCParams);
-EXPORT_SYMBOL(ZSTD_adjustCParams);
-
-EXPORT_SYMBOL(ZSTD_compressBegin);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_compressBegin_advanced);
-EXPORT_SYMBOL(ZSTD_copyCCtx);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict);
-EXPORT_SYMBOL(ZSTD_compressContinue);
-EXPORT_SYMBOL(ZSTD_compressEnd);
-
-EXPORT_SYMBOL(ZSTD_getBlockSizeMax);
-EXPORT_SYMBOL(ZSTD_compressBlock);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
new file mode 100644
index 0000000..436985b
--- /dev/null
+++ b/lib/zstd/compress/fse_compress.c
@@ -0,0 +1,625 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/compiler.h"
+#include "../common/mem.h"        /* U32, U16, etc. */
+#include "../common/debug.h"      /* assert, DEBUGLOG */
+#include "hist.h"       /* HIST_count_wksp */
+#include "../common/bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  This file is designed to be included,
+  for type-specific functions (template emulation in C).
+  The objective is to write these functions only once, for easier maintenance.
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle the worst case, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)` bytes.
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                            void* workSpace, size_t wkspSize)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+
+    U32* cumul = (U32*)workSpace;
+    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+
+    U32 highThreshold = tableSize-1;
+
+    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
+    /* CTable header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+    assert(tableLog < 16);   /* required for threshold strategy to work */
+
+    /* For explanations on how to distribute symbol values over the table :
+     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+     #ifdef __clang_analyzer__
+     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+     #endif
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u <= maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
+    }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurrences;
+            int const freq = normalizedCounter[symbol];
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold)
+                    position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+
+        assert(position==0);  /* Must have initialized all positions */
+    }
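+
+    /* Illustrative note (not in the original source) : FSE_TABLESTEP(tableSize)
+     * expands to (tableSize>>1) + (tableSize>>3) + 3 (see fse.h). For the table
+     * sizes used here the step is odd, hence coprime with the power-of-2
+     * tableSize, so the walk visits every cell exactly once. Worked example for
+     * tableSize = 16 : step = 8+2+3 = 13, visiting positions
+     * 0,13,10,7,4,1,14,11,8,5,2,15,12,9,6,3 before returning to 0, which is
+     * what the assert above checks. */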
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
+                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
+                break;
+
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+    }   }   }   }
+
+#if 0  /* debug : symbol costs */
+    DEBUGLOG(5, "\n --- table statistics : ");
+    {   U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
+                symbol, normalizedCounter[symbol],
+                FSE_getMaxNbBits(symbolTT, symbol),
+                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
+        }
+    }
+#endif
+
+    return 0;
+}
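+
+/* Usage sketch (illustrative only, not part of the original file) : the caller
+ * supplies a 4-byte-aligned scratch buffer sized with the workspace macros
+ * from fse.h, e.g. :
+ *
+ *   U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(FSE_MAX_SYMBOL_VALUE, FSE_MAX_TABLELOG)];
+ *   size_t const err = FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog,
+ *                                           wksp, sizeof(wksp));
+ *   if (FSE_isError(err)) { handle error }
+ */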
+
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+
+/*-**************************************************************
+*  FSE NCount encoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
+}
+
+static size_t
+FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                         unsigned writeIsSafe)
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream = 0;
+    int bitCount = 0;
+    unsigned symbol = 0;
+    unsigned const alphabetSize = maxSymbolValue + 1;
+    int previousIs0 = 0;
+
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
+        if (previousIs0) {
+            unsigned start = symbol;
+            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
+            if (symbol == alphabetSize) break;   /* incorrect distribution */
+            while (symbol >= start+24) {
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (symbol >= start+3) {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (symbol-start) << bitCount;
+            bitCount += 2;
+            if (bitCount>16) {
+                if ((!writeIsSafe) && (out > oend - 2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+        }   }
+        {   int count = normalizedCounter[symbol++];
+            int const max = (2*threshold-1) - remaining;
+            remaining -= count < 0 ? -count : count;
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold)
+                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previousIs0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
+            while (remaining<threshold) { nbBits--; threshold>>=1; }
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2))
+                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    if (remaining != 1)
+        return ERROR(GENERIC);  /* incorrect normalized distribution */
+    assert(symbol <= alphabetSize);
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2))
+        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;
+
+    return (out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+
+    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
+}
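+
+/* Note (illustrative, not from the original source) : the NCount header written
+ * above stores each count with a variable width, shrinking nbBits as the
+ * unassigned probability mass ('remaining') halves; runs of zero counts are
+ * escaped with 2-bit repeat codes, and runs of 24+ zeros with a 0xFFFF prefix. */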
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t size;
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)ZSTD_malloc(size);
+}
+
+void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
+    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    return minBits;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 tableLog = maxTableLog;
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+}
+
+/* Secondary normalization method.
+   To be used when primary method fails. */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
+{
+    short const NOT_YET_ASSIGNED = -2;
+    U32 s;
+    U32 distributed = 0;
+    U32 ToDistribute;
+
+    /* Init */
+    U32 const lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = lowProbCount;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+
+        norm[s]=NOT_YET_ASSIGNED;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if (ToDistribute == 0)
+        return 0;
+
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1) {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
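+
+/* Note (illustrative) : FSE_normalizeCount() below falls back to this secondary
+ * method when first-pass rounding over-allocates by more than half of the
+ * largest symbol's share, i.e. when subtracting the surplus from 'largest'
+ * alone would distort its probability too much. */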
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
+        U64 const scale = 62 - tableLog;
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {
+                normalizedCounter[s] = lowProbCount;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP) { largestP=proba; largest=s; }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
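+
+/* Worked example (illustrative only) : with total = 100 and tableLog = 5
+ * (tableSize = 32), scale = 57 and step ~= 2^62/100, so a symbol with
+ * count = 50 gets proba ~= 50*32/100 = 16 of the 32 table slots. Rounding
+ * leftovers accumulate in stillToDistribute and are granted to the most
+ * frequent symbol at the end (or trigger FSE_normalizeM2). */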
+
+
+/* fake FSE_CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+
+    /* header */
+    tableU16[-2] = (U16) nbBits;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* Build table */
+    for (s=0; s<tableSize; s++)
+        tableU16[s] = (U16)(tableSize + s);
+
+    /* Build Symbol Transformation Table */
+    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+        for (s=0; s<=maxSymbolValue; s++) {
+            symbolTT[s].deltaNbBits = deltaNbBits;
+            symbolTT[s].deltaFindState = s-1;
+    }   }
+
+    return 0;
+}
+
+/* fake FSE_CTable, for rle input (always same symbol) */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    void* ptr = ct;
+    U16* tableU16 = ( (U16*) ptr) + 2;
+    void* FSCTptr = (U32*)ptr + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
+
+    /* header */
+    tableU16[-2] = (U16) 0;
+    tableU16[-1] = (U16) symbolValue;
+
+    /* Build table */
+    tableU16[0] = 0;
+    tableU16[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    symbolTT[symbolValue].deltaNbBits = 0;
+    symbolTT[symbolValue].deltaFindState = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip=iend;
+
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    /* init */
+    if (srcSize <= 2) return 0;
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    if (srcSize & 1) {
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
+    }
+
+    /* join to mod 4 */
+    srcSize -= 2;
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while ( ip>istart ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+    if (fast)
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    else
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
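+
+#if 0
+/* End-to-end usage sketch (illustrative only; error handling elided).
+ * The function name and local buffers are hypothetical, not part of this file. */
+static size_t FSE_compress_sketch(void* dst, size_t dstSize, const void* src, size_t srcSize)
+{
+    unsigned count[FSE_MAX_SYMBOL_VALUE+1];
+    short norm[FSE_MAX_SYMBOL_VALUE+1];
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    unsigned tableLog;
+    /* 1. histogram the input */
+    HIST_count_simple(count, &maxSymbolValue, src, srcSize);
+    /* 2. choose a table size and normalize counts to sum to 2^tableLog */
+    tableLog = FSE_optimalTableLog(FSE_DEFAULT_TABLELOG, srcSize, maxSymbolValue);
+    FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ 0);
+    /* 3. emit the NCount header, build the CTable, then encode the payload */
+    {   FSE_CTable ct[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+        U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(FSE_MAX_SYMBOL_VALUE, FSE_MAX_TABLELOG)];
+        BYTE* op = (BYTE*)dst;
+        size_t const hSize = FSE_writeNCount(op, dstSize, norm, maxSymbolValue, tableLog);
+        FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, wksp, sizeof(wksp));
+        return hSize + FSE_compress_usingCTable(op+hSize, dstSize-hSize, src, srcSize, ct);
+    }
+}
+#endif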
+
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c
new file mode 100644
index 0000000..3ddc6df
--- /dev/null
+++ b/lib/zstd/compress/hist.c
@@ -0,0 +1,165 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/mem.h"             /* U32, BYTE, etc. */
+#include "../common/debug.h"           /* assert, DEBUGLOG */
+#include "../common/error_private.h"   /* ERROR */
+#include "hist.h"
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned largestCount=0;
+
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    while (ip<end) {
+        assert(*ip <= maxSymbolValue);
+        count[*ip++]++;
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+
+    {   U32 s;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > largestCount) largestCount = count[s];
+    }
+
+    return largestCount;
+}
+
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
+    if (!sourceSize) {
+        ZSTD_memset(count, 0, countSize);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
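+        /* 'cached' still holds 4 bytes that were read but not yet counted :
+         * step ip back so the tail loop below picks them up byte by byte */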
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    {   U32 s;
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
+    }   }
+
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
+    return (size_t)max;
+}
+
+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize,
+                          void* workSpace, size_t workSpaceSize)
+{
+    if (sourceSize < 1500) /* heuristic threshold */
+        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
+}
+
+/* HIST_count_wksp() :
+ * Same as HIST_count(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of at least HIST_WKSP_SIZE_U32 unsigned values */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* source, size_t sourceSize,
+                       void* workSpace, size_t workSpaceSize)
+{
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    if (*maxSymbolValuePtr < 255)
+        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
+    *maxSymbolValuePtr = 255;
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
+}
+
diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h
new file mode 100644
index 0000000..fc1830a
--- /dev/null
+++ b/lib/zstd/compress/hist.h
@@ -0,0 +1,75 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/zstd_deps.h"   /* size_t */
+
+
+/* --- simple histogram functions --- */
+
+/*! HIST_count():
+ *  Provides the precise count of each byte within a table 'count'.
+ * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+ *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
+ * @return : count of the most frequent symbol (which isn't identified),
+ *           or an error code, which can be tested using HIST_isError().
+ *           note : if return == srcSize, there is only one symbol.
+ */
+size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                  const void* src, size_t srcSize);
+
+unsigned HIST_isError(size_t code);  /*< tells if a return value is an error code */
+
+
+/* --- advanced histogram functions --- */
+
+#define HIST_WKSP_SIZE_U32 1024
+#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
+/* HIST_count_wksp() :
+ *  Same as HIST_count(), but using an externally provided scratch buffer.
+ *  Benefit is this function will use very little stack space.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t workSpaceSize);
+
+/* HIST_countFast() :
+ *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
+ *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
+ */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* src, size_t srcSize);
+
+/* HIST_countFast_wksp() :
+ *  Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize,
+                           void* workSpace, size_t workSpaceSize);
+
+/*! HIST_count_simple() :
+ *  Same as HIST_countFast(), this function is unsafe,
+ *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
+ *  It is also a bit slower for large inputs.
+ *  However, it does not need any additional memory (not even on stack).
+ * @return : count of the most frequent symbol.
+ *  Note this function doesn't produce any error (i.e. it must succeed).
+ */
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize);
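+
+/* Usage sketch (illustrative only, not part of the original header) :
+ *
+ *   unsigned count[256];
+ *   unsigned maxSymbolValue = 255;
+ *   unsigned wksp[HIST_WKSP_SIZE_U32];
+ *   size_t const largest = HIST_count_wksp(count, &maxSymbolValue,
+ *                                          src, srcSize, wksp, sizeof(wksp));
+ *   if (HIST_isError(largest)) { handle error }
+ */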
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
new file mode 100644
index 0000000..f76a526
--- /dev/null
+++ b/lib/zstd/compress/huf_compress.c
@@ -0,0 +1,905 @@
+/* ******************************************************************
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"
+#include "hist.h"
+#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+#include "../common/fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+
+    /* convert to weight */
+    wksp->bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
+}
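+
+/* Note (illustrative) : the first output byte disambiguates the two encodings
+ * above : a value < 128 is the size of an FSE-compressed weight stream, while
+ * 128 + (maxSymbolValue-1) announces raw 4-bit weights, two symbols per byte. */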
+
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
+
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+
+    /* get symbol weights */
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+    *hasZeroWeights = (rankVal[0] > 0);
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 curr = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = curr;
+    }   }
+
+    /* fill nbBits */
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+    }   }
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine starting value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
+        {   U16 min = 0;
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    *maxSymbolValuePtr = nbSymbols - 1;
+    return readSize;
+}
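+
+/* Worked example (illustrative only) : for nbBits = {A:1, B:2, C:3, D:3}
+ * (tableLog = 3), nbPerRank = {1:1, 2:1, 3:2} and the loop above yields
+ * valPerRank = {3:0, 2:1, 1:1}, giving the canonical codes A=1, B=01,
+ * C=000, D=001 : a complete prefix code, assigned in symbol order within
+ * each rank. */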
+
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+{
+    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
+    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    return table[symbolValue].nbBits;
+}
+
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+/*
+ * HUF_setMaxHeight():
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
+ *
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
+ * the tree so that it is a valid canonical Huffman tree.
+ *
+ * @pre               The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits == huffNode[lastNonNull].nbBits.
+ * @post              The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits is the return value <= maxNbBits.
+ *
+ * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+ * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
+ *                    may not respect. After this function the Huffman tree will
+ *                    respect maxNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment,
+ *                    necessarily no more than maxNbBits.
+ */
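+/* Worked example (illustrative only) : with largestBits = 12 and
+ * maxNbBits = 11, baseCost = 2 and each 12-bit leaf clamped to 11 bits adds
+ * baseCost - 1 = 1 to totalCost. Two clamped leaves give totalCost = 2,
+ * renormalized to 1 in 2^-maxNbBits units; repaying it pushes one symbol
+ * from 10 to 11 bits, restoring the Kraft sum to exactly 1. */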
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    /* early exit : no elt > maxNbBits, so the tree is already valid. */
+    if (largestBits <= maxNbBits) return largestBits;
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        int n = (int)lastNonNull;
+
+        /* Adjust any ranks > maxNbBits to maxNbBits.
+         * Compute totalCost, which is how far the sum of the ranks has
+         * gone over 2^largestBits after adjusting the offending ranks.
+         */
+        while (huffNode[n].nbBits > maxNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n--;
+        }
+        /* n stops at huffNode[n].nbBits <= maxNbBits */
+        assert(huffNode[n].nbBits <= maxNbBits);
+        /* n ends at the index of the smallest symbol using < maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) --n;
+
+        /* renorm totalCost from 2^largestBits to 2^maxNbBits
+         * note : totalCost is necessarily a multiple of baseCost */
+        assert((totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - maxNbBits);
+        assert(totalCost > 0);
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+2];
+
+            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
+            {   U32 currentNbBits = maxNbBits;
+                int pos;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
+            }   }
+
+            while (totalCost > 0) {
+                /* Repay the largest power of 2 not above totalCost first:
+                 * raising the depth of one symbol in rank nBitsToDecrease
+                 * repays 2^(nBitsToDecrease-1) of the cost.
+                 */
+                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 const highPos = rankLast[nBitsToDecrease];
+                    U32 const lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    /* Use highPos if lowPos has no symbols, or if demoting
+                     * two lowPos symbols is not cheaper than demoting one
+                     * highPos symbol.
+                     */
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when there is no rank-1 symbol left => find the closest one (note : there is necessarily at least one !) */
+                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
+                /* the HUF_TABLELOG_MAX test is just to please gcc 5+; it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+                    nBitsToDecrease++;
+                assert(rankLast[nBitsToDecrease] != noSymbol);
+                /* Increase the number of bits to gain back half the rank cost. */
+                totalCost -= 1 << (nBitsToDecrease-1);
+                huffNode[rankLast[nBitsToDecrease]].nbBits++;
+
+                /* Fix up the new rank.
+                 * If the new rank was empty, this symbol is now its smallest.
+                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
+                 */
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
+                /* Fix up the old rank.
+                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
+                 * it must be the only symbol in its rank, so the old rank now has no symbols.
+                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
+                 * the smallest node in the rank. If the previous position belongs to a different rank,
+                 * then the rank is now empty.
+                 */
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                }
+            }   /* while (totalCost > 0) */
+
+            /* If we've removed too much weight, then we have to add it back.
+             * To avoid overshooting again, we only adjust the smallest rank.
+             * We take the largest nodes from the lowest rank 0 and move them
+             * to rank 1. There are guaranteed to be enough rank 0 symbols
+             * because TODO.
+             */
+            while (totalCost < 0) {  /* Sometimes, the cost correction overshoots */
+                /* special case : no rank 1 symbol (using maxNbBits-1);
+                 * let's create one from largest rank 0 (using maxNbBits).
+                 */
+                if (rankLast[1] == noSymbol) {
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    assert(n >= 0);
+                    rankLast[1] = (U32)(n+1);
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+            }
+        }   /* repay normalized cost */
+    }   /* there are several too-large elements (at least 2) */
+
+    return maxNbBits;
+}
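+
+/* Worked example for HUF_setMaxHeight(), with illustrative numbers:
+ * suppose largestBits == 13 and maxNbBits == 11, so baseCost == 1<<2 == 4.
+ * Clamping a depth-13 symbol to 11 adds baseCost - (1<<0) == 3 to totalCost;
+ * clamping a depth-12 symbol adds 4 - 2 == 2. After renormalization
+ * (totalCost >>= 2), each unit of totalCost is repaid by pushing one symbol
+ * one level deeper: a symbol in rank nBitsToDecrease repays 2^(nBitsToDecrease-1).
+ */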
+
+typedef struct {
+    U32 base;
+    U32 curr;
+} rankPos;
+
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+#define RANK_POSITION_TABLE_SIZE 32
+
+typedef struct {
+  huffNodeTable huffNodeTbl;
+  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+/*
+ * HUF_sort():
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ *
+ * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
+ *                            Must have (maxSymbolValue + 1) entries.
+ * @param[in]  count          Histogram of the symbols.
+ * @param[in]  maxSymbolValue Maximum symbol value.
+ * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
+ */
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
+{
+    int n;
+    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+
+    /* Compute base and set curr to base.
+     * For each symbol n let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
+     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
+     * We attribute each symbol to lowerRank's base value, because we want to know where
+     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
+     */
+    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 lowerRank = BIT_highbit32(count[n] + 1);
+        rankPosition[lowerRank].base++;
+    }
+    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
+        rankPosition[n-1].base += rankPosition[n].base;
+        rankPosition[n-1].curr = rankPosition[n-1].base;
+    }
+    /* Sort */
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 const c = count[n];
+        U32 const r = BIT_highbit32(c+1) + 1;
+        U32 pos = rankPosition[r].curr++;
+        /* Insert into the correct position in the rank.
+         * We have at most 256 symbols, so this insertion should be fine.
+         */
+        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
+            huffNode[pos] = huffNode[pos-1];
+            pos--;
+        }
+        huffNode[pos].count = c;
+        huffNode[pos].byte  = (BYTE)n;
+    }
+}
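+
+/* Illustrative example of HUF_sort()'s bucketing, with hypothetical counts:
+ * BIT_highbit32(x) returns floor(log2(x)), so count==0 maps to bucket
+ * BIT_highbit32(1) == 0, count==5 to BIT_highbit32(6) == 2, and count==1000
+ * to BIT_highbit32(1001) == 9. Since bases accumulate from the top rank down,
+ * higher-count buckets occupy lower positions, and the insertion pass then
+ * orders symbols within each bucket, yielding a fully decreasing order.
+ */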
+
+
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-byte boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
+
+/* HUF_buildTree():
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
+ *
+ * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
+ * @param maxSymbolValue  The maximum symbol value.
+ * @return                The smallest node in the Huffman tree (by count).
+ */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+{
+    nodeElt* const huffNode0 = huffNode - 1;
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+    /* init for parents */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    return nonNullRank;
+}
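+
+/* Small worked example for HUF_buildTree(), with hypothetical counts:
+ * for 4 symbols with counts {40, 30, 20, 10} (already sorted decreasing),
+ * the two smallest (10, 20) merge into a node of count 30, which merges
+ * with 30 into 60, which merges with 40 into the root. The resulting
+ * depths (nbBits) are {1, 2, 3, 3}.
+ */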
+
+/*
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
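+
+/* Worked example of the canonical value assignment above, assuming the
+ * depths {1, 2, 3, 3} from the earlier example: nbPerRank[1..3] == {1, 1, 2}.
+ * Walking n from 3 down to 1 gives valPerRank[3] == 0, valPerRank[2] == 1,
+ * valPerRank[1] == 1, so the codes are 000 and 001 (3 bits), 01 (2 bits),
+ * and 1 (1 bit), which is a prefix-free canonical code.
+ */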
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-byte boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
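+
+/* Minimal usage sketch for HUF_buildCTable_wksp(), with a hypothetical
+ * caller; `count` is assumed to be a histogram over symbols 0..255:
+ *
+ *   HUF_CElt ctable[HUF_SYMBOLVALUE_MAX + 1];
+ *   HUF_buildCTable_wksp_tables wksp;   // 4-byte aligned scratch space
+ *   size_t const maxBits = HUF_buildCTable_wksp(ctable, count, 255,
+ *                              HUF_TABLELOG_DEFAULT, &wksp, sizeof(wksp));
+ *   if (HUF_isError(maxBits)) return maxBits;
+ */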
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+  }
+  return !bad;
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+      if (HUF_isError(initErr)) return 0; }
+
+    n = srcSize & ~3;  /* round down to a multiple of 4 */
+    switch (srcSize & 3)
+    {
+        case 3:
+            HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+            HUF_FLUSHBITS_2(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 2:
+            HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+            HUF_FLUSHBITS_1(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 1:
+            HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+            HUF_FLUSHBITS(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 0: ZSTD_FALLTHROUGH;
+        default: break;
+    }
+
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
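+
+/* Note on the flush cadence above, assuming HUF_TABLELOG_MAX == 12, its
+ * usual value: a symbol needs at most 12 bits, so with a 64-bit bitContainer
+ * both HUF_FLUSHBITS_1 (2*12+7 <= 64) and HUF_FLUSHBITS_2 (4*12+7 <= 64)
+ * compile to nothing, leaving one flush per group of 4 symbols. With a
+ * 32-bit container, HUF_FLUSHBITS_2 flushes after every second symbol.
+ * Symbols are encoded from the last to the first so that the decoder,
+ * which reads the bitstream backwards, regenerates them in forward order.
+ */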
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+                                      const void* src, size_t srcSize,
+                                      const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    if (bmi2) {
+        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+    }
+    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    (void)bmi2;
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, int bmi2)
+{
+    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+2, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+4, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    assert(ip <= iend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
+        if (cSize==0) return 0;
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
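+
+/* Layout note for the 4-stream format, with illustrative sizes: the 6-byte
+ * jump table holds the little-endian U16 compressed sizes of the first three
+ * streams; the fourth size is implied by the total. E.g. stream sizes
+ * {1000, 980, 1010} give header bytes E8 03 D4 03 F2 03.
+ */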
+
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
+{
+    size_t const cSize = (nbStreams==HUF_singleStream) ?
+                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    assert(op >= ostart);
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return (size_t)(op-ostart);
+}
+
+typedef struct {
+    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
+} HUF_compress_tables_t;
+
+/* HUF_compress_internal() :
+ * `workSpace_align4` must be aligned on 4-byte boundaries,
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
+static size_t
+HUF_compress_internal (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       HUF_nbStreams_e nbStreams,
+                       void* workSpace_align4, size_t wkspSize,
+                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+                 const int bmi2)
+{
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-byte boundaries */
+
+    /* checks & inits */
+    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (!srcSize) return 0;  /* Uncompressed */
+    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
+    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
+
+    /* Heuristic : If old table is valid, use it for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
+        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
+        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+    }
+
+    /* Check validity of previous table */
+    if ( repeat
+      && *repeat == HUF_repeat_check
+      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Build Huffman Tree */
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
+                                            maxSymbolValue, huffLog,
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
+        CHECK_F(maxBits);
+        huffLog = (U32)maxBits;
+        /* Zero unused symbols in CTable, so we can check it for validity */
+        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
+               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
+    }
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
+        /* Check if using previous huffman table is beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend,
+                                                   src, srcSize,
+                                                   nbStreams, oldHufTable, bmi2);
+        }   }
+
+        /* Use the new huffman table */
+        if (hSize + 12ul >= srcSize) { return 0; }
+        op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable)
+            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
+    }
+    return HUF_compressCTable_internal(ostart, op, oend,
+                                       src, srcSize,
+                                       nbStreams, table->CTable, bmi2);
+}
+
+
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize, hufTable,
+                                 repeat, preferRepeat, bmi2);
+}
+
+/* HUF_compress4X_wksp():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 hufTable, repeat, preferRepeat, bmi2);
+}
+
diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
new file mode 100644
index 0000000..a4e9160
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress.c
@@ -0,0 +1,5109 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
+#include "../common/cpu.h"
+#include "../common/mem.h"
+#include "hist.h"           /* HIST_countFast_wksp */
+#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+#include "zstd_fast.h"
+#include "zstd_double_fast.h"
+#include "zstd_lazy.h"
+#include "zstd_opt.h"
+#include "zstd_ldm.h"
+#include "zstd_compress_superblock.h"
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * COMPRESS_HEAPMODE :
+ * Selects how the default compression function ZSTD_compress() allocates its context:
+ * on the stack (0, default) or on the heap (1).
+ * Note that functions taking an explicit context, such as ZSTD_compressCCtx(), are unaffected.
+ */
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only compatible with the "normal"
+ * full-block strategy.
+ * When there are many small blocks due to frequent flushes in streaming mode,
+ * the overhead of block headers can make the compressed data larger than the
+ * return value of ZSTD_compressBound().
+ */
+size_t ZSTD_compressBound(size_t srcSize) {
+    return ZSTD_COMPRESSBOUND(srcSize);
+}
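+
+/* Illustrative numbers, assuming the usual ZSTD_COMPRESSBOUND() definition
+ * of srcSize + (srcSize >> 8) plus a small margin for inputs below 128 KB:
+ * srcSize == 100 gives 100 + 0 + ((128*1024 - 100) >> 11) == 163, and
+ * srcSize == 1 MB gives 1048576 + 4096 == 1052672.
+ */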
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CDict_s {
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
+    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    ZSTD_cwksp workspace;
+    ZSTD_matchState_t matchState;
+    ZSTD_compressedBlockState_t cBlockState;
+    ZSTD_customMem customMem;
+    U32 dictID;
+    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
+{
+    assert(cctx != NULL);
+    ZSTD_memset(cctx, 0, sizeof(*cctx));
+    cctx->customMem = memManager;
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
+        assert(!ZSTD_isError(err));
+        (void)err;
+    }
+}
+
+ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
+{
+    ZSTD_STATIC_ASSERT(zcss_init==0);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
+        if (!cctx) return NULL;
+        ZSTD_initCCtx(cctx, customMem);
+        return cctx;
+    }
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
+{
+    ZSTD_cwksp ws;
+    ZSTD_CCtx* cctx;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+
+    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
+    if (cctx == NULL) return NULL;
+
+    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
+    ZSTD_cwksp_move(&cctx->workspace, &ws);
+    cctx->staticSize = workspaceSize;
+
+    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    return cctx;
+}
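+
+/* Minimal usage sketch for ZSTD_initStaticCCtx(), assuming the buffer is
+ * sized with the public ZSTD_estimateCCtxSize() helper; my_alloc() is a
+ * hypothetical allocator standing in for the caller's own:
+ *
+ *   size_t const need = ZSTD_estimateCCtxSize(ZSTD_CLEVEL_DEFAULT);
+ *   void* const buf = my_alloc(need);    // must be 8-byte aligned
+ *   ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(buf, need);
+ *   // cctx lives inside buf; never free cctx itself, only buf.
+ */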
+
+/*
+ * Clears and frees all of the dictionaries in the CCtx.
+ */
+static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
+{
+    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
+    ZSTD_freeCDict(cctx->localDict.cdict);
+    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
+    cctx->cdict = NULL;
+}
+
+static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
+{
+    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
+    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
+    return bufferSize + cdictSize;
+}
+
+static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
+{
+    assert(cctx != NULL);
+    assert(cctx->staticSize == 0);
+    ZSTD_clearAllDicts(cctx);
+    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                    "not compatible with static CCtx");
+    {
+        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+        ZSTD_freeCCtxContent(cctx);
+        if (!cctxInWorkspace) {
+            ZSTD_customFree(cctx, cctx->customMem);
+        }
+    }
+    return 0;
+}
+
+
+static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support sizeof on NULL */
+    /* cctx may be in the workspace */
+    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+           + ZSTD_cwksp_sizeof(&cctx->workspace)
+           + ZSTD_sizeof_localDict(cctx->localDict)
+           + ZSTD_sizeof_mtctx(cctx);
+}
+
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
+{
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+
+/* Returns 1 if compression parameters are such that we should
+ * enable long distance matching (wlog >= 27, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+}
+
+static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+        ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params cctxParams;
+    /* should not matter, as all cParams are presumed properly defined */
+    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
+    cctxParams.cParams = cParams;
+
+    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
+        cctxParams.ldmParams.enableLdm = 1;
+        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
+        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
+        assert(cctxParams.ldmParams.hashRateLog < 32);
+    }
+
+    assert(!ZSTD_checkCParams(cParams));
+    return cctxParams;
+}
+
+static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params* params;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
+            sizeof(ZSTD_CCtx_params), customMem);
+    if (!params) { return NULL; }
+    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+    params->customMem = customMem;
+    return params;
+}
+
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
+{
+    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
+}
+
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
+{
+    if (params == NULL) { return 0; }
+    ZSTD_customFree(params, params->customMem);
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
+{
+    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+}
+
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->fParams.contentSizeFlag = 1;
+    return 0;
+}
+
+#define ZSTD_NO_CLEVEL 0
+
+/*
+ * Initializes the cctxParams from params and compressionLevel.
+ * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+ */
+static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = compressionLevel;
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
+    return 0;
+}
+
+/*
+ * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+ * @param params Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
+}
+
+ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+        bounds.lowerBound = ZSTD_minCLevel();
+        bounds.upperBound = ZSTD_maxCLevel();
+        return bounds;
+
+    case ZSTD_c_windowLog:
+        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
+        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_hashLog:
+        bounds.lowerBound = ZSTD_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_chainLog:
+        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
+        bounds.upperBound = ZSTD_CHAINLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_searchLog:
+        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
+        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_minMatch:
+        bounds.lowerBound = ZSTD_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_targetLength:
+        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
+        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
+        return bounds;
+
+    case ZSTD_c_strategy:
+        bounds.lowerBound = ZSTD_STRATEGY_MIN;
+        bounds.upperBound = ZSTD_STRATEGY_MAX;
+        return bounds;
+
+    case ZSTD_c_contentSizeFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_checksumFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_dictIDFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_nbWorkers:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_jobSize:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_overlapLog:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_enableDedicatedDictSearch:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_enableLongDistanceMatching:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_ldmHashLog:
+        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmMinMatch:
+        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmBucketSizeLog:
+        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmHashRateLog:
+        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
+        return bounds;
+
+    /* experimental parameters */
+    case ZSTD_c_rsyncable:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_forceMaxWindow :
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_format:
+        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+        bounds.lowerBound = ZSTD_f_zstd1;
+        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_forceAttachDict:
+        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
+        bounds.lowerBound = ZSTD_dictDefaultAttach;
+        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_literalCompressionMode:
+        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
+        bounds.lowerBound = ZSTD_lcm_auto;
+        bounds.upperBound = ZSTD_lcm_uncompressed;
+        return bounds;
+
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
+    case ZSTD_c_srcSizeHint:
+        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+        return bounds;
+
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+        bounds.lowerBound = (int)ZSTD_bm_buffered;
+        bounds.upperBound = (int)ZSTD_bm_stable;
+        return bounds;
+
+    case ZSTD_c_blockDelimiters:
+        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
+        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
+        return bounds;
+
+    case ZSTD_c_validateSequences:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    default:
+        bounds.error = ERROR(parameter_unsupported);
+        return bounds;
+    }
+}
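+
+/* Usage sketch for ZSTD_cParam_getBounds(), with a hypothetical caller:
+ *
+ *   ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
+ *   if (!ZSTD_isError(b.error)) {
+ *       // valid values span b.lowerBound .. b.upperBound inclusive
+ *   }
+ */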
+
+/* ZSTD_cParam_clampBounds:
+ * Clamps the value into the bounded range.
+ */
+static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return bounds.error;
+    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
+    if (*value > bounds.upperBound) *value = bounds.upperBound;
+    return 0;
+}
+
+#define BOUNDCHECK(cParam, val) { \
+    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+                    parameter_outOfBound, "Param out of bounds"); \
+}
+
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+        return 1;
+
+    case ZSTD_c_format:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow :
+    case ZSTD_c_nbWorkers:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+    default:
+        return 0;
+    }
+}
+
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
+    if (cctx->streamStage != zcss_init) {
+        if (ZSTD_isUpdateAuthorized(param)) {
+            cctx->cParamsChanged = 1;
+        } else {
+            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
+    }   }
+
+    switch(param)
+    {
+    case ZSTD_c_nbWorkers:
+        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
+                        "MT not compatible with static alloc");
+        break;
+
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_format:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+        break;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
+}
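+
+/* Typical call sequence, with a hypothetical caller: parameters are staged
+ * in cctx->requestedParams and only take effect when a compression starts.
+ *
+ *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ *   // ... ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize); ...
+ */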
+
+size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+                                    ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
+    switch(param)
+    {
+    case ZSTD_c_format :
+        BOUNDCHECK(ZSTD_c_format, value);
+        CCtxParams->format = (ZSTD_format_e)value;
+        return (size_t)CCtxParams->format;
+
+    case ZSTD_c_compressionLevel : {
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        if (value == 0)
+            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
+        else
+            CCtxParams->compressionLevel = value;
+        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
+        return 0;  /* return type (size_t) cannot represent negative values */
+    }
+
+    case ZSTD_c_windowLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_windowLog, value);
+        CCtxParams->cParams.windowLog = (U32)value;
+        return CCtxParams->cParams.windowLog;
+
+    case ZSTD_c_hashLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_hashLog, value);
+        CCtxParams->cParams.hashLog = (U32)value;
+        return CCtxParams->cParams.hashLog;
+
+    case ZSTD_c_chainLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_chainLog, value);
+        CCtxParams->cParams.chainLog = (U32)value;
+        return CCtxParams->cParams.chainLog;
+
+    case ZSTD_c_searchLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_searchLog, value);
+        CCtxParams->cParams.searchLog = (U32)value;
+        return (size_t)value;
+
+    case ZSTD_c_minMatch :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_minMatch, value);
+        CCtxParams->cParams.minMatch = value;
+        return CCtxParams->cParams.minMatch;
+
+    case ZSTD_c_targetLength :
+        BOUNDCHECK(ZSTD_c_targetLength, value);
+        CCtxParams->cParams.targetLength = value;
+        return CCtxParams->cParams.targetLength;
+
+    case ZSTD_c_strategy :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_strategy, value);
+        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
+        return (size_t)CCtxParams->cParams.strategy;
+
+    case ZSTD_c_contentSizeFlag :
+        /* Content size written in frame header _when known_ (default:1) */
+        DEBUGLOG(4, "set content size flag = %u", (value!=0));
+        CCtxParams->fParams.contentSizeFlag = value != 0;
+        return CCtxParams->fParams.contentSizeFlag;
+
+    case ZSTD_c_checksumFlag :
+        /* A 32-bit content checksum will be calculated and written at the end of the frame (default:0) */
+        CCtxParams->fParams.checksumFlag = value != 0;
+        return CCtxParams->fParams.checksumFlag;
+
+    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
+        CCtxParams->fParams.noDictIDFlag = !value;
+        return !CCtxParams->fParams.noDictIDFlag;
+
+    case ZSTD_c_forceMaxWindow :
+        CCtxParams->forceWindow = (value != 0);
+        return CCtxParams->forceWindow;
+
+    case ZSTD_c_forceAttachDict : {
+        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+        CCtxParams->attachDictPref = pref;
+        return CCtxParams->attachDictPref;
+    }
+
+    case ZSTD_c_literalCompressionMode : {
+        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
+        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
+        CCtxParams->literalCompressionMode = lcm;
+        return CCtxParams->literalCompressionMode;
+    }
+
+    case ZSTD_c_nbWorkers :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_jobSize :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_enableDedicatedDictSearch :
+        CCtxParams->enableDedicatedDictSearch = (value!=0);
+        return CCtxParams->enableDedicatedDictSearch;
+
+    case ZSTD_c_enableLongDistanceMatching :
+        CCtxParams->ldmParams.enableLdm = (value!=0);
+        return CCtxParams->ldmParams.enableLdm;
+
+    case ZSTD_c_ldmHashLog :
+        if (value!=0)   /* 0 ==> auto */
+            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
+        CCtxParams->ldmParams.hashLog = value;
+        return CCtxParams->ldmParams.hashLog;
+
+    case ZSTD_c_ldmMinMatch :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
+        CCtxParams->ldmParams.minMatchLength = value;
+        return CCtxParams->ldmParams.minMatchLength;
+
+    case ZSTD_c_ldmBucketSizeLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
+        CCtxParams->ldmParams.bucketSizeLog = value;
+        return CCtxParams->ldmParams.bucketSizeLog;
+
+    case ZSTD_c_ldmHashRateLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
+        CCtxParams->ldmParams.hashRateLog = value;
+        return CCtxParams->ldmParams.hashRateLog;
+
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
+    case ZSTD_c_srcSizeHint :
+        if (value!=0)    /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+        CCtxParams->srcSizeHint = value;
+        return CCtxParams->srcSizeHint;
+
+    case ZSTD_c_stableInBuffer:
+        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
+        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->inBufferMode;
+
+    case ZSTD_c_stableOutBuffer:
+        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
+        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->outBufferMode;
+
+    case ZSTD_c_blockDelimiters:
+        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
+        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
+        return CCtxParams->blockDelimiters;
+
+    case ZSTD_c_validateSequences:
+        BOUNDCHECK(ZSTD_c_validateSequences, value);
+        CCtxParams->validateSequences = value;
+        return CCtxParams->validateSequences;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+}
+
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
+{
+    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParams_getParameter(
+        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
+{
+    switch(param)
+    {
+    case ZSTD_c_format :
+        *value = CCtxParams->format;
+        break;
+    case ZSTD_c_compressionLevel :
+        *value = CCtxParams->compressionLevel;
+        break;
+    case ZSTD_c_windowLog :
+        *value = (int)CCtxParams->cParams.windowLog;
+        break;
+    case ZSTD_c_hashLog :
+        *value = (int)CCtxParams->cParams.hashLog;
+        break;
+    case ZSTD_c_chainLog :
+        *value = (int)CCtxParams->cParams.chainLog;
+        break;
+    case ZSTD_c_searchLog :
+        *value = CCtxParams->cParams.searchLog;
+        break;
+    case ZSTD_c_minMatch :
+        *value = CCtxParams->cParams.minMatch;
+        break;
+    case ZSTD_c_targetLength :
+        *value = CCtxParams->cParams.targetLength;
+        break;
+    case ZSTD_c_strategy :
+        *value = (unsigned)CCtxParams->cParams.strategy;
+        break;
+    case ZSTD_c_contentSizeFlag :
+        *value = CCtxParams->fParams.contentSizeFlag;
+        break;
+    case ZSTD_c_checksumFlag :
+        *value = CCtxParams->fParams.checksumFlag;
+        break;
+    case ZSTD_c_dictIDFlag :
+        *value = !CCtxParams->fParams.noDictIDFlag;
+        break;
+    case ZSTD_c_forceMaxWindow :
+        *value = CCtxParams->forceWindow;
+        break;
+    case ZSTD_c_forceAttachDict :
+        *value = CCtxParams->attachDictPref;
+        break;
+    case ZSTD_c_literalCompressionMode :
+        *value = CCtxParams->literalCompressionMode;
+        break;
+    case ZSTD_c_nbWorkers :
+        assert(CCtxParams->nbWorkers == 0);
+        *value = CCtxParams->nbWorkers;
+        break;
+    case ZSTD_c_jobSize :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_enableDedicatedDictSearch :
+        *value = CCtxParams->enableDedicatedDictSearch;
+        break;
+    case ZSTD_c_enableLongDistanceMatching :
+        *value = CCtxParams->ldmParams.enableLdm;
+        break;
+    case ZSTD_c_ldmHashLog :
+        *value = CCtxParams->ldmParams.hashLog;
+        break;
+    case ZSTD_c_ldmMinMatch :
+        *value = CCtxParams->ldmParams.minMatchLength;
+        break;
+    case ZSTD_c_ldmBucketSizeLog :
+        *value = CCtxParams->ldmParams.bucketSizeLog;
+        break;
+    case ZSTD_c_ldmHashRateLog :
+        *value = CCtxParams->ldmParams.hashRateLog;
+        break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
+    case ZSTD_c_srcSizeHint :
+        *value = (int)CCtxParams->srcSizeHint;
+        break;
+    case ZSTD_c_stableInBuffer :
+        *value = (int)CCtxParams->inBufferMode;
+        break;
+    case ZSTD_c_stableOutBuffer :
+        *value = (int)CCtxParams->outBufferMode;
+        break;
+    case ZSTD_c_blockDelimiters :
+        *value = (int)CCtxParams->blockDelimiters;
+        break;
+    case ZSTD_c_validateSequences :
+        *value = (int)CCtxParams->validateSequences;
+        break;
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return 0;
+}
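+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): reads a parameter back after setting it. The getter reports the
+ * value stored in requestedParams, here the compression level just set.
+ */
+#if 0
+static int example_read_back_level(ZSTD_CCtx* cctx)
+{
+    int level = 0;
+    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 7);
+    ZSTD_CCtx_getParameter(cctx, ZSTD_c_compressionLevel, &level);
+    return level;   /* 7 on success */
+}
+#endif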
+
+/* ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  just copies `params` into `cctx`;
+ *  no compression action is performed, parameters are merely stored.
+ *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
+ *    This is possible even if a compression is ongoing,
+ *    in which case the new parameters take effect on the fly,
+ *    starting with the next compression job.
+ */
+size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "The context is in the wrong stage!");
+    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+                    "Can't override parameters with cdict attached (some must "
+                    "be inherited from the cdict).");
+
+    cctx->requestedParams = *params;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't set pledgedSrcSize when not in init stage.");
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
+        int const compressionLevel,
+        size_t const dictSize);
+static int ZSTD_dedicatedDictSearch_isSupported(
+        const ZSTD_compressionParameters* cParams);
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams);
+
+/*
+ * Initializes the local dict using the requested parameters.
+ * NOTE: This does not use the pledged src size, because the dictionary may be
+ * used for more than one compression.
+ */
+static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+{
+    ZSTD_localDict* const dl = &cctx->localDict;
+    if (dl->dict == NULL) {
+        /* No local dictionary. */
+        assert(dl->dictBuffer == NULL);
+        assert(dl->cdict == NULL);
+        assert(dl->dictSize == 0);
+        return 0;
+    }
+    if (dl->cdict != NULL) {
+        assert(cctx->cdict == dl->cdict);
+        /* Local dictionary already initialized. */
+        return 0;
+    }
+    assert(dl->dictSize > 0);
+    assert(cctx->cdict == NULL);
+    assert(cctx->prefixDict.dict == NULL);
+
+    dl->cdict = ZSTD_createCDict_advanced2(
+            dl->dict,
+            dl->dictSize,
+            ZSTD_dlm_byRef,
+            dl->dictContentType,
+            &cctx->requestedParams,
+            cctx->customMem);
+    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
+    cctx->cdict = dl->cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_loadDictionary_advanced(
+        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't load a dictionary when ctx is not in init stage.");
+    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
+    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
+    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
+        return 0;
+    if (dictLoadMethod == ZSTD_dlm_byRef) {
+        cctx->localDict.dict = dict;
+    } else {
+        void* dictBuffer;
+        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                        "no malloc for static CCtx");
+        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
+        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+        ZSTD_memcpy(dictBuffer, dict, dictSize);
+        cctx->localDict.dictBuffer = dictBuffer;
+        cctx->localDict.dict = dictBuffer;
+    }
+    cctx->localDict.dictSize = dictSize;
+    cctx->localDict.dictContentType = dictContentType;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
+      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
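+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): contrasts the two load methods above. With byCopy the context
+ * keeps a private copy of the dictionary; with byRef the caller must keep
+ * dictBuf alive and unmodified for as long as the dictionary is in use.
+ */
+#if 0
+static size_t example_load_dict(ZSTD_CCtx* cctx,
+                                const void* dictBuf, size_t dictSize,
+                                int byRef)
+{
+    return byRef
+        ? ZSTD_CCtx_loadDictionary_byReference(cctx, dictBuf, dictSize)
+        : ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictSize);
+}
+#endif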
+
+
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a dict when ctx not in init stage.");
+    /* Free the existing local cdict (if any) to save memory. */
+    ZSTD_clearAllDicts(cctx);
+    cctx->cdict = cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a pool when ctx not in init stage.");
+    cctx->pool = pool;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+size_t ZSTD_CCtx_refPrefix_advanced(
+        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a prefix when ctx not in init stage.");
+    ZSTD_clearAllDicts(cctx);
+    if (prefix != NULL && prefixSize > 0) {
+        cctx->prefixDict.dict = prefix;
+        cctx->prefixDict.dictSize = prefixSize;
+        cctx->prefixDict.dictContentType = dictContentType;
+    }
+    return 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  When resetting parameters, also discards any loaded dictionary */
+size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        cctx->streamStage = zcss_init;
+        cctx->pledgedSrcSizePlusOne = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                        "Can't reset parameters only when not in init stage.");
+        ZSTD_clearAllDicts(cctx);
+        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+    }
+    return 0;
+}
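+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): reuses one context for two frames. Parameters persist across
+ * frames; only the session is reset in between (ZSTD_compress2() also
+ * resets the session on entry, the explicit call just makes it visible).
+ */
+#if 0
+static size_t example_compress_two_frames(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                          const void* src, size_t srcSize)
+{
+    size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
+    if (ZSTD_isError(ret)) return ret;
+    ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+    if (ZSTD_isError(ret)) return ret;
+    ret = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+    if (ZSTD_isError(ret)) return ret;
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+}
+#endif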
+
+
+/* ZSTD_checkCParams() :
+    check that CParam values remain within the authorized range.
+    @return : 0, or an error code if one value is beyond the authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
+    return 0;
+}
+
+/* ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type) {                                \
+        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+    }
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
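+
+/* Example (illustrative): the binary-tree strategies (ZSTD_btlazy2 and up)
+ * store two entries per position in the chain table, so the effective cycle
+ * log is one less than the table log: a chainLog of 24 yields a cycleLog of
+ * 23. For hash-chain strategies, cycleLog equals the chainLog passed in. */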
+
+/* ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and chainLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
+    /* No dictionary ==> No change */
+    if (dictSize == 0) {
+        return windowLog;
+    }
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    {
+        U64 const windowSize = 1ULL << windowLog;
+        U64 const dictAndWindowSize = dictSize + windowSize;
+        /* If the window size is already large enough to fit both the source and the dictionary
+         * then just use the window size. Otherwise adjust so that it fits the dictionary and
+         * the window.
+         */
+        if (windowSize >= dictSize + srcSize) {
+            return windowLog; /* Window size large enough already */
+        } else if (dictAndWindowSize >= maxWindowSize) {
+            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
+        } else  {
+            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
+        }
+    }
+}
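+
+/* Worked example for ZSTD_dictAndWindowLog() (illustrative, assumed values):
+ * windowLog=20 (1 MiB window), dictSize=512 KiB, srcSize=2 MiB.
+ * The window alone (1 MiB) cannot cover dictSize + srcSize (2.5 MiB), and
+ * dictAndWindowSize = 1.5 MiB is below the max window size, so the result is
+ * ZSTD_highbit32(1.5 MiB - 1) + 1 = 21 : a 2 MiB window, large enough to
+ * reference the dictionary plus the original window. */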
+
+/* ZSTD_adjustCParams_internal() :
+ *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
+ *  mostly downsize to reduce memory consumption and initialization latency.
+ * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
+ *  note : `srcSize==0` means 0!
+ *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize,
+                            ZSTD_cParamMode_e mode)
+{
+    const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    assert(ZSTD_checkCParams(cPar)==0);
+
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+        /* If we don't know the source size, don't make any
+         * assumptions about it. We will already have selected
+         * smaller parameters if a dictionary is in use.
+         */
+        break;
+    case ZSTD_cpm_createCDict:
+        /* Assume a small source size when creating a dictionary
+         * with an unknown source size.
+         */
+        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            srcSize = minSrcSize;
+        break;
+    case ZSTD_cpm_attachDict:
+        /* Dictionary has its own dedicated parameters which have
+         * already been selected. We are selecting parameters
+         * for only the source.
+         */
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+
+    /* resize windowLog if input is small enough, to use less memory */
+    if ( (srcSize < maxWindowResize)
+      && (dictSize < maxWindowResize) )  {
+        U32 const tSize = (U32)(srcSize + dictSize);
+        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
+        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
+                            ZSTD_highbit32(tSize-1) + 1;
+        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
+    }
+    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
+        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
+        if (cycleLog > dictAndWindowLog)
+            cPar.chainLog -= (cycleLog - dictAndWindowLog);
+    }
+
+    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
+
+    return cPar;
+}
+
+ZSTD_compressionParameters
+ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+                   unsigned long long srcSize,
+                   size_t dictSize)
+{
+    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
+    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
+}
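+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): shrinks level-19 parameters for a small input of known size,
+ * before sizing a context for it.
+ */
+#if 0
+static ZSTD_compressionParameters example_params_for_small_input(void)
+{
+    ZSTD_compressionParameters const cParams =
+        ZSTD_getCParams(19, 0 /* srcSize unknown */, 0 /* no dict */);
+    /* For a 4 KB input, windowLog and the table logs get downsized. */
+    return ZSTD_adjustCParams(cParams, 4096, 0);
+}
+#endif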
+
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+static void ZSTD_overrideCParams(
+              ZSTD_compressionParameters* cParams,
+        const ZSTD_compressionParameters* overrides)
+{
+    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
+    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
+    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
+    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
+    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
+    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
+    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    ZSTD_compressionParameters cParams;
+    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+      srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
+    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
+    assert(!ZSTD_checkCParams(cParams));
+    /* srcSizeHint == 0 means 0 */
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
+}
+
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const U32 forCCtx)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                ? optPotentialSpace
+                                : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        const ZSTD_compressionParameters* cParams,
+        const ldmParams_t* ldmParams,
+        const int isStatic,
+        const size_t buffInSize,
+        const size_t buffOutSize,
+        const U64 pledgedSrcSize)
+{
+    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
+    size_t const maxNbSeq = blockSize / divider;
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+
+    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
+    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
+    size_t const ldmSeqSpace = ldmParams->enableLdm ?
+        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+
+    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+                             + ZSTD_cwksp_alloc_size(buffOutSize);
+
+    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+
+    size_t const neededSpace =
+        cctxSpace +
+        entropySpace +
+        blockStateSpace +
+        ldmSpace +
+        ldmSeqSpace +
+        matchStateSize +
+        tokenSpace +
+        bufferSpace;
+
+    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+    return neededSpace;
+}
+
+size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    /* estimateCCtxSize is for one-shot compression. So no buffers should
+     * be needed. However, we still allocate two 0-sized buffers, which can
+     * take space under ASAN. */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Pick, for the given level, the cParams across all srcSize tiers that yield the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
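+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): pairs ZSTD_estimateCCtxSize() with ZSTD_initStaticCCtx() for a
+ * one-shot compression that performs no internal allocation. kmalloc() is
+ * just one possible way to obtain the workspace in kernel context.
+ */
+#if 0
+static size_t example_static_cctx_compress(void* dst, size_t dstCapacity,
+                                           const void* src, size_t srcSize)
+{
+    size_t const wkspSize = ZSTD_estimateCCtxSize(3);
+    void* const wksp = kmalloc(wkspSize, GFP_KERNEL);
+    ZSTD_CCtx* cctx;
+    size_t cSize;
+    if (wksp == NULL) return ERROR(memory_allocation);
+    cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
+    cSize = cctx ? ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 3)
+                 : ERROR(memory_allocation);
+    kfree(wksp);
+    return cSize;
+}
+#endif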
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+
+        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            ZSTD_CONTENTSIZE_UNKNOWN);
+    }
+}
+
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    return ZSTD_estimateCStreamSize_usingCParams(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for the current frame.
+ * It can also report progression from inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+                                cctx->inBuffPos - cctx->inToCompress;
+        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+        fp.ingested = cctx->consumedSrcSize + buffered;
+        fp.consumed = cctx->consumedSrcSize;
+        fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
+        return fp;
+}   }
+
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
+ */
+size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
+}
+
+static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
+                                    ZSTD_compressionParameters cParams2)
+{
+    (void)cParams1;
+    (void)cParams2;
+    assert(cParams1.windowLog    == cParams2.windowLog);
+    assert(cParams1.chainLog     == cParams2.chainLog);
+    assert(cParams1.hashLog      == cParams2.hashLog);
+    assert(cParams1.searchLog    == cParams2.searchLog);
+    assert(cParams1.minMatch     == cParams2.minMatch);
+    assert(cParams1.targetLength == cParams2.targetLength);
+    assert(cParams1.strategy     == cParams2.strategy);
+}
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+    int i;
+    for (i = 0; i < ZSTD_REP_NUM; ++i)
+        bs->rep[i] = repStartValue[i];
+    bs->entropy.huf.repeatMode = HUF_repeat_none;
+    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be initialized (NULL is a valid value for both).
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+    ZSTD_window_clear(&ms->window);
+
+    ms->nextToUpdate = ms->window.dictLimit;
+    ms->loadedDictEnd = 0;
+    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;
+}
+
+/*
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+    ZSTDcrp_makeClean,
+    ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
+
+/*
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+    ZSTDirp_continue,
+    ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+typedef enum {
+    ZSTD_resetTarget_CDict,
+    ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
+
+static size_t
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      ZSTD_cwksp* ws,
+                const ZSTD_compressionParameters* cParams,
+                const ZSTD_compResetPolicy_e crp,
+                const ZSTD_indexResetPolicy_e forceResetIndex,
+                const ZSTD_resetTarget_e forWho)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
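+
+/* Worked note (illustrative): when a context is reused for many inputs each
+ * smaller than ZSTD_INDEXOVERFLOW_MARGIN (16 MB), indices creep toward
+ * ZSTD_CURRENT_MAX; once they come within 16 MB of it, this check fires and
+ * the next reset memset()s the tables instead of calling reduceIndex(). */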
+
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_CCtx_params params,
+                                      U64 const pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
+{
+    ZSTD_cwksp* const ws = &zc->workspace;
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
+                (U32)pledgedSrcSize, params.cParams.windowLog);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
+    zc->isFirstBlock = 1;
+
+    if (params.ldmParams.enableLdm) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+    }
+
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        size_t const maxNbSeq = blockSize / divider;
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+
+        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        ZSTD_indexResetPolicy_e needsIndexReset =
+            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+
+        size_t const neededSpace =
+            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                buffInSize, buffOutSize, pledgedSrcSize);
+        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        /* Check if workspace is large enough, alloc a new one if needed */
+        {
+            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+
+            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workspaceTooSmall || workspaceWasteful) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
+                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+        }   }
+
+        ZSTD_cwksp_clear(ws);
+
+        /* init params */
+        zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        zc->producedCSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
+            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
+
+        xxh64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->dictContentSize = 0;
+
+        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        /* buffers */
+        zc->bufferedPolicy = zbuff;
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const numBuckets =
+                  ((size_t)1) << (params.ldmParams.hashLog -
+                                  params.ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+            &zc->blockState.matchState,
+            ws,
+            &params.cParams,
+            crp,
+            needsIndexReset,
+            ZSTD_resetTarget_CCtx), "");
+
+        /* ldm hash table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+            zc->maxNbLdmSequences = maxNbLdmSeq;
+
+            ZSTD_window_init(&zc->ldmState.window);
+            ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
+        }
+
+        /* Due to alignment, when reusing a workspace, we can actually consume
+         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
+         */
+        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
+               ZSTD_cwksp_used(ws) <= neededSpace + 3);
+
+        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        zc->initialized = 1;
+
+        return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ */
+static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
+    8 KB,  /* unused */
+    8 KB,  /* ZSTD_fast */
+    16 KB, /* ZSTD_dfast */
+    32 KB, /* ZSTD_greedy */
+    32 KB, /* ZSTD_lazy */
+    32 KB, /* ZSTD_lazy2 */
+    32 KB, /* ZSTD_btlazy2 */
+    32 KB, /* ZSTD_btopt */
+    8 KB,  /* ZSTD_btultra */
+    8 KB   /* ZSTD_btultra2 */
+};
+
+static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
+                                 const ZSTD_CCtx_params* params,
+                                 U64 pledgedSrcSize)
+{
+    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
+    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
+    return dedicatedDictSearch
+        || ( ( pledgedSrcSize <= cutoff
+            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+            || params->attachDictPref == ZSTD_dictForceAttach )
+          && params->attachDictPref != ZSTD_dictForceCopy
+          && !params->forceWindow ); /* dictMatchState isn't correctly
+                                      * handled in _enforceMaxDist */
+}
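+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): a caller can bias the attach-vs-copy decision above explicitly
+ * instead of relying on the size cutoffs. Copying the tables costs more up
+ * front but speeds up the match finders; ZSTD_dictForceAttach trades the
+ * other way.
+ */
+#if 0
+static size_t example_force_dict_copy(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    size_t const ret =
+        ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceCopy);
+    if (ZSTD_isError(ret)) return ret;
+    return ZSTD_CCtx_refCDict(cctx, cdict);
+}
+#endif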
+
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
+{
+    {
+        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
+        unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Resize working context table params for input only, since the dict
+         * has its own tables. */
+        /* pledgedSrcSize == 0 means 0! */
+
+        if (cdict->matchState.dedicatedDictSearch) {
+            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
+        }
+
+        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
+                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_makeClean, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
+    }
+
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
+
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictEnd;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            /* loadedDictEnd is expressed within the referential of the active context */
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+    }   }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            ZSTD_CCtx_params params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+
+    assert(!cdict->matchState.dedicatedDictSearch);
+
+    DEBUGLOG(4, "copying dictionary into context");
+
+    {   unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Copy only compression parameters related to tables. */
+        params.cParams = *cdict_cParams;
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_leaveDirty, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
+        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
+
+        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
+               cdict->matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+               cdict->matchState.chainTable,
+               chainSize * sizeof(U32));
+    }
+
+    /* Zero the hashTable3, since the cdict never fills it */
+    {   int const h3log = cctx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+        assert(cdict->matchState.hashLog3 == 0);
+        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
+    /* copy dictionary offsets */
+    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+/* We have a choice between copying the dictionary context into the working
+ * context, or referencing the dictionary context from the working context
+ * in-place. We decide here which strategy to use. */
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            const ZSTD_CCtx_params* params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+
+    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
+                (unsigned)pledgedSrcSize);
+
+    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
+        return ZSTD_resetCCtx_byAttachingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    } else {
+        return ZSTD_resetCCtx_byCopyingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    }
+}
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  The "context", in this case, refers to the hash and chain tables,
+ *  entropy tables, and dictionary references.
+ * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
+ * @return : 0, or an error code */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
+    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
+                    "Can't copy a ctx that's not in init stage.");
+
+    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
+    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
+        /* Copy only compression parameters related to tables. */
+        params.cParams = srcCCtx->appliedParams.cParams;
+        params.fParams = fParams;
+        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                ZSTDcrp_leaveDirty, zbuff);
+        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
+        int const h3log = srcCCtx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
+               srcCCtx->blockState.matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
+               srcCCtx->blockState.matchState.chainTable,
+               chainSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
+               srcCCtx->blockState.matchState.hashTable3,
+               h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
+    /* copy dictionary offsets */
+    {
+        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
+        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+    dstCCtx->dictID = srcCCtx->dictID;
+    dstCCtx->dictContentSize = srcCCtx->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
+
+    return 0;
+}
+
+/*! ZSTD_copyCCtx() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown".
+ *  @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
+                                fParams, pledgedSrcSize,
+                                zbuff);
+}
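+
+/* Illustrative usage sketch (not part of upstream zstd; compiled out via
+ * #if 0): clones a context whose (potentially expensive) setup has already
+ * been done, so the preparation cost is paid once. The source context must
+ * still be in its init stage, i.e. nothing compressed yet.
+ */
+#if 0
+static size_t example_clone_prepared_ctx(ZSTD_CCtx* dstCCtx,
+                                         const ZSTD_CCtx* preparedCCtx,
+                                         unsigned long long srcSize)
+{
+    return ZSTD_copyCCtx(dstCCtx, preparedCCtx, srcSize);
+}
+#endif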
+
+
+#define ZSTD_ROWSIZE 16
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presumes table size is a multiple of ZSTD_ROWSIZE,
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
+    int cellNb = 0;
+    int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be cast to int */
+
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
+            if (table[cellNb] < reducerValue) table[cellNb] = 0;
+            else table[cellNb] -= reducerValue;
+            cellNb++;
+    }   }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
+{
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
+        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+    }
+
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+    }
+
+    if (ms->hashLog3) {
+        U32 const h3Size = (U32)1 << ms->hashLog3;
+        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
+    }
+}
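+
+/* Worked example for ZSTD_reduceTable() (illustrative): with a reducerValue
+ * of 600, a table cell holding index 1000 becomes 400, while a cell holding
+ * 500 is squashed to 0. In the btlazy2 variant, a cell equal to
+ * ZSTD_DUBT_UNSORTED_MARK first has reducerValue added back, so it still
+ * holds the mark after the subtraction. */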
+
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+{
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    BYTE* const llCodeTable = seqStorePtr->llCode;
+    BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    U32 u;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
+    for (u=0; u<nbSeq; u++) {
+        U32 const llv = sequences[u].litLength;
+        U32 const mlv = sequences[u].matchLength;
+        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
+    }
+    if (seqStorePtr->longLengthID==1)
+        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+    if (seqStorePtr->longLengthID==2)
+        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+/* ZSTD_useTargetCBlockSize():
+ * Tells whether the target compressed block size parameter is in use.
+ * If so, compression makes a best effort to produce compressed blocks of around targetCBlockSize.
+ * Returns 1 if in use, 0 otherwise. */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+    return (cctxParams->targetCBlockSize != 0);
+}
+
+/* ZSTD_entropyCompressSequences_internal():
+ * actually compresses both literals and sequences */
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+                          const ZSTD_entropyCTables_t* prevEntropy,
+                                ZSTD_entropyCTables_t* nextEntropy,
+                          const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                void* entropyWorkspace, size_t entropyWkspSize,
+                          const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    unsigned* count = (unsigned*)entropyWorkspace;
+    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    BYTE* seqHead;
+    BYTE* lastNCount = NULL;
+
+    entropyWorkspace = count + (MaxSeq + 1);
+    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
+
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    entropyWorkspace, entropyWkspSize,
+                                    bmi2);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+    if (nbSeq < 128) {
+        *op++ = (BYTE)nbSeq;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
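+    /* Worked example of the nbSeq encoding above (illustrative):
+     *   nbSeq = 100          -> 1 byte : 0x64
+     *   nbSeq = 1000         -> 2 bytes: 0x83 0xE8   ((1000>>8)+0x80, 1000&0xFF)
+     *   nbSeq = LONGNBSEQ+5  -> 3 bytes: 0xFF, then LE16(5) = 05 00 */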
+    assert(op <= oend);
+    if (nbSeq==0) {
+        /* Copy the old tables over as if we repeated them */
+        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+    assert(op <= oend);
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->fse.litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                count, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->fse.litlengthCTable,
+                sizeof(prevEntropy->fse.litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
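+    /* Encoding-type outcomes per RFC 8878: set_basic uses the predefined default
+     * distribution, set_rle stores the single repeated symbol in 1 byte,
+     * set_compressed writes a fresh FSE table with the block, and set_repeat
+     * reuses the table from the previous block or dictionary. */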
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->fse.offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                count, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->fse.offcodeCTable,
+                sizeof(prevEntropy->fse.offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                count, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->fse.matchlengthCTable,
+                sizeof(prevEntropy->fse.matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
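+    /* seqHead bit layout: bits 7-6 = LLtype, 5-4 = Offtype, 3-2 = MLtype,
+     * 1-0 = 0 (reserved). e.g. LLtype=set_compressed(2), Offtype=set_rle(1),
+     * MLtype=set_basic(0) -> (2<<6)+(1<<4)+(0<<2) = 0x90 (illustrative). */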
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, (size_t)(oend - op),
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        assert(op <= oend);
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+        if (lastNCount && (op - lastNCount) < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> (op - lastNCount) == 3 */
+            assert(op - lastNCount == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
+    return (size_t)(op - ostart);
+}
+
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+                       const ZSTD_entropyCTables_t* prevEntropy,
+                             ZSTD_entropyCTables_t* nextEntropy,
+                       const ZSTD_CCtx_params* cctxParams,
+                             void* dst, size_t dstCapacity,
+                             size_t srcSize,
+                             void* entropyWorkspace, size_t entropyWkspSize,
+                             int bmi2)
+{
+    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+                            dst, dstCapacity,
+                            entropyWorkspace, entropyWkspSize, bmi2);
+    if (cSize == 0) return 0;
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block.
+     */
+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+        return 0;  /* block not compressed */
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+
+    /* Check compressibility */
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+        if (cSize >= maxCSize) return 0;  /* block not compressed */
+    }
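+    /* Worked example, assuming ZSTD_minGain() == (srcSize>>6)+2 for this strategy:
+     * srcSize = 10000 -> minGain = 158, so any cSize >= 9842 is judged not
+     * worth compressing and the caller emits a raw block instead. */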
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    return cSize;
+}
+
+/* ZSTD_selectBlockCompressor() :
+ * Not static, but for internal use only (used by the long distance matcher).
+ * Assumption : strat is a valid strategy. */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+{
+    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast,
+          ZSTD_compressBlock_doubleFast,
+          ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy,
+          ZSTD_compressBlock_lazy2,
+          ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt,
+          ZSTD_compressBlock_btultra,
+          ZSTD_compressBlock_btultra2 },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict,
+          ZSTD_compressBlock_doubleFast_extDict,
+          ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,
+          ZSTD_compressBlock_lazy2_extDict,
+          ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict,
+          ZSTD_compressBlock_btultra_extDict,
+          ZSTD_compressBlock_btultra_extDict },
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          ZSTD_compressBlock_doubleFast_dictMatchState,
+          ZSTD_compressBlock_greedy_dictMatchState,
+          ZSTD_compressBlock_lazy_dictMatchState,
+          ZSTD_compressBlock_lazy2_dictMatchState,
+          ZSTD_compressBlock_btlazy2_dictMatchState,
+          ZSTD_compressBlock_btopt_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState },
+        { NULL  /* default for 0 */,
+          NULL,
+          NULL,
+          ZSTD_compressBlock_greedy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+          NULL,
+          NULL,
+          NULL,
+          NULL }
+    };
+    ZSTD_blockCompressor selectedCompressor;
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    assert(selectedCompressor != NULL);
+    return selectedCompressor;
+}
+
+static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
+                                   const BYTE* anchor, size_t lastLLSize)
+{
+    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
+    seqStorePtr->lit += lastLLSize;
+}
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->sequences = ssPtr->sequencesStart;
+    ssPtr->longLengthID = 0;
+}
+
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+{
+    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    /* Assert that we have correctly flushed the ctx params into the ms's copy */
+    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
+            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
+        } else {
+            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        }
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
+    }
+    ZSTD_resetSeqStore(&(zc->seqStore));
+    /* required for optimal parser to read stats from dictionary */
+    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
+    /* tell the optimal parser how we expect to compress literals */
+    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
+    /* a gap between an attached dict and the current window is not safe,
+     * they must remain adjacent,
+     * and when that stops being the case, the dict must be unset */
+    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
+
+    /* limited update after a very long match */
+    {   const BYTE* const base = ms->window.base;
+        const BYTE* const istart = (const BYTE*)src;
+        const U32 curr = (U32)(istart-base);
+        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
+        if (curr > ms->nextToUpdate + 384)
+            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
+    }
+
+    /* select and store sequences */
+    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
+        size_t lastLLSize;
+        {   int i;
+            for (i = 0; i < ZSTD_REP_NUM; ++i)
+                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+        }
+        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+            assert(!zc->appliedParams.ldmParams.enableLdm);
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&zc->externSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+        } else if (zc->appliedParams.ldmParams.enableLdm) {
+            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+
+            ldmSeqStore.seq = zc->ldmSequences;
+            ldmSeqStore.capacity = zc->maxNbLdmSequences;
+            /* Updates ldmSeqStore.size */
+            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+                                               &zc->appliedParams.ldmParams,
+                                               src, srcSize), "");
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&ldmSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(ldmSeqStore.pos == ldmSeqStore.size);
+        } else {   /* not long range mode */
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ms->ldmSeqStore = NULL;
+            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+        }
+        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+    }   }
+    return ZSTDbss_compress;
+}
+
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
+    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
+    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
+    size_t literalsRead = 0;
+    size_t lastLLSize;
+
+    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    size_t i;
+    repcodes_t updatedRepcodes;
+
+    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+    /* Ensure we have enough space for last literals "sequence" */
+    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
+    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (i = 0; i < seqStoreSeqSize; ++i) {
+        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
+        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].rep = 0;
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+            /* Derive the correct offset corresponding to a repcode */
+            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            if (outSeqs[i].litLength != 0) {
+                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+            } else {
+                if (outSeqs[i].rep == 3) {
+                    rawOffset = updatedRepcodes.rep[0] - 1;
+                } else {
+                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+                }
+            }
+        }
+        outSeqs[i].offset = rawOffset;
+        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
+           so we provide seqStoreSeqs[i].offset - 1 */
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
+                                         seqStoreSeqs[i].offset - 1,
+                                         seqStoreSeqs[i].litLength == 0);
+        literalsRead += outSeqs[i].litLength;
+    }
+    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
+     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
+     * for the block boundary, according to the API.
+     */
+    assert(seqStoreLiteralsSize >= literalsRead);
+    lastLLSize = seqStoreLiteralsSize - literalsRead;
+    outSeqs[i].litLength = (U32)lastLLSize;
+    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+    seqStoreSeqSize++;
+    zc->seqCollector.seqIndex += seqStoreSeqSize;
+}
+
+size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                              size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize);
+    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
+    SeqCollector seqCollector;
+
+    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    return zc->seqCollector.seqIndex;
+}
+
+size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
+    size_t in = 0;
+    size_t out = 0;
+    for (; in < seqsSize; ++in) {
+        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
+            if (in != seqsSize - 1) {
+                sequences[in+1].litLength += sequences[in].litLength;
+            }
+        } else {
+            sequences[out] = sequences[in];
+            ++out;
+        }
+    }
+    return out;
+}
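+/* Illustration: given {ll=5, ml=0, of=0} {ll=2, ml=4, of=8}, the delimiter's
+ * litLength folds into the following sequence, leaving {ll=7, ml=4, of=8};
+ * the returned count shrinks by one per removed delimiter. */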
+
+/* Unrolled loop to read four size_ts of input at a time. Returns 1 if the input is RLE, 0 if not. */
+static int ZSTD_isRLE(const BYTE* src, size_t length) {
+    const BYTE* ip = src;
+    const BYTE value = ip[0];
+    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
+    const size_t unrollSize = sizeof(size_t) * 4;
+    const size_t unrollMask = unrollSize - 1;
+    const size_t prefixLength = length & unrollMask;
+    size_t i;
+    size_t u;
+    if (length == 1) return 1;
+    /* Check if prefix is RLE first before using unrolled loop */
+    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+        return 0;
+    }
+    for (i = prefixLength; i != length; i += unrollSize) {
+        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+            if (MEM_readST(ip + i + u) != valueST) {
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
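+/* Illustration: for value == 0xAB on a 64-bit target, valueST ==
+ * 0xABABABABABABABAB, so each MEM_readST() compares sizeof(size_t) input
+ * bytes against the repeated byte in one machine-word comparison. */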
+
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+    return nbSeqs < 4 && nbLits < 10;
+}
+
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+    zc->blockState.nextCBlock = tmp;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame)
+{
+    /* This is the upper bound for the length of an RLE block.
+     * It isn't the actual upper bound; finding the real threshold
+     * needs further investigation.
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        return 0;
+    }
+
+    /* encode sequences and literals */
+    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            srcSize,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+            zc->bmi2);
+
+    /* Note: collectSequences was already handled above, so this branch is
+     * effectively unreachable once entropy compression has run. */
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        return 0;
+    }
+
+    if (frame &&
+        /* We don't want to emit our first block as an RLE even if it qualifies, because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
+         * This is only an issue for zstd <= v1.4.3.
+         */
+        !zc->isFirstBlock &&
+        cSize < rleMaxLength &&
+        ZSTD_isRLE(ip, srcSize))
+    {
+        cSize = 1;
+        op[0] = ip[0];
+    }
+
+out:
+    if (!ZSTD_isError(cSize) && cSize > 1) {
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+    }
+    /* We check that dictionaries have offset codes available for the first
+     * block. After the first block, the offcode table might not have large
+     * enough codes to represent the offsets in the data.
+     */
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const size_t bss, U32 lastBlock)
+{
+    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+    if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as an RLE even if it qualifies, because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input" error.
+            * This is only an issue for zstd <= v1.4.3.
+            */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
+        /* Attempt superblock compression.
+         *
+         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+         * standard ZSTD_compressBound(). This is a problem, because even if we have
+         * space now, taking an extra byte now could cause us to run out of space later
+         * and violate ZSTD_compressBound().
+         *
+         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+         *
+         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+         * uncompressed block in these cases:
+         *   * cSize == 0: Return code for an uncompressed block.
+         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+         *     output space.
+         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+         *     emit an uncompressed block.
+         */
+        {
+            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+            if (cSize != ERROR(dstSize_tooSmall)) {
+                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    return cSize;
+                }
+            }
+        }
+    }
+
+    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+    /* Superblock compression failed, attempt to emit a single no compress block.
+     * The decoder will be able to stream this block since it is uncompressed.
+     */
+    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               U32 lastBlock)
+{
+    size_t cSize = 0;
+    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
+    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+
+    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         void const* ip,
+                                         void const* iend)
+{
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+        ZSTD_reduceIndex(ms, params, correction);
+        ZSTD_cwksp_mark_tables_clean(ws);
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
+/*! ZSTD_compress_frameChunk() :
+*   Compress a chunk of data into one or multiple blocks.
+*   All blocks will be terminated, all input will be consumed.
+*   The function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+*   The frame is assumed to have already been started (header already produced).
+*   @return : compressed size, or an error code
+*/
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     U32 lastFrameChunk)
+{
+    size_t blockSize = cctx->blockSize;
+    size_t remaining = srcSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
+
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
+
+    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
+        xxh64_update(&cctx->xxhState, src, srcSize);
+
+    while (remaining) {
+        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+
+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
+                        dstSize_tooSmall,
+                        "not enough space to store compressed block");
+        if (remaining < blockSize) blockSize = remaining;
+
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowLimit */
+        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
+
+        {   size_t cSize;
+            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
+                assert(cSize > 0);
+                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else {
+                cSize = ZSTD_compressBlock_internal(cctx,
+                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
+                                        ip, blockSize, 1 /* frame */);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
+
+                if (cSize == 0) {  /* block is not compressible */
+                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+                } else {
+                    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+                    MEM_writeLE24(op, cBlockHeader);
+                    cSize += ZSTD_blockHeaderSize;
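+                    /* Block header bit layout (written LE24): bit 0 = lastBlock,
+                     * bits 1-2 = block type, bits 3-23 = size. e.g. a non-last
+                     * compressed block with cSize 100:
+                     * 0 + (bt_compressed(2)<<1) + (100<<3) = 0x324 -> bytes 24 03 00 */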
+                }
+            }
+
+            ip += blockSize;
+            assert(remaining >= blockSize);
+            remaining -= blockSize;
+            op += cSize;
+            assert(dstCapacity >= cSize);
+            dstCapacity -= cSize;
+            cctx->isFirstBlock = 0;
+            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
+                        (unsigned)cSize);
+    }   }
+
+    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
+    return (size_t)(op-ostart);
+}
+
+
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
+{   BYTE* const op = (BYTE*)dst;
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
+    U32   const checksumFlag = params->fParams.checksumFlag>0;
+    U32   const windowSize = (U32)1 << params->cParams.windowLog;
+    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+    U32   const fcsCode = params->fParams.contentSizeFlag ?
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
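+    /* Frame_Header_Descriptor bit layout: bits 7-6 = fcsCode, bit 5 = singleSegment,
+     * bits 4-3 = 0 (unused/reserved), bit 2 = checksumFlag, bits 1-0 = dictIDSizeCode.
+     * e.g. checksum on, single segment, fcsCode=0, no dictID:
+     * (1<<5)+(1<<2) = 0x24 (illustrative). */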
+    size_t pos=0;
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeSkippableFrame() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ *
+ * Returns the total number of bytes written, or a ZSTD error code.
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
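+/* Worked example: ZSTD_writeSkippableFrame(dst, cap, "abcd", 4, 0) emits 12 bytes,
+ * LE32(0x184D2A50) | LE32(4) | "abcd", and returns srcSize + ZSTD_SKIPPABLEHEADERSIZE
+ * == 12 (illustrative; 0x184D2A50 is ZSTD_MAGIC_SKIPPABLE_START). */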
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                               U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
+        cctx->consumedSrcSize += srcSize;
+        cctx->producedCSize += (cSize + fhSize);
+        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+            RETURN_ERROR_IF(
+                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
+                srcSize_wrong,
+                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+                (unsigned)cctx->pledgedSrcSizePlusOne-1,
+                (unsigned)cctx->consumedSrcSize);
+        }
+        return cSize + fhSize;
+    }
+}
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
+}
+
+
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+    assert(!ZSTD_checkCParams(cParams));
+    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
+}
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
+
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ldmState_t* ls,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    if (params->ldmParams.enableLdm && ls != NULL) {
+        ZSTD_window_update(&ls->window, src, srcSize);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
+    /* Assert that the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = (size_t)(iend - ip);
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+
+        if (params->ldmParams.enableLdm && ls != NULL)
+            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
+                assert(chunk == remaining); /* must load everything in one go */
+                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
+            } else if (chunk >= HASH_READ_SIZE) {
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            }
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that do show up cause problems
+ * when FSE encoding. Mark dictionaries with zero-probability symbols as FSE_repeat_check;
+ * only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;   /* skip magic number (4 bytes) and dictID (4 bytes) */
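+    /* Entropy section layout consumed below, after the 8-byte header:
+     * Huffman literals table | FSE offcode table | FSE matchlength table
+     * | FSE litlength table | 3 x LE32 repcodes | dictionary content */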
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights. Otherwise, we set it to check */
+        if (!hasZeroWeights)
+            bs->entropy.huf.repeatMode = HUF_repeat_valid;
+
+        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
+        dictPtr += hufHeaderSize;
+    }
+
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        /* fill all offset symbols to avoid garbage at end of table */
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.offcodeCTable,
+                offcodeNCount, MaxOff, offcodeLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.matchlengthCTable,
+                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.litlengthCTable,
+                litlengthNCount, litlengthMaxValue, litlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
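+            /* e.g. dictContentSize == 0 -> maxOffset == 128 KB == 1<<17,
+             * so offcodeMax == ZSTD_highbit32(1<<17) == 17 (illustrative). */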
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
+        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
+
+        /* All repCodes must be <= dictContentSize and != 0 */
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
+    }   }   }
+
+    return dictPtr - (const BYTE*)dict;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : the magic number has already been checked,
+ *                and dictSize >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    size_t dictID;
+    size_t eSize;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {
+        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+    }
+    return dictID;
+}
+
+/* ZSTD_compress_insertDictionary() :
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+}
+
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+
+/*! ZSTD_compressBegin_internal() :
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if ( (cdict)
+      && (cdict->dictContentSize > 0)
+      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+        || cdict->compressionLevel == 0)
+      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+                                     ZSTDcrp_makeClean, zbuff) , "");
+    {   size_t const dictID = cdict ?
+                ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
+                        cctx->entropyWorkspace)
+              : ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+                        dictContentType, dtlm, cctx->entropyWorkspace);
+        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+        assert(dictID <= UINT_MAX);
+        cctx->dictID = (U32)dictID;
+        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
+    /* compression parameters verification and optimization */
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
+    return ZSTD_compressBegin_internal(cctx,
+                                       dict, dictSize, dictContentType, dtlm,
+                                       cdict,
+                                       params, pledgedSrcSize,
+                                       ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compressBegin_advanced_internal(cctx,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            NULL /*cdict*/,
+                                            &cctxParams, pledgedSrcSize);
+}
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+}
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t fhSize = 0;
+
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+
+    /* special case : empty frame */
+    if (cctx->stage == ZSTDcs_init) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        dstCapacity -= fhSize;
+        op += fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (cctx->stage != ZSTDcs_ending) {
+        /* write one last empty block, make it the "last" block */
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+    }
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(op, checksum);
+        op += 4;
+    }
+
+    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
+    return op-ostart;
+}
+
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+{
+    (void)cctx;
+    (void)extraCSize;
+}
+
+size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize)
+{
+    size_t endResult;
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
+    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
+    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
+    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+        DEBUGLOG(4, "end of frame : controlling src size");
+        RETURN_ERROR_IF(
+            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
+            srcSize_wrong,
+             "error : pledgedSrcSize = %u, while realSrcSize = %u",
+            (unsigned)cctx->pledgedSrcSizePlusOne-1,
+            (unsigned)cctx->consumedSrcSize);
+    }
+    ZSTD_CCtx_trace(cctx, endResult);
+    return cSize + endResult;
+}
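+
+/* Illustrative sketch, not part of the library: the buffer-less block-level
+ * loop built from ZSTD_compressBegin(), ZSTD_compressContinue() and
+ * ZSTD_compressEnd(). It assumes the buffer-less streaming preconditions hold
+ * (chunks presented back-to-back, dst sized via ZSTD_compressBound());
+ * `nbChunks`, `chunk[]` and `write_output` are hypothetical.
+ *
+ *     size_t ret = ZSTD_compressBegin(cctx, 3);         // level 3
+ *     for (n = 0; n < nbChunks; n++) {                  // hypothetical chunking
+ *         ret = ZSTD_compressContinue(cctx, dst, dstCapacity,
+ *                                     chunk[n], chunkSize[n]);
+ *         if (ZSTD_isError(ret)) break;                 // forward error code
+ *         write_output(dst, ret);                       // hypothetical sink
+ *     }
+ *     ret = ZSTD_compressEnd(cctx, dst, dstCapacity, lastChunk, lastSize);
+ */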
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compress_advanced");
+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctxParams);
+}
+
+/* Internal */
+size_t ZSTD_compress_advanced_internal(
+        ZSTD_CCtx* cctx,
+        void* dst, size_t dstCapacity,
+        const void* src, size_t srcSize,
+        const void* dict,size_t dictSize,
+        const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                         params, srcSize, ZSTDb_not_buffered) , "");
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
+        assert(params.fParams.contentSizeFlag == 1);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+}
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize,
+                         int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
+    assert(cctx != NULL);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity,
+               const void* src, size_t srcSize,
+                     int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
+    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
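+
+/* Illustrative sketch, not part of the library: typical one-shot usage.
+ * Assumes `dst` was sized with ZSTD_compressBound(srcSize), so compression
+ * cannot fail for lack of output space.
+ *
+ *     size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 3);
+ *     if (ZSTD_isError(cSize))
+ *         return cSize;   // propagate the error code
+ */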
+
+
+/* =====  Dictionary API  ===== */
+
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate the amount of memory needed to create a dictionary with the following arguments */
+size_t ZSTD_estimateCDictSize_advanced(
+        size_t dictSize, ZSTD_compressionParameters cParams,
+        ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
+    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
+}
+
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
+    /* cdict may be in the workspace */
+    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+        + ZSTD_cwksp_sizeof(&cdict->workspace);
+}
+
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    ZSTD_dictLoadMethod_e dictLoadMethod,
+                    ZSTD_dictContentType_e dictContentType,
+                    ZSTD_CCtx_params params)
+{
+    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
+    assert(!ZSTD_checkCParams(params.cParams));
+    cdict->matchState.cParams = params.cParams;
+    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
+    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
+        cdict->matchState.dedicatedDictSearch = 0;
+    }
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictContent = dictBuffer;
+    } else {
+        void* internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
+        cdict->dictContent = internalBuffer;
+        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
+
+    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+    /* Reset the state to no dictionary */
+    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+    FORWARD_IF_ERROR(ZSTD_reset_matchState(
+        &cdict->matchState,
+        &cdict->workspace,
+        &params.cParams,
+        ZSTDcrp_makeClean,
+        ZSTDirp_reset,
+        ZSTD_resetTarget_CDict), "");
+    /* (Maybe) load the dictionary
+     * Skips loading the dictionary if it is < 8 bytes.
+     */
+    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+        params.fParams.contentSizeFlag = 1;
+        {   size_t const dictID = ZSTD_compress_insertDictionary(
+                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                    &params, cdict->dictContent, cdict->dictContentSize,
+                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+            assert(dictID <= (size_t)(U32)-1);
+            cdict->dictID = (U32)dictID;
+        }
+    }
+
+    return 0;
+}
+
+static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   size_t const workspaceSize =
+            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            (dictLoadMethod == ZSTD_dlm_byRef ? 0
+             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
+        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
+        ZSTD_cwksp ws;
+        ZSTD_CDict* cdict;
+
+        if (!workspace) {
+            return NULL;
+        }
+
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
+
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        assert(cdict != NULL);
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+        cdict->customMem = customMem;
+        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
+
+        return cdict;
+    }
+}
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
+    ZSTD_CCtxParams_init(&cctxParams, 0);
+    cctxParams.cParams = cParams;
+    cctxParams.customMem = customMem;
+    return ZSTD_createCDict_advanced2(
+        dictBuffer, dictSize,
+        dictLoadMethod, dictContentType,
+        &cctxParams, customMem);
+}
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+        const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod,
+        ZSTD_dictContentType_e dictContentType,
+        const ZSTD_CCtx_params* originalCctxParams,
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams = *originalCctxParams;
+    ZSTD_compressionParameters cParams;
+    ZSTD_CDict* cdict;
+
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+
+    if (cctxParams.enableDedicatedDictSearch) {
+        cParams = ZSTD_dedicatedDictSearch_getCParams(
+            cctxParams.compressionLevel, dictSize);
+        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
+    } else {
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
+        /* Fall back to non-DDSS params */
+        cctxParams.enableDedicatedDictSearch = 0;
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    cctxParams.cParams = cParams;
+
+    cdict = ZSTD_createCDict_advanced_internal(dictSize,
+                        dictLoadMethod, cctxParams.cParams,
+                        customMem);
+
+    if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                    dict, dictSize,
+                                    dictLoadMethod, dictContentType,
+                                    cctxParams) )) {
+        ZSTD_freeCDict(cdict);
+        return NULL;
+    }
+
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
+                                                  cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
+                                     cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = cdict->customMem;
+        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+        ZSTD_cwksp_free(&cdict->workspace, cMem);
+        if (!cdictInWorkspace) {
+            ZSTD_customFree(cdict, cMem);
+        }
+        return 0;
+    }
+}
+
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-byte aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                            + matchStateSize;
+    ZSTD_CDict* cdict;
+    ZSTD_CCtx_params params;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+
+    {
+        ZSTD_cwksp ws;
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        if (cdict == NULL) return NULL;
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+    }
+
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    ZSTD_CCtxParams_init(&params, 0);
+    params.cParams = cParams;
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              dictLoadMethod, dictContentType,
+                                              params) ))
+        return NULL;
+
+    return cdict;
+}
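+
+/* Illustrative sketch, not part of the library: building a CDict in
+ * caller-owned memory. `wksp` is a hypothetical buffer, 8-byte aligned,
+ * sized with ZSTD_estimateCDictSize().
+ *
+ *     size_t const wkspSize = ZSTD_estimateCDictSize(dictSize, 3);
+ *     const ZSTD_CDict* const cdict =
+ *         ZSTD_initStaticCDict(wksp, wkspSize, dictBuffer, dictSize,
+ *                              ZSTD_dlm_byCopy, ZSTD_dct_auto,
+ *                              ZSTD_getCParams(3, 0, dictSize));
+ *     if (cdict == NULL)
+ *         return -1;   // workspace too small or misaligned
+ */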
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    assert(cdict != NULL);
+    return cdict->matchState.cParams;
+}
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;
+    return cdict->dictID;
+}
+
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    /* Initialize the cctxParams from the cdict */
+    {
+        ZSTD_parameters params;
+        params.fParams = fParams;
+        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+                        || cdict->compressionLevel == 0 ) ?
+                ZSTD_getCParamsFromCDict(cdict)
+              : ZSTD_getCParams(cdict->compressionLevel,
+                                pledgedSrcSize,
+                                cdict->dictContentSize);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
+    }
+    /* Increase window log to fit the entire dictionary and source if the
+     * source size is known. Limit the increase to 19, which is the
+     * window log for compression level 1 with the largest source size.
+     */
+    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
+    }
+    return ZSTD_compressBegin_internal(cctx,
+                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                        cdict,
+                                        &cctxParams, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * same as ZSTD_compressBegin_usingCDict_advanced(), with default frame
+ * parameters and pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+}
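+
+/* Illustrative sketch, not part of the library: amortizing dictionary load
+ * cost across many small frames; error handling abbreviated.
+ *
+ *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3);
+ *     for (n = 0; n < nbFrames; n++)   // hypothetical frame loop
+ *         cSizes[n] = ZSTD_compress_usingCDict(cctx, dsts[n], dstCaps[n],
+ *                                              srcs[n], srcSizes[n], cdict);
+ *     ZSTD_freeCDict(cdict);
+ */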
+
+
+
+/* ******************************************************************
+*  Streaming
+********************************************************************/
+
+ZSTD_CStream* ZSTD_createCStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createCStream");
+    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticCCtx(workspace, workspaceSize);
+}
+
+ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
+{   /* CStream and CCtx are now same object */
+    return ZSTD_createCCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
+{
+    return ZSTD_freeCCtx(zcs);   /* same object */
+}
+
+
+
+/*======   Initialization   ======*/
+
+size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_CStreamOutSize(void)
+{
+    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bit checksum */;
+}
+
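+/* Select the cParams adaptation mode: ZSTD_cpm_attachDict when the cdict will
+ * be referenced as-is, ZSTD_cpm_noAttachDict when its content would instead be
+ * loaded into the context (or when there is no cdict at all). */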
+static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
+{
+    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
+        return ZSTD_cpm_attachDict;
+    else
+        return ZSTD_cpm_noAttachDict;
+}
+
+/* ZSTD_resetCStream():
+ * pledgedSrcSize == 0 means "unknown" */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : for lib/compress only. Used by zstdmt_compress.c.
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    const ZSTD_CCtx_params* params,
+                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_internal");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    zcs->requestedParams = *params;
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if (dict) {
+        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    } else {
+        /* Dictionary is cleared if !cdict */
+        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    }
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    zcs->requestedParams.fParams = fParams;
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+
+/* ZSTD_initCStream_advanced() :
+ * pledgedSrcSize must be exact.
+ * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pss)
+{
+    /* for compatibility with older programs relying on this behavior.
+     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
+     * This line will be removed in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    return 0;
+}
+
+/*======   Compression   ======*/
+
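+/* Suggest how much input the caller should provide next: the room left before
+ * the current block target, or a full block once the buffer is exactly full. */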
+static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
+{
+    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
+    if (hintInSize==0) hintInSize = cctx->blockSize;
+    return hintInSize;
+}
+
+/* ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants
+ * @return : hint size for next input */
+static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective const flushMode)
+{
+    const char* const istart = (const char*)input->src;
+    const char* const iend = input->size != 0 ? istart + input->size : istart;
+    const char* ip = input->pos != 0 ? istart + input->pos : istart;
+    char* const ostart = (char*)output->dst;
+    char* const oend = output->size != 0 ? ostart + output->size : ostart;
+    char* op = output->pos != 0 ? ostart + output->pos : ostart;
+    U32 someMoreWork = 1;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
+    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->inBuff != NULL);
+        assert(zcs->inBuffSize > 0);
+    }
+    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->outBuff !=  NULL);
+        assert(zcs->outBuffSize > 0);
+    }
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
+    assert((U32)flushMode <= (U32)ZSTD_e_end);
+
+    while (someMoreWork) {
+        switch(zcs->streamStage)
+        {
+        case zcss_init:
+            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
+
+        case zcss_load:
+            if ( (flushMode == ZSTD_e_end)
+              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
+                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer in buffered mode */
+            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
+                zcs->inBuffPos += loaded;
+                if (loaded != 0)
+                    ip += loaded;
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
+            /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
+            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
+                void* cDst;
+                size_t cSize;
+                size_t oSize = oend-op;
+                size_t const iSize = inputBuffered
+                    ? zcs->inBuffPos - zcs->inToCompress
+                    : MIN((size_t)(iend - ip), zcs->blockSize);
+                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
+                else
+                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
+                if (inputBuffered) {
+                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize);
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    /* prepare next block */
+                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+                    if (zcs->inBuffTarget > zcs->inBuffSize)
+                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                            (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
+                    if (!lastBlock)
+                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                    zcs->inToCompress = zcs->inBuffPos;
+                } else {
+                    unsigned const lastBlock = (ip + iSize == iend);
+                    assert(flushMode == ZSTD_e_end /* Already validated */);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+                    /* Consume the input prior to error checking to mirror buffered mode. */
+                    if (iSize > 0)
+                        ip += iSize;
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    if (lastBlock)
+                        assert(ip == iend);
+                }
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    }
+                    break;
+                }
+                zcs->outBuffContentSize = cSize;
+                zcs->outBuffFlushedSize = 0;
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
+            }
+            ZSTD_FALLTHROUGH;
+        case zcss_flush:
+            DEBUGLOG(5, "flush stage");
+            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
+            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
+                if (flushed)
+                    op += flushed;
+                zcs->outBuffFlushedSize += flushed;
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
+                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    break;
+                }
+                zcs->streamStage = zcss_load;
+                break;
+            }
+
+        default: /* impossible */
+            assert(0);
+        }
+    }
+
+    input->pos = ip - istart;
+    output->pos = op - ostart;
+    if (zcs->frameEnded) return 0;
+    return ZSTD_nextInputSizeHint(zcs);
+}
+
+static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
+{
+    return ZSTD_nextInputSizeHint(cctx);
+}
+
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
+    return ZSTD_nextInputSizeHint_MTorST(zcs);
+}
+
+/* After a compression call set the expected input/output buffer.
+ * This is validated at the start of the next compression call.
+ */
+static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        cctx->expectedInBuffer = *input;
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        cctx->expectedOutBufferSize = output->size - output->pos;
+    }
+}
+
+/* Validate that the input/output buffers match the expectations set by
+ * ZSTD_setBufferExpectations.
+ */
+static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+                                        ZSTD_outBuffer const* output,
+                                        ZSTD_inBuffer const* input,
+                                        ZSTD_EndDirective endOp)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
+        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+        if (endOp != ZSTD_e_end)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        size_t const outBufferSize = output->size - output->pos;
+        if (cctx->expectedOutBufferSize != outBufferSize)
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+    }
+    return 0;
+}
+
+static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+                                             ZSTD_EndDirective endOp,
+                                             size_t inSize) {
+    ZSTD_CCtx_params params = cctx->requestedParams;
+    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+    if (cctx->cdict)
+        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+    {
+        size_t const dictSize = prefixDict.dict
+                ? prefixDict.dictSize
+                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &params, cctx->pledgedSrcSizePlusOne-1,
+                dictSize, mode);
+    }
+
+    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
+        /* Enable LDM by default for optimal parser and window size >= 128MB */
+        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
+        params.ldmParams.enableLdm = 1;
+    }
+
+    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+            /* for small input: avoid automatic flush on reaching end of block, since
+             * it would require adding a 3-byte null block to end the frame
+             */
+            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        } else {
+            cctx->inBuffTarget = 0;
+        }
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp)
+{
+    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
+    /* check conditions */
+    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
+    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
+    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
+    assert(cctx != NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
+    }
+    /* end of transparent initialization stage */
+
+    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
+    /* compression stage */
+    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
+    DEBUGLOG(5, "completed ZSTD_compressStream2");
+    ZSTD_setBufferExpectations(cctx, output, input);
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
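+
+/* Illustrative sketch, not part of the library: a streaming loop driven by
+ * ZSTD_compressStream2(). `read_input`/`write_output` are hypothetical
+ * helpers; inBuf/outBuf are sized with ZSTD_CStreamInSize() and
+ * ZSTD_CStreamOutSize().
+ *
+ *     for (;;) {
+ *         size_t const rSize = read_input(inBuf, ZSTD_CStreamInSize());
+ *         ZSTD_EndDirective const mode = rSize ? ZSTD_e_continue : ZSTD_e_end;
+ *         ZSTD_inBuffer input = { inBuf, rSize, 0 };
+ *         do {
+ *             ZSTD_outBuffer output = { outBuf, ZSTD_CStreamOutSize(), 0 };
+ *             size_t const rem = ZSTD_compressStream2(cctx, &output, &input, mode);
+ *             if (ZSTD_isError(rem)) return rem;
+ *             write_output(outBuf, output.pos);
+ *             if (mode == ZSTD_e_end && rem == 0) return 0;  // frame done
+ *         } while (input.pos < input.size || mode == ZSTD_e_end);
+ *     }
+ */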
+
+size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
+
+size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
+    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
+    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    /* Enable stable input/output buffers. */
+    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
+    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
+    {   size_t oPos = 0;
+        size_t iPos = 0;
+        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
+                                        dst, dstCapacity, &oPos,
+                                        src, srcSize, &iPos,
+                                        ZSTD_e_end);
+        /* Reset to the original values. */
+        cctx->requestedParams.inBufferMode = originalInBufferMode;
+        cctx->requestedParams.outBufferMode = originalOutBufferMode;
+        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
+        if (result != 0) {  /* compression not completed, due to lack of output space */
+            assert(oPos == dstCapacity);
+            RETURN_ERROR(dstSize_tooSmall, "");
+        }
+        assert(iPos == srcSize);   /* all input is expected consumed */
+        return oPos;
+    }
+}
+
+typedef struct {
+    U32 idx;             /* Index in array of ZSTD_Sequence */
+    U32 posInSequence;   /* Position within sequence at idx */
+    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
+/* Returns a ZSTD error code if sequence is not valid */
+static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
+    size_t offsetBound;
+    U32 const windowSize = 1u << windowLog;
+    /* posInSrc represents the amount of data the decoder would decode up to this point.
+     * As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+     */
+    offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    return 0;
+}
+
+/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
+    U32 offCode = rawOffset + ZSTD_REP_MOVE;
+    U32 repCode = 0;
+
+    if (!ll0 && rawOffset == rep[0]) {
+        repCode = 1;
+    } else if (rawOffset == rep[1]) {
+        repCode = 2 - ll0;
+    } else if (rawOffset == rep[2]) {
+        repCode = 3 - ll0;
+    } else if (ll0 && rawOffset == rep[0] - 1) {
+        repCode = 3;
+    }
+    if (repCode) {
+        /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
+        offCode = repCode - 1;
+    }
+    return offCode;
+}
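+
+/* Worked example with assumed values: given rep = {8, 4, 1} and a non-empty
+ * literal run (ll0 == 0), rawOffset 8 hits rep[0] and becomes offCode 0,
+ * rawOffset 4 hits rep[1] and becomes offCode 1, while any other offset,
+ * say 100, becomes 100 + ZSTD_REP_MOVE. */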
+
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ */
+static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                             const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    BYTE const* ip = (BYTE const*)(src);
+    const BYTE* const iend = ip + blockSize;
+    repcodes_t updatedRepcodes;
+    U32 dictSize;
+    U32 litLength;
+    U32 matchLength;
+    U32 ll0;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = (U32)cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = (U32)cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
+        litLength = inSeqs[idx].litLength;
+        matchLength = inSeqs[idx].matchLength;
+        ll0 = litLength == 0;
+        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                cctx->appliedParams.cParams.windowLog, dictSize,
+                                                cctx->appliedParams.cParams.minMatch),
+                                                "Sequence validation failed");
+        }
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    if (idx < inSeqsSize && inSeqs[idx].litLength) {
+        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
+        ip += inSeqs[idx].litLength;
+        seqPos->posInSrc += inSeqs[idx].litLength;
+    }
+    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    seqPos->idx = idx+1;
+    return 0;
+}
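+
+/* Illustrative example with assumed values: a 100-byte block containing one
+ * match may be described as { .offset = 30, .litLength = 20, .matchLength = 50 }
+ * followed by the block delimiter { .offset = 0, .litLength = 30,
+ * .matchLength = 0 }, whose litLength covers the trailing literals. */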
+
+/* Returns the number of bytes to move the current read position back by; this
+ * is only non-zero if we ended up splitting a sequence. It may instead return
+ * a ZSTD error code if something went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes represented by the sequences
+ * in inSeqs, storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+ */
+static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                       const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    U32 startPosInSequence = seqPos->posInSequence;
+    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
+    size_t dictSize;
+    BYTE const* ip = (BYTE const*)(src);
+    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
+    repcodes_t updatedRepcodes;
+    U32 bytesAdjustment = 0;
+    U32 finalMatchSplit = 0;
+    U32 litLength;
+    U32 matchLength;
+    U32 rawOffset;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
+    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+        const ZSTD_Sequence currSeq = inSeqs[idx];
+        litLength = currSeq.litLength;
+        matchLength = currSeq.matchLength;
+        rawOffset = currSeq.offset;
+
+        /* Modify the sequence depending on where endPosInSequence lies */
+        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
+            if (startPosInSequence >= litLength) {
+                startPosInSequence -= litLength;
+                litLength = 0;
+                matchLength -= startPosInSequence;
+            } else {
+                litLength -= startPosInSequence;
+            }
+            /* Move to the next sequence */
+            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
+            startPosInSequence = 0;
+            idx++;
+        } else {
+            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
+               does not reach the end of the match. So, we have to split the sequence */
+            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
+                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+            if (endPosInSequence > litLength) {
+                U32 firstHalfMatchLength;
+                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
+                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
+                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
+                    /* Only ever split the match if it is larger than the block size */
+                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
+                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
+                        /* Move endPosInSequence backward so that it creates a match of minMatch length */
+                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        firstHalfMatchLength -= bytesAdjustment;
+                    }
+                    matchLength = firstHalfMatchLength;
+                    /* Flag that we split the last match - after storing the sequence, exit the loop,
+                       but keep the value of endPosInSequence */
+                    finalMatchSplit = 1;
+                } else {
+                    /* Move the position in sequence backwards so that we don't split the match, and break to store
+                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
+                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
+                     * would cause the first half of the match to be too small
+                     */
+                    bytesAdjustment = endPosInSequence - currSeq.litLength;
+                    endPosInSequence = currSeq.litLength;
+                    break;
+                }
+            } else {
+                /* This sequence ends inside the literals, break to store the last literals */
+                break;
+            }
+        }
+        /* Check if this offset can be represented with a repcode */
+        {   U32 ll0 = (litLength == 0);
+            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
+            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        }
+
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                   cctx->appliedParams.cParams.windowLog, dictSize,
+                                                   cctx->appliedParams.cParams.minMatch),
+                                                   "Sequence validation failed");
+        }
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+    seqPos->idx = idx;
+    seqPos->posInSequence = endPosInSequence;
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    iend -= bytesAdjustment;
+    if (ip != iend) {
+        /* Store any last literals */
+        U32 lastLLSize = (U32)(iend - ip);
+        assert(ip <= iend);
+        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
+        seqPos->posInSrc += lastLLSize;
+    }
+
+    return bytesAdjustment;
+}
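+
+/* Worked example (illustrative values): with blockSize == 64 and a single
+ * input sequence (ll == 0, ml == 200), endPosInSequence lands 64 bytes into
+ * the match; since matchLength > blockSize and both halves satisfy minMatch,
+ * the match is split: (ll: 0, ml: 64) is stored for this block and
+ * seqPos->posInSequence == 64 lets the next block consume the remaining 136
+ * match bytes. With blockSize == 50 and a sequence (ll == 10, ml == 45)
+ * instead, no split occurs: endPosInSequence is moved back to the
+ * literal/match boundary (10), the 10 literals are stored as last literals,
+ * and 40 is returned as bytesAdjustment so the caller shrinks the block. */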
+
+typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                       const void* src, size_t blockSize);
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+    ZSTD_sequenceCopier sequenceCopier = NULL;
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
+    if (mode == ZSTD_sf_explicitBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
+    } else if (mode == ZSTD_sf_noBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
+    }
+    assert(sequenceCopier != NULL);
+    return sequenceCopier;
+}
+
+/* Compress, block-by-block, all of the sequences given.
+ *
+ * Returns the cumulative size of all compressed blocks (including their headers), or a ZSTD error code.
+ */
+static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                              const void* src, size_t srcSize) {
+    size_t cSize = 0;
+    U32 lastBlock;
+    size_t blockSize;
+    size_t compressedSeqsSize;
+    size_t remaining = srcSize;
+    ZSTD_sequencePosition seqPos = {0, 0, 0};
+
+    BYTE const* ip = (BYTE const*)src;
+    BYTE* op = (BYTE*)dst;
+    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+
+    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
+    /* Special case: empty frame */
+    if (remaining == 0) {
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+        cSize += ZSTD_blockHeaderSize;
+    }
+
+    while (remaining) {
+        size_t cBlockSize;
+        size_t additionalByteAdjustment;
+        lastBlock = remaining <= cctx->blockSize;
+        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
+        ZSTD_resetSeqStore(&cctx->seqStore);
+        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
+
+        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
+        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
+        blockSize -= additionalByteAdjustment;
+
+        /* If the block is too small, emit it as a nocompress block */
+        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
+            cSize += cBlockSize;
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            continue;
+        }
+
+        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                                &cctx->appliedParams,
+                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
+                                blockSize,
+                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                                cctx->bmi2);
+        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
+
+        if (!cctx->isFirstBlock &&
+            ZSTD_maybeRLE(&cctx->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize)) {
+            /* We don't want to emit our first block as an RLE block even if it
+             * qualifies, because doing so will cause the decoder (cli only) to
+             * throw a "should consume all input" error.
+             * This is only an issue for zstd <= v1.4.3
+             */
+            compressedSeqsSize = 1;
+        }
+
+        if (compressedSeqsSize == 0) {
+            /* ZSTD_noCompressBlock writes the block header as well */
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
+        } else if (compressedSeqsSize == 1) {
+            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
+            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
+        } else {
+            U32 cBlockHeader;
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+            /* Write block header into beginning of block */
+            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
+            MEM_writeLE24(op, cBlockHeader);
+            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
+            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
+        }
+
+        cSize += cBlockSize;
+        DEBUGLOG(4, "cSize running total: %zu", cSize);
+
+        if (lastBlock) {
+            break;
+        } else {
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            cctx->isFirstBlock = 0;
+        }
+    }
+
+    return cSize;
+}
+
+size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
+                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                              const void* src, size_t srcSize) {
+    BYTE* op = (BYTE*)dst;
+    size_t cSize = 0;
+    size_t compressedBlocksSize = 0;
+    size_t frameHeaderSize = 0;
+
+    /* Transparent initialization stage, same as compressStream2() */
+    DEBUGLOG(3, "ZSTD_compressSequences()");
+    assert(cctx != NULL);
+    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+    /* Begin writing output, starting with frame header */
+    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
+    op += frameHeaderSize;
+    dstCapacity -= frameHeaderSize;
+    cSize += frameHeaderSize;
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
+        xxh64_update(&cctx->xxhState, src, srcSize);
+    }
+    /* cSize includes block header size and compressed sequences size */
+    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
+                                                           op, dstCapacity,
+                                                           inSeqs, inSeqsSize,
+                                                           src, srcSize);
+    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
+    cSize += compressedBlocksSize;
+    dstCapacity -= compressedBlocksSize;
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32((char*)dst + cSize, checksum);
+        cSize += 4;
+    }
+
+    DEBUGLOG(3, "Final compressed size: %zu", cSize);
+    return cSize;
+}
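+
+/* Typical usage (illustrative sketch; dst/dstCapacity, seqs/nbSeqs and
+ * src/srcSize are hypothetical caller-provided buffers, e.g. sequences
+ * harvested from an earlier compression pass):
+ *
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
+ *                          ZSTD_sf_noBlockDelimiters);
+ *   {   size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                                   seqs, nbSeqs, src, srcSize);
+ *       if (ZSTD_isError(cSize)) { (handle error) }
+ *   }
+ */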
+
+/*======   Finalize   ======*/
+
+/*! ZSTD_flushStream() :
+ * @return : amount of data remaining to flush */
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
+}
+
+
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
+    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
+    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
+    /* single thread mode : attempt to calculate remaining to flush more precisely */
+    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
+        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
+        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
+        return toFlush;
+    }
+}
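+
+/* Illustrative draining pattern (sketch; outChunk/outChunkSize are
+ * hypothetical): keep calling ZSTD_endStream() with fresh output space until
+ * it reports nothing left to flush:
+ *
+ *   size_t remaining;
+ *   do {
+ *       ZSTD_outBuffer out = { outChunk, outChunkSize, 0 };
+ *       remaining = ZSTD_endStream(zcs, &out);
+ *       if (ZSTD_isError(remaining)) break;
+ *       (consume out.pos bytes from outChunk)
+ *   } while (remaining != 0);
+ */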
+
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
+    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
+    switch (cParams.strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+    return cParams;
+}
+
+static int ZSTD_dedicatedDictSearch_isSupported(
+        ZSTD_compressionParameters const* cParams)
+{
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog >= cParams->chainLog)
+        && (cParams->chainLog <= 24);
+}
+
+/*
+ * Reverses the adjustment applied to cparams when enabling dedicated dict
+ * search. This is used to recover the params set to be used in the working
+ * context. (Otherwise, those tables would also grow.)
+ */
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams) {
+    switch (cParams->strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+}
+
+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_createCDict:
+        break;
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    }
+}
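+
+/* Worked example: with srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+ * and dictSize == 10 KB in ZSTD_cpm_createCDict mode, addedSize == 500 and
+ * the (intentional) unsigned wrap-around yields
+ * rSize == 10240 + 500 - 1 == 10739, selecting the "srcSize <= 16 KB"
+ * parameter table above. */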
+
+/*! ZSTD_getCParams_internal() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Note: srcSizeHint == 0 means an actual size of 0; use ZSTD_CONTENTSIZE_UNKNOWN when the size is unknown.
+ *        Use dictSize == 0 for unknown or unused.
+ *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+
+    /* row */
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    else row = compressionLevel;
+
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        /* acceleration factor */
+        if (compressionLevel < 0) {
+            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
+            cp.targetLength = (unsigned)(-clampedCompressionLevel);
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
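+
+/* Worked example: ZSTD_getCParams(3, 100 KB, 0) computes rSize == 100 KB,
+ * hence tableID == 2 ("srcSize <= 128 KB") and row 3, i.e.
+ * { 17, 15, 16, 2, 5, 0, ZSTD_dfast } before ZSTD_adjustCParams_internal()
+ * refines it. A negative level such as -5 takes row 0 of the table and sets
+ * targetLength == 5 as the acceleration factor. */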
+
+/*! ZSTD_getParams_internal() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    ZSTD_memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    return params;
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
new file mode 100644
index 0000000..685d2f9
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_internal.h
@@ -0,0 +1,1188 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This header contains definitions
+ * that shall **only** be used by modules within lib/compress.
+ */
+
+#ifndef ZSTD_COMPRESS_H
+#define ZSTD_COMPRESS_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+#include "zstd_cwksp.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define kSearchStrength      8
+#define HASH_READ_SIZE       8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
+
+typedef struct ZSTD_prefixDict_s {
+    const void* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+} ZSTD_prefixDict;
+
+typedef struct {
+    void* dictBuffer;
+    void const* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+    ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
+typedef struct {
+    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_repeat repeatMode;
+} ZSTD_hufCTables_t;
+
+typedef struct {
+    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    FSE_repeat offcode_repeatMode;
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
+} ZSTD_fseCTables_t;
+
+typedef struct {
+    ZSTD_hufCTables_t huf;
+    ZSTD_fseCTables_t fse;
+} ZSTD_entropyCTables_t;
+
+typedef struct {
+    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 len;            /* Raw length of match */
+} ZSTD_match_t;
+
+typedef struct {
+    U32 offset;         /* Offset of sequence */
+    U32 litLength;      /* Length of literals prior to match */
+    U32 matchLength;    /* Raw length of match */
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;          /* The start of the sequences */
+  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
+  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
+                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
+  size_t size;          /* The number of sequences. <= capacity. */
+  size_t capacity;      /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
+typedef struct {
+    int price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_optimal_t;
+
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+
+typedef struct {
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+
+    U32  litSum;                 /* nb of literals */
+    U32  litLengthSum;           /* nb of litLength codes */
+    U32  matchLengthSum;         /* nb of matchLength codes */
+    U32  offCodeSum;             /* nb of offset codes */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
+    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+} optState_t;
+
+typedef struct {
+  ZSTD_entropyCTables_t entropy;
+  U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+    BYTE const* nextSrc;    /* next block here to continue on current prefix */
+    BYTE const* base;       /* All regular indexes relative to this position */
+    BYTE const* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more valid data */
+} ZSTD_window_t;
+
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+struct ZSTD_matchState_t {
+    ZSTD_window_t window;   /* State for window round buffer management */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
+    U32 nextToUpdate;       /* index from which to continue table update */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+    U32* hashTable;
+    U32* hashTable3;
+    U32* chainTable;
+    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
+                               * dedicated dictionary search structure.
+                               */
+    optState_t opt;         /* optimal parser state */
+    const ZSTD_matchState_t* dictMatchState;
+    ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
+};
+
+typedef struct {
+    ZSTD_compressedBlockState_t* prevCBlock;
+    ZSTD_compressedBlockState_t* nextCBlock;
+    ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
+    U32 offset;
+    U32 checksum;
+} ldmEntry_t;
+
+typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
+typedef struct {
+    ZSTD_window_t window;   /* State for the window round buffer management */
+    ldmEntry_t* hashTable;
+    U32 loadedDictEnd;
+    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
+} ldmState_t;
+
+typedef struct {
+    U32 enableLdm;          /* 1 if enable long distance matching */
+    U32 hashLog;            /* Log size of hashTable */
+    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
+    U32 minMatchLength;     /* Minimum match length */
+    U32 hashRateLog;        /* Log number of entries to skip */
+    U32 windowLog;          /* Window log for the LDM */
+} ldmParams_t;
+
+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
+struct ZSTD_CCtx_params_s {
+    ZSTD_format_e format;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+
+    int compressionLevel;
+    int forceWindow;           /* force back-references to respect limit of
+                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
+
+    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+
+    /* Multithreading: used to pass parameters to mtctx */
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
+
+    /* Long distance matching parameters */
+    ldmParams_t ldmParams;
+
+    /* Dedicated dict search algorithm trigger */
+    int enableDedicatedDictSearch;
+
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
+    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+    ZSTD_customMem customMem;
+};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+
+#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
+
+/*
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
+struct ZSTD_CCtx_s {
+    ZSTD_compressionStage_e stage;
+    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+    ZSTD_CCtx_params requestedParams;
+    ZSTD_CCtx_params appliedParams;
+    U32   dictID;
+    size_t dictContentSize;
+
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
+    size_t blockSize;
+    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    unsigned long long consumedSrcSize;
+    unsigned long long producedCSize;
+    struct xxh64_state xxhState;
+    ZSTD_customMem customMem;
+    ZSTD_threadPool* pool;
+    size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;
+    int initialized;
+
+    seqStore_t seqStore;      /* sequences storage ptrs */
+    ldmState_t ldmState;      /* long distance matching state */
+    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
+    size_t maxNbLdmSequences;
+    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+    ZSTD_blockState_t blockState;
+    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
+
+    /* Whether we are streaming or not */
+    ZSTD_buffered_policy_e bufferedPolicy;
+
+    /* streaming */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Stable in/out buffer verification */
+    ZSTD_inBuffer expectedInBuffer;
+    size_t expectedOutBufferSize;
+
+    /* Dictionary */
+    ZSTD_localDict localDict;
+    const ZSTD_CDict* cdict;
+    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
+
+    /* Multi-threading */
+
+    /* Tracing */
+};
+
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+
+typedef enum {
+    ZSTD_noDict = 0,
+    ZSTD_extDict = 1,
+    ZSTD_dictMatchState = 2,
+    ZSTD_dedicatedDictSearch = 3
+} ZSTD_dictMode_e;
+
+typedef enum {
+    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
+                                 * In this mode we use both the srcSize and the dictSize
+                                 * when selecting and adjusting parameters.
+                                 */
+    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
+                                 * In this mode we only take the srcSize into account when selecting
+                                 * and adjusting parameters.
+                                 */
+    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
+                                 * In this mode we take both the source size and the dictionary size
+                                 * into account when selecting and adjusting the parameters.
+                                 */
+    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
+                                 * We don't know what these parameters are for. We default to the legacy
+                                 * behavior of taking both the source size and the dict size into account
+                                 * when selecting and adjusting parameters.
+                                 */
+} ZSTD_cParamMode_e;
+
+typedef size_t (*ZSTD_blockCompressor) (
+        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+
+
+MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
+{
+    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                       8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 16, 17, 17, 18, 18, 19, 19,
+                                      20, 20, 20, 20, 21, 21, 21, 21,
+                                      22, 22, 22, 22, 22, 22, 22, 22,
+                                      23, 23, 23, 23, 23, 23, 23, 23,
+                                      24, 24, 24, 24, 24, 24, 24, 24,
+                                      24, 24, 24, 24, 24, 24, 24, 24 };
+    static const U32 LL_deltaCode = 19;
+    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+}
+
+/* ZSTD_MLcode() :
+ * note : mlBase = matchLength - MINMATCH;
+ *        because it's the format it's stored in seqStore->sequences */
+MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
+{
+    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+    static const U32 ML_deltaCode = 36;
+    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
+}
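+
+/* Worked examples: ZSTD_LLcode(18) == LL_Code[18] == 17, while
+ * ZSTD_LLcode(100) == ZSTD_highbit32(100) + 19 == 6 + 19 == 25. Likewise a
+ * match of length 50 has mlBase == 47, so ZSTD_MLcode(47) == ML_Code[47]
+ * == 37, and ZSTD_MLcode(200) == ZSTD_highbit32(200) + 36 == 7 + 36 == 43. */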
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+    } else {   /* repcode */
+        U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
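+
+/* Worked example (illustrative values): starting from rep == {8, 4, 2}:
+ *  - a full offset such as 103 (>= ZSTD_REP_NUM) yields
+ *    {103 - ZSTD_REP_MOVE, 8, 4} == {101, 8, 4};
+ *  - offset == 0 with ll0 == 0 (repCode 0) leaves {8, 4, 2} unchanged;
+ *  - offset == 0 with ll0 == 1 (repCode 1) promotes rep[1]: {4, 8, 2};
+ *  - offset == 2 with ll0 == 1 (repCode 3 == ZSTD_REP_NUM) uses
+ *    rep[0] - 1: {7, 8, 4}. */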
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes an uncompressed block to the dst buffer from the given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                    dstSize_tooSmall, "dst buf too small for uncompressed block");
+    MEM_writeLE24(dst, cBlockHeader24);
+    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    return ZSTD_blockHeaderSize + srcSize;
+}
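+
+/* Worked example: for srcSize == 100 on the last block, bt_raw == 0, so
+ * cBlockHeader24 == 1 + (0 << 1) + (100 << 3) == 801 == 0x000321, emitted
+ * little-endian as the 3 bytes 21 03 00 and followed by the 100 raw bytes. */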
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const op = (BYTE*)dst;
+    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+    MEM_writeLE24(op, cBlockHeader);
+    op[3] = src;
+    return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compressed block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    return (srcSize >> minlog) + 2;
+}
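+
+/* Worked example: for a 64 KB source, strategies below ZSTD_btultra use
+ * minlog == 6, so a compressed block must save at least
+ * (65536 >> 6) + 2 == 1026 bytes; ZSTD_btultra2 (strategy 9) uses
+ * minlog == 8, lowering the bar to (65536 >> 8) + 2 == 258 bytes. */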
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    switch (cctxParams->literalCompressionMode) {
+    case ZSTD_lcm_huffman:
+        return 0;
+    case ZSTD_lcm_uncompressed:
+        return 1;
+    default:
+        assert(0 /* impossible: pre-validated */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_lcm_auto:
+        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+    }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+*/
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+{
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+    static const BYTE* g_start = NULL;
+    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
+    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
+        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+    }
+#endif
+    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
+    /* copy Literals */
+    assert(seqStorePtr->maxNbLit <= 128 KB);
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case: we can use wildcopy.
+         * First copy 16 bytes, because literals are likely short.
+         */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
+    seqStorePtr->lit += litLength;
+
+    /* literal Length */
+    if (litLength>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].litLength = (U16)litLength;
+
+    /* match offset */
+    seqStorePtr->sequences[0].offset = offCode + 1;
+
+    /* match Length */
+    if (mlBase>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
+
+    seqStorePtr->sequences++;
+}
+
+
+/*-*************************************
+*  Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    } else {  /* Big Endian CPU */
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bit mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+    }   }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
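+
+/* Worked example: on a little-endian machine, an 8-byte XOR diff of
+ * 0x000000FF00000000 has its lowest set bit at position 32, so
+ * ZSTD_NbCommonBytes() returns 32 >> 3 == 4: the first 4 bytes matched.
+ * ZSTD_count() advances word by word until such a mismatch (or the input
+ * limit) is found, then finishes byte-wise. */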
+
+/* ZSTD_count_2segments() :
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
+    size_t const matchLength = ZSTD_count(ip, match, vEnd);
+    if (match + matchLength != mEnd) return matchLength;
+    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
+    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
+    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
+    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
+    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
+    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
+}
+
+
+/*-*************************************
+ *  Hashes
+ ***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+static const U32 prime4bytes = 2654435761U;
+static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    switch(mls)
+    {
+    default:
+    case 4: return ZSTD_hash4Ptr(p, hBits);
+    case 5: return ZSTD_hash5Ptr(p, hBits);
+    case 6: return ZSTD_hash6Ptr(p, hBits);
+    case 7: return ZSTD_hash7Ptr(p, hBits);
+    case 8: return ZSTD_hash8Ptr(p, hBits);
+    }
+}
+
+/* ZSTD_ipow() :
+ * Return base^exponent.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 power = 1;
+    while (exponent) {
+      if (exponent & 1) power *= base;
+      exponent >>= 1;
+      base *= base;
+    }
+    return power;
+}
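+
+/* Square-and-multiply: e.g. ZSTD_ipow(3, 5) walks exponent 0b101 and
+ * accumulates 3^1 * 3^4 == 243 in O(log exponent) multiplications
+ * (all arithmetic mod 2^64). */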
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/* ZSTD_rollingHash_append() :
+ * Add the buffer to the hash value.
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* istart = (BYTE const*)buf;
+    size_t pos;
+    for (pos = 0; pos < size; ++pos) {
+        hash *= prime8bytes;
+        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    }
+    return hash;
+}
+
+/* ZSTD_rollingHash_compute() :
+ * Compute the rolling hash value of the buffer.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    return ZSTD_rollingHash_append(0, buf, size);
+}
+
+/* ZSTD_rollingHash_primePower() :
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
+ * over a window of `length` bytes.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    return ZSTD_ipow(prime8bytes, length - 1);
+}
+
+/* ZSTD_rollingHash_rotate() :
+ * Rotate the rolling hash by one byte.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    hash *= prime8bytes;
+    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return hash;
+}
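+
+/* The rotate operation maintains the invariant (arithmetic mod 2^64):
+ * given hash == ZSTD_rollingHash_compute(buf, n) and
+ * pp == ZSTD_rollingHash_primePower(n),
+ *   ZSTD_rollingHash_rotate(hash, buf[0], buf[n], pp)
+ *       == ZSTD_rollingHash_compute(buf + 1, n)
+ * i.e. the window slides forward one byte without rehashing all n bytes. */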
+
+/*-*************************************
+*  Round buffer management
+***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX                                                     \
+    ( ((U32)-1)                  /* Maximum ending current index */            \
+    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
+
+/*
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+    size_t const endT = (size_t)(window->nextSrc - window->base);
+    U32 const end = (U32)endT;
+
+    window->lowLimit = end;
+    window->dictLimit = end;
+}
+
+/*
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+    return window.lowLimit < window.dictLimit;
+}
+
+/*
+ * ZSTD_matchState_dictMode():
+ * Inspects the provided matchState and figures out what dictMode should be
+ * passed to the compressor.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    return ZSTD_window_hasExtDict(ms->window) ?
+        ZSTD_extDict :
+        ms->dictMatchState != NULL ?
+            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
+            ZSTD_noDict;
+}
+
+/*
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  void const* srcEnd)
+{
+    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    return curr > ZSTD_CURRENT_MAX;
+}
+
+/*
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                           U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle0 = curr & cycleMask;
+    /* Exclude zero so that newCurrent - maxDist >= 1. */
+    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const correction = curr - newCurrent;
+    assert((maxDist & cycleMask) == 0);
+    assert(curr > newCurrent);
+    /* Loose bound, should be around 1<<29 (see above) */
+    assert(correction > 1<<28);
+
+    window->base += correction;
+    window->dictBase += correction;
+    if (window->lowLimit <= correction) window->lowLimit = 1;
+    else window->lowLimit -= correction;
+    if (window->dictLimit <= correction) window->dictLimit = 1;
+    else window->dictLimit -= correction;
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= 1);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
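+
+/* Worked example (illustrative) : with cycleLog == 17, maxDist == 1<<17 and
+ * curr == 0x60020123 : currentCycle0 == 0x123, newCurrent == 0x20123, and
+ * correction == 0x60000000. The low 17 bits of every index are preserved,
+ * and any index up to maxDist in the past remains >= 1 after correction. */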
+
+/*
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at the end of the dictionary.
+ * The value lies within the context's referential, so it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, the dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
+ */
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing the next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
+        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+        if (window->dictLimit < window->lowLimit) {
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
+            window->dictLimit = window->lowLimit;
+        }
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *              loadedDictEnd uses same referential as window->base
+ *              maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+        U32 const loadedDictEnd = *loadedDictEndPtr;
+        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+        assert(blockEndIdx >= loadedDictEnd);
+
+        if (blockEndIdx > loadedDictEnd + maxDist) {
+            /* On reaching window size, dictionaries are invalidated.
+             * For simplification, if window size is reached anywhere within the next block,
+             * the dictionary is invalidated for the full block.
+             */
+            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+            *loadedDictEndPtr = 0;
+            *dictMatchStatePtr = NULL;
+        } else {
+            if (*loadedDictEndPtr != 0) {
+                DEBUGLOG(6, "dictionary considered valid for current block");
+    }   }   }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+    ZSTD_memset(window, 0, sizeof(*window));
+    window->base = (BYTE const*)"";
+    window->dictBase = (BYTE const*)"";
+    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
+    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + 1;   /* see issue #1241 */
+}
+
+/*
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the previous extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+                                  void const* src, size_t srcSize)
+{
+    BYTE const* const ip = (BYTE const*)src;
+    U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
+    if (srcSize == 0)
+        return contiguous;
+    assert(window->base != NULL);
+    assert(window->dictBase != NULL);
+    /* Check if blocks follow each other */
+    if (src != window->nextSrc) {
+        /* not contiguous */
+        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
+        window->lowLimit = window->dictLimit;
+        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
+        window->dictLimit = (U32)distanceFromBase;
+        window->dictBase = window->base;
+        window->base = ip - distanceFromBase;
+        /* ms->nextToUpdate = window->dictLimit; */
+        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
+        contiguous = 0;
+    }
+    window->nextSrc = ip + srcSize;
+    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+    if ( (ip+srcSize > window->dictBase + window->lowLimit)
+       & (ip < window->dictBase + window->dictLimit)) {
+        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+        window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
+    }
+    return contiguous;
+}
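+
+/* Typical call sequence (sketch, not upstream code) : on each compression
+ * call the match state first appends the new input, then clamps the window
+ * before every block :
+ *   ZSTD_window_update(&ms->window, src, srcSize);
+ *   ...
+ *   ZSTD_window_enforceMaxDist(&ms->window, blockEnd, maxDist,
+ *                              &ms->loadedDictEnd, &ms->dictMatchState);
+ */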
+
+/*
+ * Returns the lowest allowed match index. It may be in either the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.lowLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
+     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
+     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
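+
+/* Example (illustrative) : windowLog == 20, curr == 3000000, lowLimit == 1,
+ * no dictionary : lowestValid lies outside the window, so matches may start
+ * no earlier than index curr - (1<<20) == 1951424. */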
+
+/*
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.dictLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When computing the lowest prefix index we need to take the dictionary into account to handle
+     * the edge case where the dictionary and the source are contiguous in memory.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+
+
+/* debug functions */
+#if (DEBUGLEVEL>=2)
+
+MEM_STATIC double ZSTD_fWeight(U32 rawStat)
+{
+    U32 const fp_accuracy = 8;
+    U32 const fp_multiplier = (1 << fp_accuracy);
+    U32 const newStat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(newStat);
+    U32 const BWeight = hb * fp_multiplier;
+    U32 const FWeight = (newStat << fp_accuracy) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + fp_accuracy < 31);
+    return (double)weight / fp_multiplier;
+}
+
+/* display a table content,
+ * listing each element, its frequency, and its predicted bit cost */
+MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+{
+    unsigned u, sum;
+    for (u=0, sum=0; u<=max; u++) sum += table[u];
+    DEBUGLOG(2, "total nb elts: %u", sum);
+    for (u=0; u<=max; u++) {
+        DEBUGLOG(2, "%2u: %5u  (%.2f)",
+                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
+    }
+}
+
+#endif
+
+
+
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number is assumed to be already checked
+ *               and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
+
+/* ==============================================================
+ * Private declarations
+ * These prototypes shall only be called from within lib/compress
+ * ============================================================== */
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  as the name implies */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize);
+
+/* ZSTD_compress_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict,size_t dictSize,
+                                 const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/* ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
new file mode 100644
index 0000000..655bcda
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_literals.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
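+
+/* Example (illustrative, not upstream) : srcSize == 1000 gives flSize == 2 ;
+ * the header is then the 16-bit little-endian value
+ * set_basic + (1<<2) + (1000<<4) == 0x3E84, i.e.
+ * 2 bits block type (00), 2 bits size format (01), 12 bits regenerated size. */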
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+                disableLiteralCompression, (U32)srcSize);
+
+    /* Prepare nextEntropy, assuming the existing table will be reused */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+    {   HUF_repeat repeat = prevHuf->repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        if (repeat != HUF_repeat_none) {
+            /* reused the existing table */
+            DEBUGLOG(5, "Reusing previous huffman table");
+            hType = set_repeat;
+        }
+    }
+
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+    if (cLitSize==1) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
+
+    if (hType == set_compressed) {
+        /* using a newly constructed table */
+        nextHuf->repeatMode = HUF_repeat_check;
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
+    return lhSize+cLitSize;
+}
diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
new file mode 100644
index 0000000..9904c0c
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_literals.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
new file mode 100644
index 0000000..dcfcdc9
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_sequences.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/*
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+  void const* ptr = ctable;
+  U16 const* u16ptr = (U16 const*)ptr;
+  U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+  return maxSymbolValue;
+}
+
+/*
+ * Returns true if we should use ncount=-1; otherwise we should
+ * use ncount=1 for low-probability symbols instead.
+ */
+static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+{
+    /* Heuristic: This should cover most blocks <= 16K and
+     * start to fade out after 16K to about 32K depending on
+     * compressibility.
+     */
+    return nbSeq >= 2048;
+}
+
+/*
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    BYTE wksp[FSE_NCOUNTBOUND];
+    S16 norm[MaxSeq + 1];
+    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
+    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned cost = 0;
+    unsigned s;
+    for (s = 0; s <= max; ++s) {
+        unsigned norm = (unsigned)((256 * count[s]) / total);
+        if (count[s] != 0 && norm == 0)
+            norm = 1;
+        assert(count[s] < total);
+        cost += count[s] * kInverseProbabilityLog256[norm];
+    }
+    return cost >> 8;
+}
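+
+/* Example (illustrative) : 256 symbols spread uniformly over 4 values gives
+ * count[s] == 64 and norm == 64 each ; kInverseProbabilityLog256[64] == 512,
+ * i.e. 2.0 bits per symbol, so cost == (4 * 64 * 512) >> 8 == 512 bits. */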
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+        return ERROR(GENERIC);
+    }
+    for (s = 0; s <= max; ++s) {
+        unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+        if (count[s] == 0)
+            continue;
+        if (bitCost >= badCost) {
+            DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+            return ERROR(GENERIC);
+        }
+        cost += (size_t)count[s] * bitCost;
+    }
+    return cost >> kAccuracyLog;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol supported by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max)
+{
+    unsigned const shift = 8 - accuracyLog;
+    size_t cost = 0;
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
+        unsigned const norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
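+
+/* Example (illustrative) : with accuracyLog == 6, a symbol with norm[s] == 1
+ * has probability 1/64 ; norm256 == 1 << 2 == 4, and
+ * kInverseProbabilityLog256[4] == 1536, i.e. 6 bits per occurrence. */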
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+             * If basic encoding isn't possible, always choose RLE.
+             */
+            DEBUGLOG(5, "Selected set_basic");
+            return set_basic;
+        }
+        DEBUGLOG(5, "Selected set_rle");
+        return set_rle;
+    }
+    if (strategy < ZSTD_lazy) {
+        if (isDefaultAllowed) {
+            size_t const staticFse_nbSeq_max = 1000;
+            size_t const mult = 10 - strategy;
+            size_t const baseLog = 3;
+            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
+            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
+            assert(mult <= 9 && mult >= 7);
+            if ( (*repeatMode == FSE_repeat_valid)
+              && (nbSeq < staticFse_nbSeq_max) ) {
+                DEBUGLOG(5, "Selected set_repeat");
+                return set_repeat;
+            }
+            if ( (nbSeq < dynamicFse_nbSeq_min)
+              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+                DEBUGLOG(5, "Selected set_basic");
+                /* The format allows default tables to be repeated, but it isn't useful.
+                 * When using simple heuristics to select encoding type, we don't want
+                 * to confuse these tables with dictionaries. When running more careful
+                 * analysis, we don't need to waste time checking both repeating tables
+                 * and default tables.
+                 */
+                *repeatMode = FSE_repeat_none;
+                return set_basic;
+            }
+        }
+    } else {
+        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
+
+        if (isDefaultAllowed) {
+            assert(!ZSTD_isError(basicCost));
+            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+        }
+        assert(!ZSTD_isError(NCountCost));
+        assert(compressedCost < ERROR(maxCode));
+        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+        if (basicCost <= repeatCost && basicCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_basic");
+            assert(isDefaultAllowed);
+            *repeatMode = FSE_repeat_none;
+            return set_basic;
+        }
+        if (repeatCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_repeat");
+            assert(!ZSTD_isError(repeatCost));
+            return set_repeat;
+        }
+        assert(compressedCost < basicCost && compressedCost < repeatCost);
+    }
+    DEBUGLOG(5, "Selected set_compressed");
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
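+
+/* Example (illustrative) : for literal lengths (defaultNormLog == 6) at
+ * strategy ZSTD_fast (mult == 9), dynamicFse_nbSeq_min == (64 * 9) >> 3 == 72,
+ * so small blocks with fewer than 72 sequences typically keep set_basic. */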
+
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    const BYTE* const oend = op + dstCapacity;
+    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+    switch (type) {
+    case set_rle:
+        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
+        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
+        *op = codeTable[0];
+        return 1;
+    case set_repeat:
+        ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
+        return 0;
+    case set_basic:
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    }   }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+        return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+                                         CTable_MatchLength, mlCodeTable,
+                                         CTable_OffsetBits, ofCodeTable,
+                                         CTable_LitLength, llCodeTable,
+                                         sequences, nbSeq, longOffsets);
+    }
+#endif
+    (void)bmi2;
+    return ZSTD_encodeSequences_default(dst, dstCapacity,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq, longOffsets);
+}
diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h
new file mode 100644
index 0000000..7991364
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_sequences.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
+#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
new file mode 100644
index 0000000..b0610b2
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_superblock.c
@@ -0,0 +1,852 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_superblock.h"
+
+#include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
+#include "hist.h"                     /* HIST_countFast_wksp */
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+
+/*-*************************************
+*  Superblock entropy buffer structs
+***************************************/
+/* ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of the huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/* ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, of, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of the fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/* ZSTD_buildSuperBlockEntropy_literal() :
+ *  Builds entropy for the super-block literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  @return : size of huffman description table or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = 255;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy, assuming the existing table will be reused */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+/* ZSTD_buildSuperBlockEntropy_sequences() :
+ *  Builds entropy for the super-block sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
+    BYTE* const cTableWksp = countWkspStart + countWkspSize;
+    const size_t cTableWkspSize = wkspEnd-cTableWksp;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+
+    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
+    ZSTD_memset(workspace, 0, wkspSize);
+
+    fseMetadata->lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   U32 LLtype;
+        unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->llType = (symbolEncodingType_e) LLtype;
+    }   }
+    /* build CTable for Offsets */
+    {   U32 Offtype;
+        unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
+    }   }
+    /* build CTable for MatchLengths */
+    {   U32 MLtype;
+        unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
+    }   }
+    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
+    return op-ostart;
+}
+
+
+/* ZSTD_buildSuperBlockEntropy() :
+ *  Builds entropy for the super-block.
+ *  @return : 0 on success or error code */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+                      const ZSTD_entropyCTables_t* prevEntropy,
+                            ZSTD_entropyCTables_t* nextEntropy,
+                      const ZSTD_CCtx_params* cctxParams,
+                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_literal() :
+ *  Compresses literals section for a sub-block.
+ *  When we have to write the Huffman table we will sometimes choose a header
+ *  size larger than necessary. This is because we have to pick the header size
+ *  before we know the table size + compressed size, so we have a bound on the
+ *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
+ *  in writing the header, otherwise it is set to 0.
+ *
+ *  hufMetadata->hType has literals block type info.
+ *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
+ *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
+ *      If it is set_repeat, all sub-blocks' literals sections will be Treeless_Literals_Block.
+ *  @return : compressed size of literals section of a sub-block
+ *            Or 0 if it is unable to compress.
+ *            Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                    const BYTE* literals, size_t litSize,
+                                    void* dst, size_t dstSize,
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart + lhSize;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    (void)bmi2; /* TODO bmi2... */
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+    *entropyWritten = 0;
+    if (litSize == 0 || hufMetadata->hType == set_basic) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+    } else if (hufMetadata->hType == set_rle) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+    }
+
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
+        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        op += hufMetadata->hufDesSize;
+        cLitSize += hufMetadata->hufDesSize;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+    }
+
+    /* TODO bmi2 */
+    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
+        op += cSize;
+        cLitSize += cSize;
+        if (cSize == 0 || ERR_isError(cSize)) {
+            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+            return 0;
+        }
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size. */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+    return op-ostart;
+}
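+/* Worked example for the 3-byte header case above (illustrative; values
+ * chosen arbitrarily): with hType == set_compressed (2), a single stream,
+ * litSize == 1000 and cLitSize == 300, the "2 - 2 - 10 - 10" layout packs to
+ *
+ *   lhc = 2 + (0 << 2) + (1000u << 4) + (300u << 14)
+ *       = 2 + 16000 + 4915200 = 4931202
+ *
+ * which MEM_writeLE24() stores as three little-endian bytes. */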
+
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+    const seqDef* const sstart = sequences;
+    const seqDef* const send = sequences + nbSeq;
+    const seqDef* sp = sstart;
+    size_t matchLengthSum = 0;
+    size_t litLengthSum = 0;
+    /* Only used by assert(), suppress unused variable warnings in production. */
+    (void)litLengthSum;
+    while (send-sp > 0) {
+        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+        litLengthSum += seqLen.litLength;
+        matchLengthSum += seqLen.matchLength;
+        sp++;
+    }
+    assert(litLengthSum <= litSize);
+    if (!lastSequence) {
+        assert(litLengthSum == litSize);
+    }
+    return matchLengthSum + litSize;
+}
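+/* For example (illustrative): two sequences of (litLength, matchLength) =
+ * (5, 10) and (7, 20) with litSize == 12 regenerate 12 + 10 + 20 = 42 bytes.
+ * Only the last sub-block may carry extra literals beyond the sequences'
+ * litLength sum, hence the relaxed assert above. */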
+
+/* ZSTD_compressSubBlock_sequences() :
+ *  Compresses sequences section for a sub-block.
+ *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ *  symbol compression modes for the super-block.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
+ *  @return : compressed size of sequences section of a sub-block
+ *            Or 0 if it is unable to compress
+ *            Or error code. */
+static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                              const seqDef* sequences, size_t nbSeq,
+                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                              void* dst, size_t dstCapacity,
+                                              const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    BYTE* seqHead;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+    *entropyWritten = 0;
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "");
+    if (nbSeq < 0x7F)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        return op - ostart;
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+    if (writeEntropy) {
+        const U32 LLtype = fseMetadata->llType;
+        const U32 Offtype = fseMetadata->ofType;
+        const U32 MLtype = fseMetadata->mlType;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        op += fseMetadata->fseTablesSize;
+    } else {
+        const U32 repeat = set_repeat;
+        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+    }
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, oend - op,
+                                        fseTables->matchlengthCTable, mlCode,
+                                        fseTables->offcodeCTable, ofCode,
+                                        fseTables->litlengthCTable, llCode,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+#endif
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
+    }
+
+    /* zstd versions <= 1.4.0 mistakenly report error when
+     * sequences section body size is less than 3 bytes.
+     * Fixed by https://github.com/facebook/zstd/pull/1664.
+     * This can happen when the previous sequences section block is compressed
+     * with rle mode and the current block's sequences section is compressed
+     * with repeat mode where sequences section body size can be 1 byte.
+     */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (op-seqHead < 4) {
+        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+                    "an uncompressed block when sequences are < 4 bytes");
+        return 0;
+    }
+#endif
+
+    *entropyWritten = 1;
+    return op - ostart;
+}
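+/* Sketch of the variable-length nbSeq header written above (assuming
+ * LONGNBSEQ == 0x7F00, its value elsewhere in the compressor internals):
+ *
+ *   nbSeq = 100   -> 1 byte  : 0x64
+ *   nbSeq = 1000  -> 2 bytes : 0x83 0xE8   ((1000>>8)+0x80, 1000&0xFF)
+ *   nbSeq = 40000 -> 3 bytes : 0xFF, LE16(40000 - LONGNBSEQ)
+ *
+ * The seqHead byte then packs the three 2-bit encoding types as
+ * (LLtype<<6) + (Offtype<<4) + (MLtype<<2). */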
+
+/* ZSTD_compressSubBlock() :
+ *  Compresses a single sub-block.
+ *  @return : compressed size of the sub-block
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                    const seqDef* sequences, size_t nbSeq,
+                                    const BYTE* literals, size_t litSize,
+                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                    const ZSTD_CCtx_params* cctxParams,
+                                    void* dst, size_t dstCapacity,
+                                    const int bmi2,
+                                    int writeLitEntropy, int writeSeqEntropy,
+                                    int* litEntropyWritten, int* seqEntropyWritten,
+                                    U32 lastBlock)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart + ZSTD_blockHeaderSize;
+    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+                                                        &entropyMetadata->hufMetadata, literals, litSize,
+                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+        if (cLitSize == 0) return 0;
+        op += cLitSize;
+    }
+    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
+                                                  &entropyMetadata->fseMetadata,
+                                                  sequences, nbSeq,
+                                                  llCode, mlCode, ofCode,
+                                                  cctxParams,
+                                                  op, oend-op,
+                                                  bmi2, writeSeqEntropy, seqEntropyWritten);
+        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+        if (cSeqSize == 0) return 0;
+        op += cSeqSize;
+    }
+    /* Write block header */
+    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+        MEM_writeLE24(ostart, cBlockHeader24);
+    }
+    return op-ostart;
+}
+
+static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = 255;
+    size_t literalSectionHeaderSize = 3; /* Use hard-coded size of 3 bytes */
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, unsigned maxCode,
+                        size_t nbSeq, const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        cSymbolTypeSizeEstimateInBits = max <= defaultMax
+                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
+                : ERROR(GENERIC);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits / 8;
+}
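+/* For example (illustrative): offset codes are themselves the number of
+ * extra raw bits, so offset codes {5, 7, 7} add 5 + 7 + 7 = 19 bits on top
+ * of the FSE symbol cost, and the total is converted to bytes (bits / 8). */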
+
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 3; /* Use hard-coded size of 3 bytes */
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
+                                         nbSeq, fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
+                                         nbSeq, fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
+                                         nbSeq, fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                        const BYTE* ofCodeTable,
+                                        const BYTE* llCodeTable,
+                                        const BYTE* mlCodeTable,
+                                        size_t nbSeq,
+                                        const ZSTD_entropyCTables_t* entropy,
+                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                        void* workspace, size_t wkspSize,
+                                        int writeLitEntropy, int writeSeqEntropy) {
+    size_t cSizeEstimate = 0;
+    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return cSizeEstimate + ZSTD_blockHeaderSize;
+}
+
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+        return 1;
+    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+        return 1;
+    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+        return 1;
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_multi() :
+ *  Breaks super-block into multiple sub-blocks and compresses them.
+ *  Entropy will be written to the first block.
+ *  The following blocks will use repeat mode to compress.
+ *  All sub-blocks are compressed blocks (no raw or rle blocks).
+ *  @return : compressed size of the super block (which is multiple ZSTD blocks)
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+                            const ZSTD_compressedBlockState_t* prevCBlock,
+                            ZSTD_compressedBlockState_t* nextCBlock,
+                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            const ZSTD_CCtx_params* cctxParams,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const int bmi2, U32 lastBlock,
+                            void* workspace, size_t wkspSize)
+{
+    const seqDef* const sstart = seqStorePtr->sequencesStart;
+    const seqDef* const send = seqStorePtr->sequences;
+    const seqDef* sp = sstart;
+    const BYTE* const lstart = seqStorePtr->litStart;
+    const BYTE* const lend = seqStorePtr->lit;
+    const BYTE* lp = lstart;
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    const BYTE* llCodePtr = seqStorePtr->llCode;
+    const BYTE* mlCodePtr = seqStorePtr->mlCode;
+    const BYTE* ofCodePtr = seqStorePtr->ofCode;
+    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+    size_t litSize, seqCount;
+    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeSeqEntropy = 1;
+    int lastSequence = 0;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+                (unsigned)(lend-lp), (unsigned)(send-sstart));
+
+    litSize = 0;
+    seqCount = 0;
+    do {
+        size_t cBlockSizeEstimate = 0;
+        if (sstart == send) {
+            lastSequence = 1;
+        } else {
+            const seqDef* const sequence = sp + seqCount;
+            lastSequence = sequence == send - 1;
+            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+            seqCount++;
+        }
+        if (lastSequence) {
+            assert(lp <= lend);
+            assert(litSize <= (size_t)(lend - lp));
+            litSize = (size_t)(lend - lp);
+        }
+        /* I think there is an optimization opportunity here.
+         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+         * since it recalculates estimate from scratch.
+         * For example, it would recount literal distribution and symbol codes every time.
+         */
+        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+                                                       &nextCBlock->entropy, entropyMetadata,
+                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+            int litEntropyWritten = 0;
+            int seqEntropyWritten = 0;
+            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                       sp, seqCount,
+                                                       lp, litSize,
+                                                       llCodePtr, mlCodePtr, ofCodePtr,
+                                                       cctxParams,
+                                                       op, oend-op,
+                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+                                                       &litEntropyWritten, &seqEntropyWritten,
+                                                       lastBlock && lastSequence);
+            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+            if (cSize > 0 && cSize < decompressedSize) {
+                DEBUGLOG(5, "Committed the sub-block");
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
+                sp += seqCount;
+                lp += litSize;
+                op += cSize;
+                llCodePtr += seqCount;
+                mlCodePtr += seqCount;
+                ofCodePtr += seqCount;
+                litSize = 0;
+                seqCount = 0;
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
+                }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
+            }
+        }
+    } while (!lastSequence);
+    if (writeLitEntropy) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+    }
+    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+        /* If we haven't written our entropy tables, then we've violated our contract and
+         * must emit an uncompressed block.
+         */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        return 0;
+    }
+    if (ip < iend) {
+        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            seqDef const* seq;
+            repcodes_t rep;
+            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+            for (seq = sstart; seq < sp; ++seq) {
+                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+    return op-ostart;
+}
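+/* Illustrative trace (arbitrary numbers): with targetCBlockSize == 1300,
+ * sequences are accumulated until the running estimate first exceeds 1300
+ * bytes; the group is committed as one sub-block only if it actually shrank
+ * (cSize < decompressedSize), otherwise the same sequences stay pending and
+ * further ones are folded into the next attempt. */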
+
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+
+    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+          &zc->blockState.prevCBlock->entropy,
+          &zc->blockState.nextCBlock->entropy,
+          &zc->appliedParams,
+          &entropyMetadata,
+          zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+
+    return ZSTD_compressSubBlock_multi(&zc->seqStore,
+            zc->blockState.prevCBlock,
+            zc->blockState.nextCBlock,
+            &entropyMetadata,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            src, srcSize,
+            zc->bmi2, lastBlock,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+}
diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h
new file mode 100644
index 0000000..224ece7
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_superblock.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+
+#include <linux/zstd.h> /* ZSTD_CCtx */
+
+/*-*************************************
+*  Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub-blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock);
+
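+/* Minimal usage sketch (illustrative; upstream zstd API names shown and the
+ * parameter value is arbitrary): this path is driven by the targetCBlockSize
+ * parameter rather than called directly, e.g.
+ *
+ *   ZSTD_CCtx* cctx = ZSTD_createCCtx();
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1300);
+ *
+ * Compressed blocks are then split into sub-blocks of roughly 1300 bytes
+ * each via ZSTD_compressSuperBlock(). */
+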
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
new file mode 100644
index 0000000..98e359a
--- /dev/null
+++ b/lib/zstd/compress/zstd_cwksp.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+*  Structures
+***************************************/
+typedef enum {
+    ZSTD_cwksp_alloc_objects,
+    ZSTD_cwksp_alloc_buffers,
+    ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/*
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+    ZSTD_cwksp_dynamic_alloc,
+    ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
+/*
+ * Zstd fits all its internal datastructures into a single contiguous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory they've been
+ *     allocated are less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_customFree{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;
+    void* workspaceEnd;
+
+    void* objectEnd;
+    void* tableEnd;
+    void* tableValidEnd;
+    void* allocStart;
+
+    BYTE allocFailed;
+    int workspaceOversizedDuration;
+    ZSTD_cwksp_alloc_phase_e phase;
+    ZSTD_cwksp_static_alloc_e isStatic;
+} ZSTD_cwksp;
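+
+/* Illustrative allocation sketch (sizes arbitrary): reservations must follow
+ * the phase order objects -> buffers -> aligned -> tables; once the phase has
+ * advanced, requests from earlier phases fail:
+ *
+ *   ZSTD_cwksp ws;
+ *   ZSTD_cwksp_init(&ws, mem, memSize, ZSTD_cwksp_static_alloc);
+ *   void* obj = ZSTD_cwksp_reserve_object(&ws, 64);    - objects phase
+ *   BYTE* buf = ZSTD_cwksp_reserve_buffer(&ws, 1024);  - buffers phase
+ *   void* tbl = ZSTD_cwksp_reserve_table(&ws, 4096);   - tables phase
+ *   ZSTD_cwksp_reserve_object(&ws, 64);                - fails, sets allocFailed
+ */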
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/*
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    assert((align & mask) == 0);
+    return (size + mask) & ~mask;
+}
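+/* For example (illustrative): ZSTD_cwksp_align(13, 8) == (13 + 7) & ~7 == 16,
+ * while an already-aligned size is unchanged: ZSTD_cwksp_align(16, 8) == 16. */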
+
+/*
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is, though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+    if (size == 0)
+        return 0;
+    return size;
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/*
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/*
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    alloc = (BYTE *)ws->allocStart - bytes;
+
+    if (bytes == 0)
+        return NULL;
+
+
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+
+
+    return alloc;
+}
+
+/*
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/*
+ * Reserves and returns memory sized and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/*
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc = ws->tableEnd;
+    void* end = (BYTE *)alloc + bytes;
+    void* top = ws->allocStart;
+
+    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+
+
+    return alloc;
+}
+
+/*
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* alloc = ws->objectEnd;
+    void* end = (BYTE*)alloc + roundedBytes;
+
+
+    DEBUGLOG(5,
+        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase, no advance is possible */
+    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+        DEBUGLOG(4, "cwksp: object alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->objectEnd = end;
+    ws->tableEnd = end;
+    ws->tableValidEnd = end;
+
+
+    return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/*
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+
+
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->workspace = start;
+    ws->workspaceEnd = (BYTE*)start + size;
+    ws->objectEnd = ws->workspace;
+    ws->tableValidEnd = ws->objectEnd;
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    ws->isStatic = isStatic;
+    ZSTD_cwksp_clear(ws);
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_customMalloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
+    ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
+    return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    void *ptr = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_customFree(ptr, customMem);
+}
+
+/*
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    return ws->allocFailed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_available(
+        ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+        && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+        ws->workspaceOversizedDuration++;
+    } else {
+        ws->workspaceOversizedDuration = 0;
+    }
+}
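+/* Sketch of the intended call pattern (illustrative; the FACTOR and
+ * MAXDURATION constants are defined elsewhere in the compressor internals):
+ * bump the counter once per compression, and only treat the workspace as
+ * wasteful after it has stayed oversized for many consecutive compressions:
+ *
+ *   ZSTD_cwksp_bump_oversized_duration(ws, neededSpace);
+ *   if (ZSTD_cwksp_check_wasteful(ws, neededSpace)) {
+ *       free and reallocate a right-sized workspace
+ *   }
+ */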
+
+
+#endif /* ZSTD_CWKSP_H */
diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
new file mode 100644
index 0000000..b0424d2
--- /dev/null
+++ b/lib/zstd/compress/zstd_double_fast.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_double_fast.h"
+
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashLarge = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash tables.
+     * Insert the other positions into the large hash table if their entry
+     * is empty.
+     */
+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        U32 i;
+        for (i = 0; i < fastHashFillStep; ++i) {
+            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
+            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
+            if (i == 0)
+                hashSmall[smHash] = curr + i;
+            if (i == 0 || hashLarge[lgHash] == 0)
+                hashLarge[lgHash] = curr + i;
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
+    }   }
+}
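+/* The "double" in double-fast refers to these two tables: hashSmall hashes
+ * mls (minMatch) bytes, hashLarge hashes 8 bytes. During the fill, one
+ * position in every fastHashFillStep is inserted into both tables; for
+ * ZSTD_dtlm_full, the skipped positions are additionally inserted into the
+ * large table whenever its slot is still empty. */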
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams =
+                                     dictMode == ZSTD_dictMatchState ?
+                                     &dms->cParams : NULL;
+    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
+                                     dms->hashTable : NULL;
+    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
+                                     dms->chainTable : NULL;
+    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictStartIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->hashLog : hBitsL;
+    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->chainLog : hBitsS;
+    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
+    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+
+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+    }
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        U32 offset;
+        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndexL = hashLong[h2];
+        U32 matchIndexS = hashSmall[h];
+        const BYTE* matchLong = base + matchIndexL;
+        const BYTE* match = base + matchIndexS;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                            && repIndex < prefixLowestIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
+
+        /* check dictMatchState repcode */
+        if (dictMode == ZSTD_dictMatchState
+            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        /* check noDict repcode */
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        if (matchIndexL > prefixLowestIndex) {
+            /* check prefix long match */
+            if (MEM_read64(matchLong) == MEM_read64(ip)) {
+                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+                offset = (U32)(ip-matchLong);
+                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState long match */
+            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
+            assert(dictMatchL < dictEnd);
+
+            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
+                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
+                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
+                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
+                goto _match_found;
+        }   }
+
+        if (matchIndexS > prefixLowestIndex) {
+            /* check prefix short match */
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState short match */
+            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            match = dictBase + dictMatchIndexS;
+            matchIndexS = dictMatchIndexS + dictIndexDelta;
+
+            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+        }   }
+
+        ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+        PREFETCH_L1(ip+256);
+#endif
+        continue;
+
+_search_next_long:
+
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            U32 const matchIndexL3 = hashLong[hl3];
+            const BYTE* matchL3 = base + matchIndexL3;
+            hashLong[hl3] = curr + 1;
+
+            /* check prefix long +1 match */
+            if (matchIndexL3 > prefixLowestIndex) {
+                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
+                    ip++;
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            } else if (dictMode == ZSTD_dictMatchState) {
+                /* check dict long +1 match */
+                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
+                assert(dictMatchL3 < dictEnd);
+                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
+                    ip++;
+                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
+                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+        }   }   }
+
+        /* if no long +1 match, explore the short match we found */
+        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
+            offset = (U32)(curr - matchIndexS);
+            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        } else {
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            offset = (U32)(ip - match);
+            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        }
+
+_match_found:
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            if (dictMode == ZSTD_dictMatchState) {
+                while (ip <= ilimit) {
+                    U32 const current2 = (U32)(ip-base);
+                    U32 const repIndex2 = current2 - offset_2;
+                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
+                        && repIndex2 < prefixLowestIndex ?
+                            dictBase + repIndex2 - dictIndexDelta :
+                            base + repIndex2;
+                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                        ip += repLength2;
+                        anchor = ip;
+                        continue;
+                    }
+                    break;
+            }   }
+
+            if (dictMode == ZSTD_noDict) {
+                while ( (ip <= ilimit)
+                     && ( (offset_2>0)
+                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                    ip += rLength;
+                    anchor = ip;
+                    continue;   /* empirically faster when this repcode check is present (reason unclear) */
+        }   }   }
+    }   /* while (ip < ilimit) */
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
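+/* Summary of the search priority used above, highest first:
+ *   1. repcode at ip+1 (offset_1), emitted with offset code 0;
+ *   2. 8-byte "long" match from hashLong;
+ *   3. "short" match from hashSmall (hashed over mls bytes, verified with a
+ *      4-byte read), which first tries to upgrade itself to a long match at
+ *      ip+1 (_search_next_long) before being accepted;
+ *   4. no match: skip ahead by ((ip-anchor) >> kSearchStrength) + 1, so the
+ *      search accelerates through incompressible regions. */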
+
+
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+    }
+}
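+/* mls is passed as a compile-time constant: each switch arm instantiates a
+ * separate specialization of the FORCE_INLINE_TEMPLATE body, letting the
+ * compiler fold the hash width into constants inside ZSTD_hashPtr. Inputs
+ * with minMatch == 3 fall through to the 4-byte variant, since double-fast
+ * never verifies fewer than 4 bytes. */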
+
+
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
+
+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because repcode check at (ip+1) */
+        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+        const U32 matchIndex = hashSmall[hSmall];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+
+        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+        const U32 matchLongIndex = hashLong[hLong];
+        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* matchLong = matchLongBase + matchLongIndex;
+
+        const U32 curr = (U32)(ip-base);
+        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        size_t mLength;
+        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
+
+        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
+            & (repIndex > dictStartIndex))
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else {
+            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
+                offset = curr - matchLongIndex;
+                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndex3 = hashLong[h3];
+                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
+                const BYTE* match3 = match3Base + matchIndex3;
+                U32 offset;
+                hashLong[h3] = curr + 1;
+                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
+                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
+                    ip++;
+                    offset = curr+1 - matchIndex3;
+                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                } else {
+                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                    offset = curr - matchIndex;
+                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                }
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else {
+                ip += ((ip-anchor) >> kSearchStrength) + 1;
+                continue;
+        }   }
+
+        /* move to next sequence start */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
+                    & (repIndex2 > dictStartIndex))
+                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h
new file mode 100644
index 0000000..6822bde
--- /dev/null
+++ b/lib/zstd/compress/zstd_double_fast.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_DOUBLE_FAST_H
+#define ZSTD_DOUBLE_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+
+#endif /* ZSTD_DOUBLE_FAST_H */
diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
new file mode 100644
index 0000000..96b7d48
--- /dev/null
+++ b/lib/zstd/compress/zstd_fast.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
+#include "zstd_fast.h"
+
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hBits = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
+        hashTable[hash0] = curr;
+        if (dtlm == ZSTD_dtlm_fast) continue;
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hash] == 0) {  /* not yet filled */
+                    hashTable[hash] = curr + p;
+    }   }   }   }
+}
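+/* ZSTD_dtlm_fast indexes only one position in every fastHashFillStep while
+ * loading dictionary content; ZSTD_dtlm_full additionally back-fills the
+ * skipped positions into empty slots, trading slower table load for better
+ * match coverage. */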
+
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
+    ip0 += (ip0 == prefixStart);
+    ip1 = ip0 + 1;
+    {   U32 const curr = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Main Search Loop */
+#ifdef __INTEL_COMPILER
+    /* From the Intel docs: 'The vector pragma indicates that the loop should
+     * be vectorized if it is legal to do so'. It can be combined with
+     * #pragma ivdep, but that is omitted here because Intel warns
+     * against using it. */
+    #pragma vector always
+#endif
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2 - offset_1;
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+
+#if defined(__aarch64__)
+        PREFETCH_L1(ip0+256);
+#endif
+
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            mLength += 4;
+            offcode = 0;
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 4;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
+                while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                    { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                    ip0 += rLength;
+                    ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                    anchor = ip0;
+                    continue;   /* empirically faster when this repcode check is present (confirmed on gcc-8) */
+        }   }   }
+        ip1 = ip0 + 1;
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
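+/* The loop above probes two positions (ip0, ip1) per iteration and checks a
+ * repcode at ip0+2; on the repcode path, the one-byte backward test
+ * (ip2[-1] == repMatch[-1]) lets a repeat match start one byte earlier when
+ * possible. The skip ((ip0-anchor) >> (kSearchStrength-1)) + stepSize grows
+ * with the distance since the last match, so long literal runs are traversed
+ * progressively faster. */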
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState == NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
+    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
+    const U32 dictHLog             = dictCParams->hashLog;
+
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size, so we only assert it here. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+
+    /* ensure there will be no underflow
+     * when translating a dict index into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
+    ip += (dictAndPrefixLength == 0);
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndex = hashTable[h];
+        const BYTE* match = base + matchIndex;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else if ( (matchIndex <= prefixStartIndex) ) {
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            }
+        } else if (MEM_read32(match) != MEM_read32(ip)) {
+            /* it's not a match, and we're not going to check the dictionary */
+            assert(stepSize >= 1);
+            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+            continue;
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            while (((ip>anchor) & (match>prefixStart))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
+
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            assert(base+curr+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
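+/* dictIndexDelta translates dictionary indices into the current window's
+ * index space: localIndex = dictMatchIndex + dictIndexDelta, so a candidate
+ * found in dms->hashTable is stored as an ordinary offset,
+ * curr - (dictMatchIndex + dictIndexDelta), exactly like a prefix match. */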
+
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState != NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because repcode check at (ip+1) */
+        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+        const U32    matchIndex = hashTable[h];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE*  match = matchBase + matchIndex;
+        const U32    curr = (U32)(ip-base);
+        const U32    repIndex = curr + 1 - offset_1;
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
+        assert(offset_1 <= curr +1);   /* check repIndex */
+
+        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
+        } else {
+            if ( (matchIndex < dictStartIndex) ||
+                 (MEM_read32(match) != MEM_read32(ip)) ) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            }
+            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 const offset = curr - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
+        }   }
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
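+/* In the extDict variant, indices below prefixStartIndex live in the old
+ * segment (dictBase) and indices at or above it in the current one (base);
+ * ZSTD_count_2segments extends a match across the boundary by continuing the
+ * comparison from prefixStart once the old segment is exhausted. */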
+
+
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h
new file mode 100644
index 0000000..fddc2f5
--- /dev/null
+++ b/lib/zstd/compress/zstd_fast.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_FAST_H
+#define ZSTD_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_FAST_H */
diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
new file mode 100644
index 0000000..fb54d4e
--- /dev/null
+++ b/lib/zstd/compress/zstd_lazy.c
@@ -0,0 +1,1414 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_lazy.h"
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+
+static void
+ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+                const BYTE* ip, const BYTE* iend,
+                U32 mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hashLog = cParams->hashLog;
+
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    if (idx != target)
+        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+                    idx, target, ms->window.dictLimit);
+    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
+    (void)iend;
+
+    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
+    for ( ; idx < target ; idx++) {
+        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
+        U32    const matchIndex = hashTable[h];
+
+        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
+        U32*   const sortMarkPtr  = nextCandidatePtr + 1;
+
+        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+        hashTable[h] = idx;   /* Update Hash Table */
+        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
+        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
+    }
+    ms->nextToUpdate = target;
+}
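+/* Insertion here is deliberately cheap: each new position is pushed as the
+ * head of a chain and tagged ZSTD_DUBT_UNSORTED_MARK. The actual binary-tree
+ * ordering is deferred to ZSTD_insertDUBT1() below, so sorting cost is only
+ * paid for positions that a later search actually reaches. */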
+
+
+/* ZSTD_insertDUBT1() :
+ *  sort one already inserted but unsorted position
+ *  assumption : curr >= btlow == (curr - btmask)
+ *  doesn't fail */
+static void
+ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
+                 U32 curr, const BYTE* inputEnd,
+                 U32 nbCompares, U32 btLow,
+                 const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
+    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
+
+
+    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+                curr, dictLimit, windowLow);
+    assert(curr >= btLow);
+    assert(ip < iend);   /* condition for ZSTD_count */
+
+    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+        /* note : all candidates are now supposed sorted,
+         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
+         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
+
+        if ( (dictMode != ZSTD_extDict)
+          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
+          || (curr < dictLimit) /* both in extDict */) {
+            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
+                                     || (matchIndex+matchLength >= dictLimit)) ?
+                                        base : dictBase;
+            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
+                 || (curr < dictLimit) );
+            match = mBase + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
+        }
+
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+                    curr, matchIndex, (U32)matchLength);
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+                        matchIndex, btLow, nextPtr[1]);
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+                        matchIndex, btLow, nextPtr[0]);
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+}
+
+
+static size_t
+ZSTD_DUBT_findBetterDictMatch (
+        ZSTD_matchState_t* ms,
+        const BYTE* const ip, const BYTE* const iend,
+        size_t* offsetPtr,
+        size_t bestLength,
+        U32 nbCompares,
+        U32 const mls,
+        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_matchState_t * const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
+    const U32 * const dictHashTable = dms->hashTable;
+    U32         const hashLog = dmsCParams->hashLog;
+    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32               dictMatchIndex = dictHashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    U32         const curr = (U32)(ip-base);
+    const BYTE* const dictBase = dms->window.base;
+    const BYTE* const dictEnd = dms->window.nextSrc;
+    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
+    U32         const dictLowLimit = dms->window.lowLimit;
+    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
+
+    U32*        const dictBt = dms->chainTable;
+    U32         const btLog  = dmsCParams->chainLog - 1;
+    U32         const btMask = (1 << btLog) - 1;
+    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
+
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+
+    (void)dictMode;
+    assert(dictMode == ZSTD_dictMatchState);
+
+    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
+        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match = dictBase + dictMatchIndex;
+        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+        if (dictMatchIndex+matchLength >= dictHighLimit)
+            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+        if (matchLength > bestLength) {
+            U32 matchIndex = dictMatchIndex + dictIndexDelta;
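+            /* accept the longer match only if 4x the length gain
+             * outweighs the additional offset cost, counted in bits */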
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+            }
+            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        } else {
+            /* match is larger than current */
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthLarger = matchLength;
+            dictMatchIndex = nextPtr[0];
+        }
+    }
+
+    if (bestLength >= MINMATCH) {
+        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+    }
+    return bestLength;
+}
+
+
+static size_t
+ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 const mls,
+                        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32          matchIndex  = hashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    U32    const curr = (U32)(ip-base);
+    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32    const unsortLimit = MAX(btLow, windowLow);
+
+    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
+    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+    U32          nbCompares = 1U << cParams->searchLog;
+    U32          nbCandidates = nbCompares;
+    U32          previousCandidate = 0;
+
+    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
+    assert(ip <= iend-8);   /* required for h calculation */
+    assert(dictMode != ZSTD_dedicatedDictSearch);
+
+    /* reach end of unsorted candidates list */
+    while ( (matchIndex > unsortLimit)
+         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+         && (nbCandidates > 1) ) {
+        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+                    matchIndex);
+        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, used to walk back up to the original position */
+        previousCandidate = matchIndex;
+        matchIndex = *nextCandidate;
+        nextCandidate = bt + 2*(matchIndex&btMask);
+        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+        nbCandidates --;
+    }
+
+    /* nullify last candidate if it's still unsorted :
+     * a simplification, slightly detrimental to compression ratio but beneficial for speed */
+    if ( (matchIndex > unsortLimit)
+      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+                    matchIndex);
+        *nextCandidate = *unsortedMark = 0;
+    }
+
+    /* batch sort stacked candidates */
+    matchIndex = previousCandidate;
+    while (matchIndex) {  /* will end on matchIndex == 0 */
+        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+        ZSTD_insertDUBT1(ms, matchIndex, iend,
+                         nbCandidates, unsortLimit, dictMode);
+        matchIndex = nextCandidateIdx;
+        nbCandidates++;
+    }
+
+    /* find longest match */
+    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+        const BYTE* const dictBase = ms->window.dictBase;
+        const U32 dictLimit = ms->window.dictLimit;
+        const BYTE* const dictEnd = dictBase + dictLimit;
+        const BYTE* const prefixStart = base + dictLimit;
+        U32* smallerPtr = bt + 2*(curr&btMask);
+        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+        U32 matchEndIdx = curr + 8 + 1;
+        U32 dummy32;   /* to be nullified at the end */
+        size_t bestLength = 0;
+
+        matchIndex  = hashTable[h];
+        hashTable[h] = curr;   /* Update Hash Table */
+
+        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+            U32* const nextPtr = bt + 2*(matchIndex & btMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match;
+
+            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
+                match = base + matchIndex;
+                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+            } else {
+                match = dictBase + matchIndex;
+                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+                if (matchIndex+matchLength >= dictLimit)
+                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+            }
+
+            if (matchLength > bestLength) {
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+                    if (dictMode == ZSTD_dictMatchState) {
+                        nbCompares = 0; /* in addition to avoiding checking any
+                                         * further in this loop, make sure we
+                                         * skip checking in the dictionary. */
+                    }
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (match[matchLength] < ip[matchLength]) {
+                /* match is smaller than current */
+                *smallerPtr = matchIndex;             /* update smaller idx */
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                *largerPtr = matchIndex;
+                commonLengthLarger = matchLength;
+                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                largerPtr = nextPtr;
+                matchIndex = nextPtr[0];
+        }   }
+
+        *smallerPtr = *largerPtr = 0;
+
+        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+        if (dictMode == ZSTD_dictMatchState && nbCompares) {
+            bestLength = ZSTD_DUBT_findBetterDictMatch(
+                    ms, ip, iend,
+                    offsetPtr, bestLength, nbCompares,
+                    mls, dictMode);
+        }
+
+        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
+        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+        if (bestLength >= MINMATCH) {
+            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+        }
+        return bestLength;
+    }
+}
+
+
+/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iLimit,
+                      size_t* offsetPtr,
+                const U32 mls /* template */,
+                const ZSTD_dictMode_e dictMode)
+{
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateDUBT(ms, ip, iLimit, mls);
+    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
+}
+
+
+static size_t
+ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
+                            const BYTE* ip, const BYTE* const iLimit,
+                                  size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
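+/* The chain table behaves as a ring buffer :
+ * chainTable[idx & mask] holds the index of the previous position sharing
+ * the same hash, linking candidates from newest to oldest. */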
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32* const hashTable = ms->hashTable;
+    U32* const chainTable = ms->chainTable;
+    U32 const chainSize = 1 << ms->cParams.chainLog;
+    U32 idx = ms->nextToUpdate;
+    U32 const minChain = chainSize < target ? target - chainSize : idx;
+    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32 const cacheSize = bucketSize - 1;
+    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
+    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
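+    /* Each hash bucket will end up holding cacheSize most-recent match
+     * indices up front, plus one packed chain descriptor in its last slot. */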
+
+    /* We know the hashtable is oversized by a factor of `bucketSize`.
+     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
+     * single entry. We will use the rest of the space to construct a temporary
+     * chaintable.
+     */
+    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32* const tmpHashTable = hashTable;
+    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
+    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
+    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
+
+    U32 hashIdx;
+
+    assert(ms->cParams.chainLog <= 24);
+    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(idx != 0);
+    assert(tmpMinChain <= minChain);
+
+    /* fill conventional hash table and conventional chain table */
+    for ( ; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
+        if (idx >= tmpMinChain) {
+            tmpChainTable[idx - tmpMinChain] = hashTable[h];
+        }
+        tmpHashTable[h] = idx;
+    }
+
+    /* sort chains into ddss chain table */
+    {
+        U32 chainPos = 0;
+        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
+            U32 count;
+            U32 countBeyondMinChain = 0;
+            U32 i = tmpHashTable[hashIdx];
+            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
+                /* skip through the chain to the first position that won't be
+                 * in the hash cache bucket */
+                if (i < minChain) {
+                    countBeyondMinChain++;
+                }
+                i = tmpChainTable[i - tmpMinChain];
+            }
+            if (count == cacheSize) {
+                for (count = 0; count < chainLimit;) {
+                    if (i < minChain) {
+                        if (!i || countBeyondMinChain++ > cacheSize) {
+                            /* only allow pulling `cacheSize` number of entries
+                             * into the cache or chainTable beyond `minChain`,
+                             * to replace the entries pulled out of the
+                             * chainTable into the cache. This lets us reach
+                             * back further without increasing the total number
+                             * of entries in the chainTable, guaranteeing the
+                             * DDSS chain table will fit into the space
+                             * allocated for the regular one. */
+                            break;
+                        }
+                    }
+                    chainTable[chainPos++] = i;
+                    count++;
+                    if (i < tmpMinChain) {
+                        break;
+                    }
+                    i = tmpChainTable[i - tmpMinChain];
+                }
+            } else {
+                count = 0;
+            }
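+            /* pack the chain descriptor : start position in the upper
+             * 24 bits, chain length in the low 8 bits
+             * (chainLimit is capped at 255 above, so the length always fits) */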
+            if (count) {
+                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
+            } else {
+                tmpHashTable[hashIdx] = 0;
+            }
+        }
+        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
+    }
+
+    /* move chain pointers into the last entry of each hash bucket */
+    for (hashIdx = (1 << hashLog); hashIdx; ) {
+        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 const chainPackedPointer = tmpHashTable[hashIdx];
+        U32 i;
+        for (i = 0; i < cacheSize; i++) {
+            hashTable[bucketIdx + i] = 0;
+        }
+        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
+    }
+
+    /* fill the buckets of the hash table */
+    for (idx = ms->nextToUpdate; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
+                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 i;
+        /* Shift hash cache down 1. */
+        for (i = cacheSize - 1; i; i--)
+            hashTable[h + i] = hashTable[h + i - 1];
+        hashTable[h] = idx;
+    }
+
+    ms->nextToUpdate = target;
+}
+
+
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_HcFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainSize = (1 << cParams->chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
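+    /* chain entries older than curr - chainSize have been overwritten,
+     * so the chain walk below must stop at minChain */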
+    U32 nbAttempts = 1U << cParams->searchLog;
+    size_t ml=4-1;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
+                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
+                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+
+    U32 matchIndex;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32* entry = &dms->hashTable[ddsIdx];
+        PREFETCH_L1(entry);
+    }
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+
+    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+        size_t currentMl=0;
+        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+            if (match[ml] == ip[ml])   /* potentially better */
+                currentMl = ZSTD_count(ip, match, iLimit);
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            assert(match+4 <= dictEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsLowestIndex  = dms->window.dictLimit;
+        const BYTE* const ddsBase = dms->window.base;
+        const BYTE* const ddsEnd  = dms->window.nextSrc;
+        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+        const U32 ddsIndexDelta   = dictLimit - ddsSize;
+        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+        U32 ddsAttempt;
+
+        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 const chainIndex = chainPackedPointer >> 8;
+
+            PREFETCH_L1(&dms->chainTable[chainIndex]);
+        }
+
+        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+            match = ddsBase + matchIndex;
+
+            if (!matchIndex) {
+                return ml;
+            }
+
+            /* guaranteed by table construction */
+            (void)ddsLowestIndex;
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) {
+                    /* best possible, avoids read overflow on next attempt */
+                    return ml;
+                }
+            }
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 chainIndex = chainPackedPointer >> 8;
+            U32 const chainLength = chainPackedPointer & 0xFF;
+            U32 const chainAttempts = nbAttempts - ddsAttempt;
+            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+            U32 chainAttempt;
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+            }
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+                size_t currentMl=0;
+                const BYTE* match;
+                matchIndex = dms->chainTable[chainIndex];
+                match = ddsBase + matchIndex;
+
+                /* guaranteed by table construction */
+                assert(matchIndex >= ddsLowestIndex);
+                assert(match+4 <= ddsEnd);
+                if (MEM_read32(match) == MEM_read32(ip)) {
+                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+                }
+
+                /* save best solution */
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+                }
+            }
+        }
+    } else if (dictMode == ZSTD_dictMatchState) {
+        const U32* const dmsChainTable = dms->chainTable;
+        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
+        const U32 dmsChainMask         = dmsChainSize - 1;
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
+
+        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
+
+        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
+            size_t currentMl=0;
+            const BYTE* const match = dmsBase + matchIndex;
+            assert(match+4 <= dmsEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+
+            if (matchIndex <= dmsMinChain) break;
+
+            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
+        }
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
+    }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth,
+                        ZSTD_dictMode_e const dictMode)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+
+    /*
+     * This table is indexed first by the four ZSTD_dictMode_e values, and then
+     * by the two searchMethod_e values. NULLs are placed for configurations
+     * that should never occur (extDict modes go to the other implementation
+     * below and there is no DDSS for binary tree search yet).
+     */
+    const searchMax_f searchFuncs[4][2] = {
+        {
+            ZSTD_HcFindBestMatch_selectMLS,
+            ZSTD_BtFindBestMatch_selectMLS
+        },
+        {
+            NULL,
+            NULL
+        },
+        {
+            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
+            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+        },
+        {
+            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
+            NULL
+        }
+    };
+
+    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
+
+    const int isDMS = dictMode == ZSTD_dictMatchState;
+    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+    const int isDxS = isDMS || isDDS;
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 dictLowestIndex      = isDxS ? dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = isDxS ? dms->window.base : NULL;
+    const BYTE* const dictLowest   = isDxS ? dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd      = isDxS ? dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = isDxS ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+    assert(searchMax != NULL);
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
+
+    /* init */
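+    /* when there is no history at all, the first position cannot match
+     * anything : skip it */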
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }
+    if (isDxS) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured a ~5% speed loss on levels 5 & 6 (greedy) when the code
+     * alignment is randomly perturbed. To fix the instability, align the loop on a 32-byte boundary.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+
+        /* check repCode */
+        if (isDxS) {
+            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
+            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
+                                && repIndex < prefixLowestIndex) ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            if (depth==0) goto _storeSequence;
+        }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
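+            /* the gain formulas below favor repcodes (offset 0), which cost
+             * no offset bits : an explicit offset is charged roughly
+             * log2(offset) bits against its length advantage */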
+            if ( (dictMode == ZSTD_noDict)
+              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                if ((mlRep >= 4) && (gain2 > gain1))
+                    matchLength = mlRep, offset = 0, start = ip;
+            }
+            if (isDxS) {
+                const U32 repIndex = (U32)(ip - base) - offset_1;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                    int const gain2 = (int)(mlRep * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+            }
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                if ( (dictMode == ZSTD_noDict)
+                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                    int const gain2 = (int)(mlRep * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+                if (isDxS) {
+                    const U32 repIndex = (U32)(ip - base) - offset_1;
+                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                        int const gain2 = (int)(mlRep * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((mlRep >= 4) && (gain2 > gain1))
+                            matchLength = mlRep, offset = 0, start = ip;
+                    }
+                }
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior :
+         * (-offset+ZSTD_REP_MOVE-1) is unsigned, so adding it to start
+         * overflows the pointer. */
+        /* catch up */
+        if (offset) {
+            if (dictMode == ZSTD_noDict) {
+                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
+                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                    { start--; matchLength++; }
+            }
+            if (isDxS) {
+                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
+                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            }
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        if (isDxS) {
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex = current2 - offset_2;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                        dictBase - dictIndexDelta + repIndex :
+                        base + repIndex;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional underflow */)
+                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
+                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    ip += matchLength;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        if (dictMode == ZSTD_noDict) {
+            while ( ((ip <= ilimit) & (offset_2>0))
+                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+                /* store sequence */
+                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1 ? offset_1 : savedOffset;
+    rep[1] = offset_2 ? offset_2 : savedOffset;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
+    const U32 windowLog = ms->cParams.windowLog;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
+    /* init */
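+    /* when ip sits at the very first byte of the window, there is no
+     * previous byte to match against : skip it */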
+    ip += (ip == prefixStart);
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured a ~5% speed loss on levels 5 & 6 (greedy) when the code
+     * alignment is randomly perturbed. To fix the instability, align the loop on a 32-byte boundary.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+        U32 curr = (U32)(ip-base);
+
+        /* check repCode */
+        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
+            const U32 repIndex = (U32)(curr+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
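+            /* the wrap-around comparison below rejects repIndex values within
+             * 3 bytes of dictLimit, where a 4-byte read would straddle
+             * the extDict / prefix boundary */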
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+                /* repcode detected, we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                if (depth==0) goto _storeSequence;
+        }   }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            curr++;
+            /* check repCode */
+            if (offset) {
+                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                const U32 repIndex = (U32)(curr - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                    int const gain2 = (int)(repLength * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((repLength >= 4) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+            }   }
+
+            /* search match, depth 1 */
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                curr++;
+                /* check repCode */
+                if (offset) {
+                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                    const U32 repIndex = (U32)(curr - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                }   }
+
+                /* search match, depth 2 */
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset) {
+            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while (ip <= ilimit) {
+            const U32 repCurrent = (U32)(ip-base);
+            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+            const U32 repIndex = repCurrent - offset_2;
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                /* repcode detected, we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+}
+
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+}
diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
new file mode 100644
index 0000000..2fc5a61
--- /dev/null
+++ b/lib/zstd/compress/zstd_lazy.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LAZY_H
+#define ZSTD_LAZY_H
+
+
+#include "zstd_compress_internal.h"
+
+/*
+ * Dedicated Dictionary Search Structure bucket log. In the
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
+ * one.
+ */
+#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_LAZY_H */
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
new file mode 100644
index 0000000..8ef7e88
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.c
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_ldm.h"
+
+#include "../common/debug.h"
+#include <linux/xxhash.h>
+#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
+
+#define LDM_BUCKET_SIZE_LOG 3
+#define LDM_MIN_MATCH_LENGTH 64
+#define LDM_HASH_RLOG 7
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/* ZSTD_ldm_gear_init():
+ *
+ * Initializes the rolling hash state such that it will honor the
+ * settings in params. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate. */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
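+
+/* Worked example (illustrative, not part of upstream zstd): with the default
+ * parameters minMatchLength=64 and hashRateLog=7, maxBitsInMask==64, so
+ *     stopMask = ((U64)0x7F) << 57 = 0xFE00000000000000ULL.
+ * A split then triggers when the 7 highest bits of the rolling hash are all
+ * zero, which happens with probability 2^-7 per byte, i.e. on average every
+ * 2^hashRateLog = 128 bytes, matching condition 1 above. */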
+
+/* ZSTD_ldm_gear_feed():
+ *
+ * Registers in the splits array all the split points found in the first
+ * size bytes following the data pointer. This function terminates when
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
+ * present in the splits array.
+ *
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
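+
+/* Worked example (illustrative): each GEAR_ITER_ONCE() step computes
+ *     hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff];
+ * so bit k of the hash depends on the last k input bytes, as noted above. A
+ * split is recorded whenever (hash & stopMask) == 0, and the function returns
+ * early once LDM_BATCH_SIZE splits have accumulated, which is why callers
+ * (e.g. ZSTD_ldm_fillHashTable() below) loop until all input is consumed. */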
+
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
+{
+    params->windowLog = cParams->windowLog;
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (params->hashLog == 0) {
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
+        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+    }
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
+    }
+    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+}
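+
+/* Worked example (illustrative): for windowLog=24 with every LDM field left
+ * at 0, the defaults above resolve to bucketSizeLog=3, minMatchLength=64,
+ * hashLog = MAX(ZSTD_HASHLOG_MIN, 24 - LDM_HASH_RLOG) = 17, and
+ * hashRateLog = 24 - 17 = 7, i.e. roughly one in 128 positions is indexed. */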
+
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+}
+
+/* ZSTD_ldm_getBucket() :
+ *  Returns a pointer to the start of the bucket associated with hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+}
+
+/* ZSTD_ldm_insertEntry() :
+ *  Insert the entry with corresponding hash into the hash table */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
+
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
+
+}
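+
+/* Worked example (illustrative): with bucketSizeLog=3 each bucket is a ring
+ * of 2^3 = 8 entries. bucketOffsets[hash] advances 0,1,...,7,0,1,... thanks
+ * to the mask ((1u << 3) - 1), so a 9th insertion into the same bucket
+ * silently overwrites the oldest entry instead of growing the bucket. */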
+
+/* ZSTD_ldm_countBackwardsMatch() :
+ *  Returns the number of bytes that match backwards before pIn and pMatch.
+ *
+ *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pMatchBase)
+{
+    size_t matchLength = 0;
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
+        pIn--;
+        pMatch--;
+        matchLength++;
+    }
+    return matchLength;
+}
+
+/* ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, continue counting backwards from `pExtDictEnd` */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
+        /* If backwards match is entirely in the extDict or prefix, immediately return */
+        return matchLength;
+    }
+    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
+    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
+    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
+    return matchLength;
+}
+
+/* ZSTD_ldm_fillFastTables() :
+ *
+ *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+ *  This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  The tables for the other strategies are filled within their
+ *  block compressors. */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
+{
+    const BYTE* const iend = (const BYTE*)end;
+
+    switch(ms->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+    case ZSTD_btlazy2:
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        break;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    return 0;
+}
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* ldmState, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
+    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = xxh64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
+    }
+}
+
+
+/* ZSTD_ldm_limitTableUpdate() :
+ *
+ *  Sets ms->nextToUpdate to a position closer to anchor if it has fallen
+ *  far behind (after a long match, only update tables a limited amount). */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+{
+    U32 const curr = (U32)(anchor - ms->window.base);
+    if (curr > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            curr - MIN(512, curr - ms->nextToUpdate - 1024);
+    }
+}
+
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - HASH_READ_SIZE;
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash state */
+    ldmRollingHashState_t hashState;
+    /* Arrays for staged-processing */
+    size_t* const splits = ldmState->splitIndices;
+    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
+    unsigned numSplits;
+
+    if (srcSize < minMatchLength)
+        return iend - anchor;
+
+    /* Initialize the rolling hash state with the first minMatchLength bytes */
+    ZSTD_ldm_gear_init(&hashState, params);
+    {
+        size_t n = 0;
+
+        while (n < minMatchLength) {
+            numSplits = 0;
+            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
+                                    splits, &numSplits);
+        }
+        ip += minMatchLength;
+    }
+
+    while (ip < ilimit) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
+                                    splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            BYTE const* const split = ip + splits[n] - minMatchLength;
+            U64 const xxhash = xxh64(split, minMatchLength, 0);
+            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+
+            candidates[n].split = split;
+            candidates[n].hash = hash;
+            candidates[n].checksum = (U32)(xxhash >> 32);
+            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
+            PREFETCH_L1(candidates[n].bucket);
+        }
+
+        for (n = 0; n < numSplits; n++) {
+            size_t forwardMatchLength = 0, backwardMatchLength = 0,
+                   bestMatchLength = 0, mLength;
+            BYTE const* const split = candidates[n].split;
+            U32 const checksum = candidates[n].checksum;
+            U32 const hash = candidates[n].hash;
+            ldmEntry_t* const bucket = candidates[n].bucket;
+            ldmEntry_t const* cur;
+            ldmEntry_t const* bestEntry = NULL;
+            ldmEntry_t newEntry;
+
+            newEntry.offset = (U32)(split - base);
+            newEntry.checksum = checksum;
+
+            /* If a split point would generate a sequence overlapping with
+             * the previous one, we merely register it in the hash table and
+             * move on */
+            if (split < anchor) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                    curForwardMatchLength =
+                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
+                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
+                }
+                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+
+            /* No match found -- insert an entry into the hash table
+             * and process the next candidate match */
+            if (bestEntry == NULL) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            /* Match found */
+            mLength = forwardMatchLength + backwardMatchLength;
+            {
+                U32 const offset = (U32)(split - base) - bestEntry->offset;
+                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+                /* Out of sequence storage */
+                if (rawSeqStore->size == rawSeqStore->capacity)
+                    return ERROR(dstSize_tooSmall);
+                seq->litLength = (U32)(split - backwardMatchLength - anchor);
+                seq->matchLength = (U32)mLength;
+                seq->offset = offset;
+                rawSeqStore->size++;
+            }
+
+            /* Insert the current entry into the hash table --- it must be
+             * done after the previous block to avoid clobbering bestEntry */
+            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+
+            anchor = split + forwardMatchLength;
+        }
+
+        ip += hashed;
+    }
+
+    return iend - anchor;
+}
+
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
+{
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
+}
+
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+            /* invalidate dictionaries on overflow correction */
+            ldmState->loadedDictEnd = 0;
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test
+         *         the offset against maxDist directly.
+         *
+         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+         * that any offset used is valid at the END of the sequence, since it may
+         * be split into two sequences. This condition holds when using
+         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+         * against maxDist directly, we'll have to carefully handle that case.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
+        } else {
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
+        }
+    }
+    return 0;
+}
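+
+/* Worked example (illustrative): kMaxChunkSize is 1 MB, so for a 2.5 MB
+ * input the loop above runs
+ *     nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0)
+ *              = 2 + 1 = 3
+ * chunks of 1 MB, 1 MB and 0.5 MB, applying overflow correction and maxDist
+ * enforcement at each chunk boundary. */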
+
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
+        }
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
+                }
+                rawSeqStore->pos++;
+            }
+            return;
+        }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
+
+/*
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
+        }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
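+
+/* Worked example (illustrative): with remaining=100, minMatch=4 and a next
+ * raw sequence {litLength=30, matchLength=90}: 100 < 30+90 and 100 > 30, so
+ * the sequence returned for this block is {litLength=30, matchLength=70}.
+ * ZSTD_ldm_skipSequences() then consumes the 100 bytes: litLength drops to 0
+ * and matchLength to 90-70=20, leaving {litLength=0, matchLength=20} as the
+ * first raw sequence of the next block. */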
+
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
+    if (cParams->strategy >= ZSTD_btopt) {
+        size_t lastLLSize;
+        ms->ldmSeqStore = rawSeqStore;
+        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
+        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
+        return lastLLSize;
+    }
+
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the literals */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
+
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
+
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
+        }
+    }
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}
diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
new file mode 100644
index 0000000..25b2527
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+
+#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
+#include <linux/zstd.h>   /* ZSTD_CCtx, size_t */
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params);
+
+/*
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            void const* src, size_t srcSize);
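+
+/*
+ * Hedged usage sketch (illustration only; the exact call signatures used by
+ * the compressor live elsewhere in this tree): a typical LDM pipeline is
+ *   1. ZSTD_window_update() on all new input (see the NOTE above);
+ *   2. ZSTD_ldm_generateSequences(ldms, &rawSeqStore, &params, src, srcSize);
+ *   3. ZSTD_ldm_blockCompress(&rawSeqStore, ms, seqStore, rep, ip, blockSize)
+ *      once per block, interleaving raw sequences with the secondary block
+ *      compressor.
+ * Any input that is skipped rather than compressed must be run through
+ * ZSTD_ldm_skipSequences() so that rawSeqStore stays in sync.
+ */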
+
+/*
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
+
+/* ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for the long distance matching tables, or 0 if
+ *  LDM is disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/* ZSTD_ldm_getMaxNbSeq() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/* ZSTD_ldm_adjustParameters() :
+ *  If the params->hashRateLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+
+#endif /* ZSTD_LDM_H */
diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
new file mode 100644
index 0000000..e5c24d8
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm_geartab.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_GEARTAB_H
+#define ZSTD_LDM_GEARTAB_H
+
+static U64 ZSTD_ldm_gearTab[256] = {
+    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
+    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
+    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
+    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
+    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
+    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
+    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
+    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
+    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
+    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
+    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
+    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
+    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
+    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
+    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
+    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
+    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
+    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
+    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
+    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
+    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
+    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
+    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
+    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
+    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
+    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
+    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
+    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
+    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
+    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
+    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
+    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
+    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
+    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
+    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
+    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
+    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
+    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
+    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
+    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
+    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
+    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
+    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
+    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
+    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
+    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
+    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
+    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
+    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
+    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
+    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
+    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
+    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
+    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
+    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
+    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
+    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
+    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
+    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
+    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
+    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
+    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
+    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
+    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
+    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
+    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
+    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
+    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
+    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
+    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
+    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
+    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
+    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
+    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
+    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
+    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
+    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
+    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
+    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
+    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
+    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
+    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
+    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
+    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
+    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
+    0x2b4da14f2613d8f4
+};
+
+#endif /* ZSTD_LDM_GEARTAB_H */
diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
new file mode 100644
index 0000000..dfc55e3
--- /dev/null
+++ b/lib/zstd/compress/zstd_opt.c
@@ -0,0 +1,1358 @@
+/*
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * Disable inlining for the optimal parser for the kernel build.
+ * It is unlikely to be used in the kernel, and where it is used
+ * latency shouldn't matter because it is very slow to begin with.
+ * We prefer a ~180KB binary size win over faster optimal parsing.
+ *
+ * TODO(https://github.com/facebook/zstd/issues/2862):
+ * Improve the code size of the optimal parser in general, so we
+ * don't need this hack for the kernel build.
+ */
+#define ZSTD_NO_INLINE 1
+
+#include "zstd_compress_internal.h"
+#include "hist.h"
+#include "zstd_opt.h"
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE     (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
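+
+/* Worked example (illustrative): for rawStat=5, stat=6, hb=ZSTD_highbit32(6)=2,
+ * so BWeight = 2*256 = 512 and FWeight = (6 << 8) >> 2 = 384, giving a weight
+ * of 896, i.e. 3.5 "bits" at BITCOST_ACCURACY=8; ZSTD_bitWeight() would round
+ * the same stat down to 512 (2.0 bits). */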
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
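+
+/* Worked example (illustrative): with ZSTD_FREQ_DIV=4 and malus=0 each
+ * element becomes 1 + (elt >> 4), so counts of 100, 16 and 0 shrink to 7, 2
+ * and 1: previous-block statistics fade by roughly 16x, yet every symbol
+ * keeps a non-zero frequency, so no price ever becomes infinite. */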
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+                  int const optLevel)
+{
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+    optPtr->priceType = zop_dynamic;
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+            optPtr->priceType = zop_predef;
+        }
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;
+
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
+            }   }
+
+        } else {  /* not a dictionary */
+
+            assert(optPtr->litFreq != NULL);
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+            }
+
+            {   unsigned ll;
+                for (ll=0; ll<=MaxLL; ll++)
+                    optPtr->litLengthFreq[ll] = 1;
+            }
+            optPtr->litLengthSum = MaxLL+1;
+
+            {   unsigned ml;
+                for (ml=0; ml<=MaxML; ml++)
+                    optPtr->matchLengthFreq[ml] = 1;
+            }
+            optPtr->matchLengthSum = MaxML+1;
+
+            {   unsigned of;
+                for (of=0; of<=MaxOff; of++)
+                    optPtr->offCodeFreq[of] = 1;
+            }
+            optPtr->offCodeSum = MaxOff+1;
+
+        }
+
+    } else {   /* new block : re-use previous statistics, scaled down */
+
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+    }
+
+    ZSTD_setBasePrices(optPtr, optLevel);
+}
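+
+/* Worked example (illustrative): when seeding from a dictionary above, a
+ * literal whose Huffman code costs bitCost=3 bits gets
+ *     litFreq = 1 << (scaleLog - bitCost) = 1 << (11 - 3) = 256
+ * out of a total of about 2^11 (Kraft equality), so log2(litSum/litFreq) = 3
+ * reproduces the cost implied by the dictionary's table. */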
+
+/* ZSTD_rawLiteralsCost() :
+ * price of literals (only) in specified segment (whose length can be 0).
+ * does not include price of literalLength symbol */
+static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                const optState_t* const optPtr,
+                                int optLevel)
+{
+    if (litLength == 0) return 0;
+
+    if (!ZSTD_compressedLiterals(optPtr))
+        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
+
+    if (optPtr->priceType == zop_predef)
+        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
+
+    /* dynamic statistics */
+    {   U32 price = litLength * optPtr->litSumBasePrice;
+        U32 u;
+        for (u=0; u < litLength; u++) {
+            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+        }
+        return price;
+    }
+}
+
+/* ZSTD_litLengthPrice() :
+ * cost of literalLength symbol */
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
+{
+    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+
+    /* dynamic statistics */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+             + optPtr->litLengthSumBasePrice
+             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+    }
+}
+
+/* ZSTD_getMatchPrice() :
+ * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+             const optState_t* const optPtr,
+                   int const optLevel)
+{
+    U32 price;
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    assert(matchLength >= MINMATCH);
+
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+
+    /* dynamic statistics */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
+
+    /* match Length */
+    {   U32 const mlCode = ZSTD_MLcode(mlBase);
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
+    }
+
+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
+
+    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
+    return price;
+}
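+
+/* Worked example (illustrative): an offset of (1<<20) gives
+ * offCode = ZSTD_highbit32(offset+1) = 20, so at optLevel<2 the handicap
+ * above adds (20-19)*2*BITCOST_MULTIPLIER = 512, i.e. 2 extra "bits",
+ * nudging the parser toward shorter offsets that decompress faster. */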
+
+/* ZSTD_updateStats() :
+ * assumption : literals + litLength <= iend */
+static void ZSTD_updateStats(optState_t* const optPtr,
+                             U32 litLength, const BYTE* literals,
+                             U32 offsetCode, U32 matchLength)
+{
+    /* literals */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
+        for (u=0; u < litLength; u++)
+            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
+    }
+
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
+        optPtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+    }
+
+    /* match Length */
+    {   U32 const mlBase = matchLength - MINMATCH;
+        U32 const mlCode = ZSTD_MLcode(mlBase);
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
+    }
+}
+
+
+/* ZSTD_readMINMATCH() :
+ * result is only valid for equality comparisons, not as a numeric value
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
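+
+/* Worked example of the 3-byte case: for input bytes {b0,b1,b2,b3},
+ * a little-endian MEM_read32() yields (b3<<24)|(b2<<16)|(b1<<8)|b0 and
+ * the <<8 discards b3; a big-endian read yields (b0<<24)|(b1<<16)|(b2<<8)|b3
+ * and the >>8 discards b3. Either way the result depends only on b0..b2, so
+ *     ZSTD_readMINMATCH(ip, 3) == ZSTD_readMINMATCH(match, 3)
+ * holds exactly when the first 3 bytes match. */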
+
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
+{
+    U32* const hashTable3 = ms->hashTable3;
+    U32 const hashLog3 = ms->hashLog3;
+    const BYTE* const base = ms->window.base;
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
+    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+    assert(hashLog3 > 0);
+
+    while(idx < target) {
+        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
+        idx++;
+    }
+
+    *nextToUpdate3 = target;
+    return hashTable3[hash3];
+}
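+
+/* Hedged usage sketch: the returned index is only a hash-bucket candidate,
+ * so callers must still verify the bytes, as done later in this file:
+ *     U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, &nextToUpdate3, ip);
+ *     if (matchIndex3 >= matchLow)
+ *         mlen = ZSTD_count(ip, base + matchIndex3, iLimit);   // confirm real length
+ */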
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/* ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                U32 const mls, const int extDict)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32 matchIndex = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    const U32 curr = (U32)(ip-base);
+    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowLow = ms->window.lowLimit;
+    U32 matchEndIdx = curr+8+1;
+    size_t bestLength = 8;
+    U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
+
+    assert(ip <= iend-8);   /* required for h calculation */
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    assert(windowLow > 0);
+    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hashLog is small (<= 11) */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
+        if (matchIndex == predictedSmall) {
+            /* no need to check length, result known */
+            *smallerPtr = matchIndex;
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
+        }
+        if (matchIndex == predictedLarge) {
+            *largerPtr = matchIndex;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
+        }
+#endif
+
+        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
+            match = base + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions could corrupt the tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > curr + 8);
+        return MAX(positions, matchEndIdx - (curr + 8));
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+                idx, target, dictMode);
+
+    while(idx < target) {
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        assert(idx < (U32)(idx + forward));
+        idx += forward;
+    }
+    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
+    ms->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
+    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
+}
+
+FORCE_INLINE_TEMPLATE
+U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+                    ZSTD_matchState_t* ms,
+                    U32* nextToUpdate3,
+                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
+                    const U32 rep[ZSTD_REP_NUM],
+                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                    const U32 lengthToBeat,
+                    U32 const mls /* template */)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    const BYTE* const base = ms->window.base;
+    U32 const curr = (U32)(ip-base);
+    U32 const hashLog = cParams->hashLog;
+    U32 const minMatch = (mls==3) ? 3 : 4;
+    U32* const hashTable = ms->hashTable;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32 matchIndex  = hashTable[h];
+    U32* const bt   = ms->chainTable;
+    U32 const btLog = cParams->chainLog - 1;
+    U32 const btMask= (1U << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const dictBase = ms->window.dictBase;
+    U32 const dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+    U32 const matchLow = windowLow ? windowLow : 1;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 mnum = 0;
+    U32 nbCompares = 1U << cParams->searchLog;
+
+    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
+    const ZSTD_compressionParameters* const dmsCParams =
+                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
+    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
+    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
+    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
+    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
+    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
+    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
+    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
+    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
+    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
+
+    size_t bestLength = lengthToBeat-1;
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
+
+    /* check repCode */
+    assert(ll0 <= 1);   /* necessarily 1 or 0 */
+    {   U32 const lastR = ZSTD_REP_NUM + ll0;
+        U32 repCode;
+        for (repCode = ll0; repCode < lastR; repCode++) {
+            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            U32 const repIndex = curr - repOffset;
+            U32 repLen = 0;
+            assert(curr >= dictLimit);
+            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
+                /* We must validate the repcode offset because when we're using a dictionary the
+                 * valid offset range shrinks when the dictionary goes out of bounds.
+                 */
+                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
+                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
+                }
+            } else {  /* repIndex < dictLimit || repIndex >= curr */
+                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
+                                             dmsBase + repIndex - dmsIndexDelta :
+                                             dictBase + repIndex;
+                assert(curr >= windowLow);
+                if ( dictMode == ZSTD_extDict
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
+                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
+                }
+                if (dictMode == ZSTD_dictMatchState
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
+                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
+            }   }
+            /* save longer solution */
+            if (repLen > bestLength) {
+                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                            repCode, ll0, repOffset, repLen);
+                bestLength = repLen;
+                matches[mnum].off = repCode - ll0;
+                matches[mnum].len = (U32)repLen;
+                mnum++;
+                if ( (repLen > sufficient_len)
+                   | (ip+repLen == iLimit) ) {  /* best possible */
+                    return mnum;
+    }   }   }   }
+
+    /* HC3 match finder */
+    if ((mls == 3) /*static*/ && (bestLength < mls)) {
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
+        if ((matchIndex3 >= matchLow)
+          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
+            size_t mlen;
+            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
+                const BYTE* const match = base + matchIndex3;
+                mlen = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex3;
+                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
+            }
+
+            /* save best solution */
+            if (mlen >= mls /* == 3 > bestLength */) {
+                DEBUGLOG(8, "found small match with hlog3, of length %u",
+                            (U32)mlen);
+                bestLength = mlen;
+                assert(curr > matchIndex3);
+                assert(mnum==0);  /* no prior solution */
+                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].len = (U32)mlen;
+                mnum = 1;
+                if ( (mlen > sufficient_len) |
+                     (ip+mlen == iLimit) ) {  /* best possible length */
+                    ms->nextToUpdate = curr+1;  /* skip insertion */
+                    return 1;
+        }   }   }
+        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
+    }
+
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(curr > matchIndex);
+
+        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
+            match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
+        } else {
+            match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
+        }
+
+        if (matchLength > bestLength) {
+            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+            assert(matchEndIdx > matchIndex);
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+            bestLength = matchLength;
+            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].len = (U32)matchLength;
+            mnum++;
+            if ( (matchLength > ZSTD_OPT_NUM)
+               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
+                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            /* match smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
+        } else {
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+
+    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dictMatchState && nbCompares) {
+        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
+        U32 dictMatchIndex = dms->hashTable[dmsH];
+        const U32* const dmsBt = dms->chainTable;
+        commonLengthSmaller = commonLengthLarger = 0;
+        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
+            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match = dmsBase + dictMatchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
+            if (dictMatchIndex+matchLength >= dmsHighLimit)
+                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+            if (matchLength > bestLength) {
+                matchIndex = dictMatchIndex + dmsIndexDelta;
+                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                bestLength = matchLength;
+                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].len = (U32)matchLength;
+                mnum++;
+                if ( (matchLength > ZSTD_OPT_NUM)
+                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
+            if (match[matchLength] < ip[matchLength]) {
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                commonLengthLarger = matchLength;
+                dictMatchIndex = nextPtr[0];
+            }
+        }
+    }
+
+    assert(matchEndIdx > curr+8);
+    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
+    return mnum;
+}
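+
+/* Hedged note on the matches[] convention used above: entries are appended
+ * with strictly increasing .len, and .off values below ZSTD_REP_NUM are
+ * repcode indices (already shifted by ll0), while larger values encode
+ * distance + ZSTD_REP_MOVE. A consumer could decode:
+ *     if (matches[i].off >= ZSTD_REP_NUM)
+ *         distance = matches[i].off - ZSTD_REP_MOVE;
+ *     else
+ *         repSlot = matches[i].off;   // resolve against rep[] history
+ */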
+
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
+                        ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
+                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const matchLengthSearch = cParams->minMatch;
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
+    switch(matchLengthSearch)
+    {
+    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
+    default :
+    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
+    case 7 :
+    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    }
+}
+
+/* ***********************
+*  LDM helper functions  *
+*************************/
+
+/* Struct containing info needed to make decision about ldm inclusion */
+typedef struct {
+    rawSeqStore_t seqStore;         /* External match candidates store for this block */
+    U32 startPosInBlock;            /* Start position of the current match candidate */
+    U32 endPosInBlock;              /* End position of the current match candidate */
+    U32 offset;                     /* Offset of the match candidate */
+} ZSTD_optLdm_t;
+
+/* ZSTD_optLdm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ */
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
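+
+/* Worked example: with one raw sequence {litLength=5, matchLength=10} and
+ * posInSequence=0, skipping 7 bytes keeps pos unchanged and sets
+ * posInSequence=7 (2 bytes into the match part); skipping 15 bytes or more
+ * steps pos past the sequence and resets posInSequence to 0. */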
+
+/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
+ * Calculates the beginning and end of the next match in the current block.
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
+ */
+static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                                   U32 blockBytesRemaining) {
+    rawSeq currSeq;
+    U32 currBlockEndPos;
+    U32 literalsBytesRemaining;
+    U32 matchBytesRemaining;
+
+    /* Setting match end position to MAX to ensure we never use an LDM during this block */
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        return;
+    }
+    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
+       based on ldmSeqStore->posInSequence */
+    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
+    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
+    currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
+            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
+            0;
+    matchBytesRemaining = (literalsBytesRemaining == 0) ?
+            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
+            currSeq.matchLength;
+
+    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
+    if (literalsBytesRemaining >= blockBytesRemaining) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
+        return;
+    }
+
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
+    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
+    optLdm->offset = currSeq.offset;
+
+    if (optLdm->endPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match */
+        optLdm->endPosInBlock = currBlockEndPos;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
+    } else {
+        /* Consume nb of bytes equal to size of sequence left */
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
+    }
+}
+
+/* ZSTD_optLdm_maybeAddMatch():
+ * Adds a match into 'matches' if it is long enough, based on its
+ * 'startPosInBlock' and 'endPosInBlock'. Maintains the correct ordering of 'matches'.
+ */
+static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
+    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
+    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+
+    /* Ensure that current block position is not outside of the match */
+    if (currPosInBlock < optLdm->startPosInBlock
+      || currPosInBlock >= optLdm->endPosInBlock
+      || candidateMatchLength < MINMATCH) {
+        return;
+    }
+
+    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+                 candidateOffCode, candidateMatchLength, currPosInBlock);
+        matches[*nbMatches].len = candidateMatchLength;
+        matches[*nbMatches].off = candidateOffCode;
+        (*nbMatches)++;
+    }
+}
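+
+/* Worked example: an ldm covering block positions [100, 612) yields a
+ * candidate of length 512 when queried at currPosInBlock=100, 511 at 101,
+ * and so on; once the remaining length falls below MINMATCH (or the query
+ * position leaves the range) the candidate is rejected. */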
+
+/* ZSTD_optLdm_processMatchCandidate():
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
+ */
+static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
+                                              U32 currPosInBlock, U32 remainingBytes) {
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        return;
+    }
+
+    if (currPosInBlock >= optLdm->endPosInBlock) {
+        if (currPosInBlock > optLdm->endPosInBlock) {
+            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
+             * at the end of a match from the ldm seq store, and will often be some bytes
+             * beyond 'endPosInBlock'. As such, we need to correct for these "overshoots".
+             */
+            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
+        }
+        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
+    }
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+}
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+
+
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
+{
+    return sol.litlen + sol.mlen;
+}
+
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+    int const nbElts = lastEltID + 1;
+    int enb;
+    for (enb=0; enb < nbElts; enb++) {
+        (void)table;
+        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
+        RAWLOG(2, "%4i,", table[enb]);
+    }
+    RAWLOG(2, " \n");
+}
+
+#endif
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                               seqStore_t* seqStore,
+                               U32 rep[ZSTD_REP_NUM],
+                         const void* src, size_t srcSize,
+                         const int optLevel,
+                         const ZSTD_dictMode_e dictMode)
+{
+    optState_t* const optStatePtr = &ms->opt;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* mlen==0 means the position is reached by literals, not a match */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+                int const price = opt[cur-1].price
+                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                assert(price < 1000000000); /* overflow check */
+                if (price <= opt[cur].price) {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = 0;
+                    opt[cur].off = 0;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                } else {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                }
+            }
+
+            /* Set the repcodes of the current position. We must do it here
+             * because we rely on the repcodes of the 2nd to last sequence being
+             * correct to set the next chunks repcodes during the backward
+             * traversal.
+             */
+            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+            assert(cur >= opt[cur].mlen);
+            if (opt[cur].mlen != 0) {
+                U32 const prev = cur - opt[cur].mlen;
+                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+            } else {
+                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+            }
+
+            /* last match must start at a minimum distance of 8 from iend */
+            if (inr > ilimit) continue;
+
+            if (cur == last_pos) break;
+
+            if ( (optLevel==0) /*static_test*/
+              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                continue;  /* skip unpromising positions; ~+6% speed, -0.01 ratio */
+            }
+
+            {   U32 const ll0 = (opt[cur].mlen != 0);
+                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+                U32 const previousPrice = opt[cur].price;
+                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 matchNb;
+
+                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                                  (U32)(inr-istart), (U32)(iend-inr));
+
+                if (!nbMatches) {
+                    DEBUGLOG(7, "rPos:%u : no match found", cur);
+                    continue;
+                }
+
+                {   U32 const maxML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+                                inr-istart, cur, nbMatches, maxML);
+
+                    if ( (maxML > sufficient_len)
+                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
+                        lastSequence.mlen = maxML;
+                        lastSequence.off = matches[nbMatches-1].off;
+                        lastSequence.litlen = litlen;
+                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+                        last_pos = cur + ZSTD_totalLen(lastSequence);
+                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                        goto _shortestPath;
+                }   }
+
+                /* set prices using matches found at position == cur */
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const lastML = matches[matchNb].len;
+                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
+                    U32 mlen;
+
+                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
+                                matchNb, matches[matchNb].off, lastML, litlen);
+
+                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
+                        U32 const pos = cur + mlen;
+                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+
+                        if ((pos > last_pos) || (price < opt[pos].price)) {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            opt[pos].mlen = mlen;
+                            opt[pos].off = offset;
+                            opt[pos].litlen = litlen;
+                            opt[pos].price = price;
+                        } else {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
+                        }
+            }   }   }
+        }  /* for (cur = 1; cur <= last_pos; cur++) */
+
+        lastSequence = opt[last_pos];
+        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+        assert(cur < ZSTD_OPT_NUM);  /* guard against overflow */
+
+_shortestPath:   /* cur, last_pos and lastSequence must be set */
+        assert(opt[0].mlen == 0);
+
+        /* Set the next chunk's repcodes based on the repcodes of the beginning
+         * of the last match, and the last sequence. This avoids us having to
+         * update them while traversing the sequences.
+         */
+        if (lastSequence.mlen != 0) {
+            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            ZSTD_memcpy(rep, &reps, sizeof(reps));
+        } else {
+            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+        }
+
+        {   U32 const storeEnd = cur + 1;
+            U32 storeStart = storeEnd;
+            U32 seqPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur); (void)last_pos;
+            assert(storeEnd < ZSTD_OPT_NUM);
+            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+            opt[storeEnd] = lastSequence;
+            while (seqPos > 0) {
+                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+                storeStart--;
+                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+                opt[storeStart] = opt[seqPos];
+                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
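+
+/* Hedged summary of the parser above: opt[] is a shortest-path table over
+ * relative block positions, where opt[p].price caches the cheapest known
+ * cost to reach position p. Matches relax edges p -> p+mlen, a single
+ * literal relaxes p-1 -> p, and the _shortestPath traversal walks back via
+ * ZSTD_totalLen() to emit the chosen sequences in forward order. */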
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
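+
+/* Worked example (assuming ZSTD_FREQ_DIV == 4, its value in upstream zstd):
+ * with bonus=0, a collected count of 3 becomes (3<<4)-1 = 47 and a count
+ * of 1 becomes 15, so first-pass statistics dominate the per-sequence +1
+ * updates applied during the second pass. */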
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+               const void* src, size_t srcSize)
+{
+    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as two's complement) */
+
+    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+
+    /* reinforce weight of collected statistics */
+    ZSTD_upscaleStats(&ms->opt);
+}
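+
+/* Hedged sketch of the invalidation above: shifting window.base back by
+ * srcSize gives the same physical bytes fresh indices on the second pass,
+ * while stale hash/btree entries from the first pass now hold indices
+ * below the new lowLimit and are therefore treated as out-of-window. */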
+
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy:
+     * this strategy makes a first pass over first block to collect statistics
+     * and seed next round's statistics with it.
+     * After 1st pass, function forgets everything, and starts a new block.
+     * Consequently, this can only work if no data has been previously loaded in tables,
+     * i.e. no dictionary, no prefix, no ldm preprocessing.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)   /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
+      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      ) {
+        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
+    }
+
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
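+
+/* Illustration of the gating above: only a sufficiently large first block
+ * of a frame (litLengthSum==0, nothing loaded or skipped, no ldm output)
+ * pays for the extra pass; every later block, and any dictionary/prefix
+ * setup, falls through directly to the single btultra pass. */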
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+}
+
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+}
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is specific to the first block (no prefix) */
diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h
new file mode 100644
index 0000000..22b8628
--- /dev/null
+++ b/lib/zstd/compress/zstd_opt.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_OPT_H
+#define ZSTD_OPT_H
+
+
+#include "zstd_compress_internal.h"
+
+/* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is specific to the first block (no prefix) */
+
+
+#endif /* ZSTD_OPT_H */
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
deleted file mode 100644
index 66cd487a..0000000
--- a/lib/zstd/decompress.c
+++ /dev/null
@@ -1,2531 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* ***************************************************************
-*  Tuning parameters
-*****************************************************************/
-/*!
-*  MAXWINDOWSIZE_DEFAULT :
-*  maximum window size accepted by DStream, by default.
-*  Frames requiring more memory will be rejected.
-*/
-#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */
-#endif
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h" /* low level memory routines */
-#include "zstd_internal.h"
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memcpy, memmove, memset */
-
-#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
-
-/*-*************************************
-*  Macros
-***************************************/
-#define ZSTD_isError ERR_isError /* for inlining */
-#define FSE_isError ERR_isError
-#define HUF_isError ERR_isError
-
-/*_*******************************************************
-*  Memory operations
-**********************************************************/
-static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); }
-
-/*-*************************************************************
-*   Context management
-***************************************************************/
-typedef enum {
-	ZSTDds_getFrameHeaderSize,
-	ZSTDds_decodeFrameHeader,
-	ZSTDds_decodeBlockHeader,
-	ZSTDds_decompressBlock,
-	ZSTDds_decompressLastBlock,
-	ZSTDds_checkChecksum,
-	ZSTDds_decodeSkippableHeader,
-	ZSTDds_skipFrame
-} ZSTD_dStage;
-
-typedef struct {
-	FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
-	FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
-	FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-	HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
-	U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2];
-	U32 rep[ZSTD_REP_NUM];
-} ZSTD_entropyTables_t;
-
-struct ZSTD_DCtx_s {
-	const FSE_DTable *LLTptr;
-	const FSE_DTable *MLTptr;
-	const FSE_DTable *OFTptr;
-	const HUF_DTable *HUFptr;
-	ZSTD_entropyTables_t entropy;
-	const void *previousDstEnd; /* detect continuity */
-	const void *base;	   /* start of curr segment */
-	const void *vBase;	  /* virtual start of previous segment if it was just before curr one */
-	const void *dictEnd;	/* end of previous segment */
-	size_t expected;
-	ZSTD_frameParams fParams;
-	blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
-	ZSTD_dStage stage;
-	U32 litEntropy;
-	U32 fseEntropy;
-	struct xxh64_state xxhState;
-	size_t headerSize;
-	U32 dictID;
-	const BYTE *litPtr;
-	ZSTD_customMem customMem;
-	size_t litSize;
-	size_t rleSize;
-	BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
-	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
-
-size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); }
-
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx)
-{
-	dctx->expected = ZSTD_frameHeaderSize_prefix;
-	dctx->stage = ZSTDds_getFrameHeaderSize;
-	dctx->previousDstEnd = NULL;
-	dctx->base = NULL;
-	dctx->vBase = NULL;
-	dctx->dictEnd = NULL;
-	dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-	dctx->litEntropy = dctx->fseEntropy = 0;
-	dctx->dictID = 0;
-	ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-	memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
-	dctx->LLTptr = dctx->entropy.LLTable;
-	dctx->MLTptr = dctx->entropy.MLTable;
-	dctx->OFTptr = dctx->entropy.OFTable;
-	dctx->HUFptr = dctx->entropy.hufTable;
-	return 0;
-}
-
-ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_DCtx *dctx;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
-	if (!dctx)
-		return NULL;
-	memcpy(&dctx->customMem, &customMem, sizeof(customMem));
-	ZSTD_decompressBegin(dctx);
-	return dctx;
-}
-
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createDCtx_advanced(stackMem);
-}
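
A minimal usage sketch of the workspace-based initialization above (hypothetical caller; kvmalloc/kvfree and the errno choices are assumptions, not part of this file):

	size_t const wkspSize = ZSTD_DCtxWorkspaceBound();
	void *wksp = kvmalloc(wkspSize, GFP_KERNEL); /* one allocation backs the whole DCtx */
	ZSTD_DCtx *dctx;

	if (!wksp)
		return -ENOMEM;
	dctx = ZSTD_initDCtx(wksp, wkspSize);
	if (!dctx) { /* workspace too small */
		kvfree(wksp);
		return -EINVAL;
	}
	/* ... decompress with dctx ... */
	kvfree(wksp);
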
-
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx)
-{
-	if (dctx == NULL)
-		return 0; /* support free on NULL */
-	ZSTD_free(dctx, dctx->customMem);
-	return 0; /* reserved as a potential error code in the future */
-}
-
-void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
-{
-	size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
-	memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
-}
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict);
-
-/*-*************************************************************
-*   Decompression section
-***************************************************************/
-
-/*! ZSTD_isFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- *  Note 3 : Skippable Frame Identifiers are considered valid. */
-unsigned ZSTD_isFrame(const void *buffer, size_t size)
-{
-	if (size < 4)
-		return 0;
-	{
-		U32 const magic = ZSTD_readLE32(buffer);
-		if (magic == ZSTD_MAGICNUMBER)
-			return 1;
-		if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START)
-			return 1;
-	}
-	return 0;
-}
-
-/** ZSTD_frameHeaderSize() :
-*   srcSize must be >= ZSTD_frameHeaderSize_prefix.
-*   @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize)
-{
-	if (srcSize < ZSTD_frameHeaderSize_prefix)
-		return ERROR(srcSize_wrong);
-	{
-		BYTE const fhd = ((const BYTE *)src)[4];
-		U32 const dictID = fhd & 3;
-		U32 const singleSegment = (fhd >> 5) & 1;
-		U32 const fcsId = fhd >> 6;
-		return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId);
-	}
-}
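
A worked instance of the size formula above (descriptor value assumed):

	/* fhd = 0x20  ->  dictID = 0, singleSegment = 1, fcsId = 0
	 *   size = ZSTD_frameHeaderSize_prefix + !1 + 0 + 0 + (1 && !0)
	 *        = 5 + 0 + 0 + 0 + 1 = 6 bytes
	 * (a single-segment frame with fcsId == 0 still spends one byte on the
	 *  frame content size, which is why the last term exists; with fhd = 0x00
	 *  the !singleSegment term contributes the window descriptor byte instead)
	 */
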
-
-/** ZSTD_getFrameParams() :
-*   decode Frame Header, or require a larger `srcSize`.
-*   @return : 0 if `fparamsPtr` was correctly filled,
-*            >0 if `srcSize` is too small, in which case the result is the expected `srcSize`,
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize)
-{
-	const BYTE *ip = (const BYTE *)src;
-
-	if (srcSize < ZSTD_frameHeaderSize_prefix)
-		return ZSTD_frameHeaderSize_prefix;
-	if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) {
-		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-			if (srcSize < ZSTD_skippableHeaderSize)
-				return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
-			memset(fparamsPtr, 0, sizeof(*fparamsPtr));
-			fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4);
-			fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
-			return 0;
-		}
-		return ERROR(prefix_unknown);
-	}
-
-	/* ensure there is enough `srcSize` to fully read/decode frame header */
-	{
-		size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
-		if (srcSize < fhsize)
-			return fhsize;
-	}
-
-	{
-		BYTE const fhdByte = ip[4];
-		size_t pos = 5;
-		U32 const dictIDSizeCode = fhdByte & 3;
-		U32 const checksumFlag = (fhdByte >> 2) & 1;
-		U32 const singleSegment = (fhdByte >> 5) & 1;
-		U32 const fcsID = fhdByte >> 6;
-		U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
-		U32 windowSize = 0;
-		U32 dictID = 0;
-		U64 frameContentSize = 0;
-		if ((fhdByte & 0x08) != 0)
-			return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */
-		if (!singleSegment) {
-			BYTE const wlByte = ip[pos++];
-			U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-			if (windowLog > ZSTD_WINDOWLOG_MAX)
-				return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */
-			windowSize = (1U << windowLog);
-			windowSize += (windowSize >> 3) * (wlByte & 7);
-		}
-
-		switch (dictIDSizeCode) {
-		default: /* impossible */
-		case 0: break;
-		case 1:
-			dictID = ip[pos];
-			pos++;
-			break;
-		case 2:
-			dictID = ZSTD_readLE16(ip + pos);
-			pos += 2;
-			break;
-		case 3:
-			dictID = ZSTD_readLE32(ip + pos);
-			pos += 4;
-			break;
-		}
-		switch (fcsID) {
-		default: /* impossible */
-		case 0:
-			if (singleSegment)
-				frameContentSize = ip[pos];
-			break;
-		case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break;
-		case 2: frameContentSize = ZSTD_readLE32(ip + pos); break;
-		case 3: frameContentSize = ZSTD_readLE64(ip + pos); break;
-		}
-		if (!windowSize)
-			windowSize = (U32)frameContentSize;
-		if (windowSize > windowSizeMax)
-			return ERROR(frameParameter_windowTooLarge);
-		fparamsPtr->frameContentSize = frameContentSize;
-		fparamsPtr->windowSize = windowSize;
-		fparamsPtr->dictID = dictID;
-		fparamsPtr->checksumFlag = checksumFlag;
-	}
-	return 0;
-}
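
The three-way return convention documented above is easiest to see in a caller sketch (hypothetical; `buf`/`len` and the errno mapping are assumptions):

	ZSTD_frameParams params;
	size_t const ret = ZSTD_getFrameParams(&params, buf, len);

	if (ZSTD_isError(ret))
		return -EINVAL; /* malformed or unknown frame header */
	if (ret > 0)
		return ret;     /* caller must supply at least `ret` bytes and retry */
	/* ret == 0: params.windowSize, params.frameContentSize, params.dictID
	 * and params.checksumFlag are now valid */
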
-
-/** ZSTD_getFrameContentSize() :
-*   compatible with legacy mode
-*   @return : decompressed size of the single frame pointed to by `src` if known, otherwise
-*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
-*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
-{
-	{
-		ZSTD_frameParams fParams;
-		if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0)
-			return ZSTD_CONTENTSIZE_ERROR;
-		if (fParams.windowSize == 0) {
-			/* Either skippable or empty frame, size == 0 either way */
-			return 0;
-		} else if (fParams.frameContentSize != 0) {
-			return fParams.frameContentSize;
-		} else {
-			return ZSTD_CONTENTSIZE_UNKNOWN;
-		}
-	}
-}
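
One common use of this helper is sizing the destination buffer before a one-shot decompression; a sketch (kvmalloc and the fallback policy are assumptions):

	unsigned long long const rSize = ZSTD_getFrameContentSize(src, srcSize);

	if (rSize == ZSTD_CONTENTSIZE_ERROR)
		return -EINVAL;
	if (rSize == ZSTD_CONTENTSIZE_UNKNOWN)
		return -EOPNOTSUPP; /* size not recorded in the header: stream instead */
	dst = kvmalloc(rSize, GFP_KERNEL);
	if (!dst)
		return -ENOMEM;
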
-
-/** ZSTD_findDecompressedSize() :
- *  compatible with legacy mode
- *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
- *      skippable frames
- *  @return : decompressed size of the frames contained */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize)
-{
-	{
-		unsigned long long totalDstSize = 0;
-		while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-			const U32 magicNumber = ZSTD_readLE32(src);
-
-			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-				size_t skippableSize;
-				if (srcSize < ZSTD_skippableHeaderSize)
-					return ERROR(srcSize_wrong);
-				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-				if (srcSize < skippableSize) {
-					return ZSTD_CONTENTSIZE_ERROR;
-				}
-
-				src = (const BYTE *)src + skippableSize;
-				srcSize -= skippableSize;
-				continue;
-			}
-
-			{
-				unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-				if (ret >= ZSTD_CONTENTSIZE_ERROR)
-					return ret;
-
-				/* check for overflow */
-				if (totalDstSize + ret < totalDstSize)
-					return ZSTD_CONTENTSIZE_ERROR;
-				totalDstSize += ret;
-			}
-			{
-				size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
-				if (ZSTD_isError(frameSrcSize)) {
-					return ZSTD_CONTENTSIZE_ERROR;
-				}
-
-				src = (const BYTE *)src + frameSrcSize;
-				srcSize -= frameSrcSize;
-			}
-		}
-
-		if (srcSize) {
-			return ZSTD_CONTENTSIZE_ERROR;
-		}
-
-		return totalDstSize;
-	}
-}
-
-/** ZSTD_decodeFrameHeader() :
-*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
-*   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize)
-{
-	size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
-	if (ZSTD_isError(result))
-		return result; /* invalid header */
-	if (result > 0)
-		return ERROR(srcSize_wrong); /* headerSize too small */
-	if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
-		return ERROR(dictionary_wrong);
-	if (dctx->fParams.checksumFlag)
-		xxh64_reset(&dctx->xxhState, 0);
-	return 0;
-}
-
-typedef struct {
-	blockType_e blockType;
-	U32 lastBlock;
-	U32 origSize;
-} blockProperties_t;
-
-/*! ZSTD_getcBlockSize() :
-*   Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr)
-{
-	if (srcSize < ZSTD_blockHeaderSize)
-		return ERROR(srcSize_wrong);
-	{
-		U32 const cBlockHeader = ZSTD_readLE24(src);
-		U32 const cSize = cBlockHeader >> 3;
-		bpPtr->lastBlock = cBlockHeader & 1;
-		bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
-		bpPtr->origSize = cSize; /* only useful for RLE */
-		if (bpPtr->blockType == bt_rle)
-			return 1;
-		if (bpPtr->blockType == bt_reserved)
-			return ERROR(corruption_detected);
-		return cSize;
-	}
-}
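
A worked decoding of the 3-byte block header read above (bytes assumed; block-type numbering follows the format spec, bt_raw = 0, bt_rle = 1, bt_compressed = 2, bt_reserved = 3):

	/* header bytes 0x25 0x40 0x00 (little-endian) -> cBlockHeader = 0x004025
	 *   lastBlock = 0x004025 & 1        = 1    (this is the final block)
	 *   blockType = (0x004025 >> 1) & 3 = 2    (bt_compressed)
	 *   cSize     = 0x004025 >> 3       = 2052 compressed bytes follow
	 */
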
-
-static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	if (srcSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memcpy(dst, src, srcSize);
-	return srcSize;
-}
-
-static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize)
-{
-	if (srcSize != 1)
-		return ERROR(srcSize_wrong);
-	if (regenSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memset(dst, *(const BYTE *)src, regenSize);
-	return regenSize;
-}
-
-/*! ZSTD_decodeLiteralsBlock() :
-	@return : nb of bytes read from src (< srcSize) */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
-	if (srcSize < MIN_CBLOCK_SIZE)
-		return ERROR(corruption_detected);
-
-	{
-		const BYTE *const istart = (const BYTE *)src;
-		symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
-
-		switch (litEncType) {
-		case set_repeat:
-			if (dctx->litEntropy == 0)
-				return ERROR(dictionary_corrupted);
-			fallthrough;
-		case set_compressed:
-			if (srcSize < 5)
-				return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
-			{
-				size_t lhSize, litSize, litCSize;
-				U32 singleStream = 0;
-				U32 const lhlCode = (istart[0] >> 2) & 3;
-				U32 const lhc = ZSTD_readLE32(istart);
-				switch (lhlCode) {
-				case 0:
-				case 1:
-				default: /* note : default is impossible, since lhlCode is in [0..3] */
-					/* 2 - 2 - 10 - 10 */
-					singleStream = !lhlCode;
-					lhSize = 3;
-					litSize = (lhc >> 4) & 0x3FF;
-					litCSize = (lhc >> 14) & 0x3FF;
-					break;
-				case 2:
-					/* 2 - 2 - 14 - 14 */
-					lhSize = 4;
-					litSize = (lhc >> 4) & 0x3FFF;
-					litCSize = lhc >> 18;
-					break;
-				case 3:
-					/* 2 - 2 - 18 - 18 */
-					lhSize = 5;
-					litSize = (lhc >> 4) & 0x3FFFF;
-					litCSize = (lhc >> 22) + (istart[4] << 10);
-					break;
-				}
-				if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-					return ERROR(corruption_detected);
-				if (litCSize + lhSize > srcSize)
-					return ERROR(corruption_detected);
-
-				if (HUF_isError(
-					(litEncType == set_repeat)
-					    ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)
-							    : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr))
-					    : (singleStream
-						   ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-										 dctx->entropy.workspace, sizeof(dctx->entropy.workspace))
-						   : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-										   dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
-					return ERROR(corruption_detected);
-
-				dctx->litPtr = dctx->litBuffer;
-				dctx->litSize = litSize;
-				dctx->litEntropy = 1;
-				if (litEncType == set_compressed)
-					dctx->HUFptr = dctx->entropy.hufTable;
-				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-				return litCSize + lhSize;
-			}
-
-		case set_basic: {
-			size_t litSize, lhSize;
-			U32 const lhlCode = ((istart[0]) >> 2) & 3;
-			switch (lhlCode) {
-			case 0:
-			case 2:
-			default: /* note : default is impossible, since lhlCode is in [0..3] */
-				lhSize = 1;
-				litSize = istart[0] >> 3;
-				break;
-			case 1:
-				lhSize = 2;
-				litSize = ZSTD_readLE16(istart) >> 4;
-				break;
-			case 3:
-				lhSize = 3;
-				litSize = ZSTD_readLE24(istart) >> 4;
-				break;
-			}
-
-			if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
-				if (litSize + lhSize > srcSize)
-					return ERROR(corruption_detected);
-				memcpy(dctx->litBuffer, istart + lhSize, litSize);
-				dctx->litPtr = dctx->litBuffer;
-				dctx->litSize = litSize;
-				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-				return lhSize + litSize;
-			}
-			/* direct reference into compressed stream */
-			dctx->litPtr = istart + lhSize;
-			dctx->litSize = litSize;
-			return lhSize + litSize;
-		}
-
-		case set_rle: {
-			U32 const lhlCode = ((istart[0]) >> 2) & 3;
-			size_t litSize, lhSize;
-			switch (lhlCode) {
-			case 0:
-			case 2:
-			default: /* note : default is impossible, since lhlCode is in [0..3] */
-				lhSize = 1;
-				litSize = istart[0] >> 3;
-				break;
-			case 1:
-				lhSize = 2;
-				litSize = ZSTD_readLE16(istart) >> 4;
-				break;
-			case 3:
-				lhSize = 3;
-				litSize = ZSTD_readLE24(istart) >> 4;
-				if (srcSize < 4)
-					return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
-				break;
-			}
-			if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-				return ERROR(corruption_detected);
-			memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
-			dctx->litPtr = dctx->litBuffer;
-			dctx->litSize = litSize;
-			return lhSize + 1;
-		}
-		default:
-			return ERROR(corruption_detected); /* impossible */
-		}
-	}
-}
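
A worked example of the raw-literals header parsed in the set_basic case above (byte value assumed):

	/* istart[0] = 0x28 = 0b00101000
	 *   litEncType = 0x28 & 3        = 0  (set_basic, raw literals)
	 *   lhlCode    = (0x28 >> 2) & 3 = 2  -> 1-byte header (lhSize = 1)
	 *   litSize    = 0x28 >> 3       = 5  -> five literal bytes follow
	 */
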
-
-typedef union {
-	FSE_decode_t realData;
-	U32 alignedBy4;
-} FSE_decode_t4;
-
-static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = {
-    {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 4}},		 /* 0 : base, symbol, bits */
-    {{16, 0, 4}},
-    {{32, 1, 5}},
-    {{0, 3, 5}},
-    {{0, 4, 5}},
-    {{0, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 5}},
-    {{0, 10, 5}},
-    {{0, 12, 5}},
-    {{0, 14, 6}},
-    {{0, 16, 5}},
-    {{0, 18, 5}},
-    {{0, 19, 5}},
-    {{0, 21, 5}},
-    {{0, 22, 5}},
-    {{0, 24, 5}},
-    {{32, 25, 5}},
-    {{0, 26, 5}},
-    {{0, 27, 6}},
-    {{0, 29, 6}},
-    {{0, 31, 6}},
-    {{32, 0, 4}},
-    {{0, 1, 4}},
-    {{0, 2, 5}},
-    {{32, 4, 5}},
-    {{0, 5, 5}},
-    {{32, 7, 5}},
-    {{0, 8, 5}},
-    {{32, 10, 5}},
-    {{0, 11, 5}},
-    {{0, 13, 6}},
-    {{32, 16, 5}},
-    {{0, 17, 5}},
-    {{32, 19, 5}},
-    {{0, 20, 5}},
-    {{32, 22, 5}},
-    {{0, 23, 5}},
-    {{0, 25, 4}},
-    {{16, 25, 4}},
-    {{32, 26, 5}},
-    {{0, 28, 6}},
-    {{0, 30, 6}},
-    {{48, 0, 4}},
-    {{16, 1, 4}},
-    {{32, 2, 5}},
-    {{32, 3, 5}},
-    {{32, 5, 5}},
-    {{32, 6, 5}},
-    {{32, 8, 5}},
-    {{32, 9, 5}},
-    {{32, 11, 5}},
-    {{32, 12, 5}},
-    {{0, 15, 6}},
-    {{32, 17, 5}},
-    {{32, 18, 5}},
-    {{32, 20, 5}},
-    {{32, 21, 5}},
-    {{32, 23, 5}},
-    {{32, 24, 5}},
-    {{0, 35, 6}},
-    {{0, 34, 6}},
-    {{0, 33, 6}},
-    {{0, 32, 6}},
-}; /* LL_defaultDTable */
-
-static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = {
-    {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 6}},		 /* 0 : base, symbol, bits */
-    {{0, 1, 4}},
-    {{32, 2, 5}},
-    {{0, 3, 5}},
-    {{0, 5, 5}},
-    {{0, 6, 5}},
-    {{0, 8, 5}},
-    {{0, 10, 6}},
-    {{0, 13, 6}},
-    {{0, 16, 6}},
-    {{0, 19, 6}},
-    {{0, 22, 6}},
-    {{0, 25, 6}},
-    {{0, 28, 6}},
-    {{0, 31, 6}},
-    {{0, 33, 6}},
-    {{0, 35, 6}},
-    {{0, 37, 6}},
-    {{0, 39, 6}},
-    {{0, 41, 6}},
-    {{0, 43, 6}},
-    {{0, 45, 6}},
-    {{16, 1, 4}},
-    {{0, 2, 4}},
-    {{32, 3, 5}},
-    {{0, 4, 5}},
-    {{32, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 6}},
-    {{0, 12, 6}},
-    {{0, 15, 6}},
-    {{0, 18, 6}},
-    {{0, 21, 6}},
-    {{0, 24, 6}},
-    {{0, 27, 6}},
-    {{0, 30, 6}},
-    {{0, 32, 6}},
-    {{0, 34, 6}},
-    {{0, 36, 6}},
-    {{0, 38, 6}},
-    {{0, 40, 6}},
-    {{0, 42, 6}},
-    {{0, 44, 6}},
-    {{32, 1, 4}},
-    {{48, 1, 4}},
-    {{16, 2, 4}},
-    {{32, 4, 5}},
-    {{32, 5, 5}},
-    {{32, 7, 5}},
-    {{32, 8, 5}},
-    {{0, 11, 6}},
-    {{0, 14, 6}},
-    {{0, 17, 6}},
-    {{0, 20, 6}},
-    {{0, 23, 6}},
-    {{0, 26, 6}},
-    {{0, 29, 6}},
-    {{0, 52, 6}},
-    {{0, 51, 6}},
-    {{0, 50, 6}},
-    {{0, 49, 6}},
-    {{0, 48, 6}},
-    {{0, 47, 6}},
-    {{0, 46, 6}},
-}; /* ML_defaultDTable */
-
-static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = {
-    {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 5}},		 /* 0 : base, symbol, bits */
-    {{0, 6, 4}},
-    {{0, 9, 5}},
-    {{0, 15, 5}},
-    {{0, 21, 5}},
-    {{0, 3, 5}},
-    {{0, 7, 4}},
-    {{0, 12, 5}},
-    {{0, 18, 5}},
-    {{0, 23, 5}},
-    {{0, 5, 5}},
-    {{0, 8, 4}},
-    {{0, 14, 5}},
-    {{0, 20, 5}},
-    {{0, 2, 5}},
-    {{16, 7, 4}},
-    {{0, 11, 5}},
-    {{0, 17, 5}},
-    {{0, 22, 5}},
-    {{0, 4, 5}},
-    {{16, 8, 4}},
-    {{0, 13, 5}},
-    {{0, 19, 5}},
-    {{0, 1, 5}},
-    {{16, 6, 4}},
-    {{0, 10, 5}},
-    {{0, 16, 5}},
-    {{0, 28, 5}},
-    {{0, 27, 5}},
-    {{0, 26, 5}},
-    {{0, 25, 5}},
-    {{0, 24, 5}},
-}; /* OF_defaultDTable */
-
-/*! ZSTD_buildSeqTable() :
-	@return : nb bytes read from src,
-			  or an error code if it fails, testable with ZSTD_isError()
-*/
-static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src,
-				 size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize)
-{
-	const void *const tmpPtr = defaultTable; /* bypass strict aliasing */
-	switch (type) {
-	case set_rle:
-		if (!srcSize)
-			return ERROR(srcSize_wrong);
-		if ((*(const BYTE *)src) > max)
-			return ERROR(corruption_detected);
-		FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src);
-		*DTablePtr = DTableSpace;
-		return 1;
-	case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0;
-	case set_repeat:
-		if (!flagRepeatTable)
-			return ERROR(corruption_detected);
-		return 0;
-	default: /* impossible */
-	case set_compressed: {
-		U32 tableLog;
-		S16 *norm = (S16 *)workspace;
-		size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-		if ((spaceUsed32 << 2) > workspaceSize)
-			return ERROR(GENERIC);
-		workspace = (U32 *)workspace + spaceUsed32;
-		workspaceSize -= (spaceUsed32 << 2);
-		{
-			size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-			if (FSE_isError(headerSize))
-				return ERROR(corruption_detected);
-			if (tableLog > maxLog)
-				return ERROR(corruption_detected);
-			FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize);
-			*DTablePtr = DTableSpace;
-			return headerSize;
-		}
-	}
-	}
-}
-
-size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize)
-{
-	const BYTE *const istart = (const BYTE *const)src;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *ip = istart;
-
-	/* check */
-	if (srcSize < MIN_SEQUENCES_SIZE)
-		return ERROR(srcSize_wrong);
-
-	/* SeqHead */
-	{
-		int nbSeq = *ip++;
-		if (!nbSeq) {
-			*nbSeqPtr = 0;
-			return 1;
-		}
-		if (nbSeq > 0x7F) {
-			if (nbSeq == 0xFF) {
-				if (ip + 2 > iend)
-					return ERROR(srcSize_wrong);
-				nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2;
-			} else {
-				if (ip >= iend)
-					return ERROR(srcSize_wrong);
-				nbSeq = ((nbSeq - 0x80) << 8) + *ip++;
-			}
-		}
-		*nbSeqPtr = nbSeq;
-	}
-
-	/* FSE table descriptors */
-	if (ip + 4 > iend)
-		return ERROR(srcSize_wrong); /* minimum possible size */
-	{
-		symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
-		symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
-		symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
-		ip++;
-
-		/* Build DTables */
-		{
-			size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip,
-								  LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(llhSize))
-				return ERROR(corruption_detected);
-			ip += llhSize;
-		}
-		{
-			size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip,
-								  OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(ofhSize))
-				return ERROR(corruption_detected);
-			ip += ofhSize;
-		}
-		{
-			size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip,
-								  ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(mlhSize))
-				return ERROR(corruption_detected);
-			ip += mlhSize;
-		}
-	}
-
-	return ip - istart;
-}
-
-typedef struct {
-	size_t litLength;
-	size_t matchLength;
-	size_t offset;
-	const BYTE *match;
-} seq_t;
-
-typedef struct {
-	BIT_DStream_t DStream;
-	FSE_DState_t stateLL;
-	FSE_DState_t stateOffb;
-	FSE_DState_t stateML;
-	size_t prevOffset[ZSTD_REP_NUM];
-	const BYTE *base;
-	size_t pos;
-	uPtrDiff gotoDict;
-} seqState_t;
-
-FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			      const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = oLitEnd - sequence.offset;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd <= oend_w)
-		return ERROR(GENERIC); /* Precondition */
-
-	/* copy literals */
-	if (op < oend_w) {
-		ZSTD_wildcopy(op, *litPtr, oend_w - op);
-		*litPtr += oend_w - op;
-		op = oend_w;
-	}
-	while (op < oLitEnd)
-		*op++ = *(*litPtr)++;
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		match = dictEnd - (base - match);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-		}
-	}
-	while (op < oMatchEnd)
-		*op++ = *match++;
-	return sequenceLength;
-}
-
-static seq_t ZSTD_decodeSequence(seqState_t *seqState)
-{
-	seq_t seq;
-
-	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-	U32 const llBits = LL_bits[llCode];
-	U32 const mlBits = ML_bits[mlCode];
-	U32 const ofBits = ofCode;
-	U32 const totalBits = llBits + mlBits + ofBits;
-
-	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-	/* sequence */
-	{
-		size_t offset;
-		if (!ofCode)
-			offset = 0;
-		else {
-			offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-			if (ZSTD_32bits())
-				BIT_reloadDStream(&seqState->DStream);
-		}
-
-		if (ofCode <= 1) {
-			offset += (llCode == 0);
-			if (offset) {
-				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-				if (offset != 1)
-					seqState->prevOffset[2] = seqState->prevOffset[1];
-				seqState->prevOffset[1] = seqState->prevOffset[0];
-				seqState->prevOffset[0] = offset = temp;
-			} else {
-				offset = seqState->prevOffset[0];
-			}
-		} else {
-			seqState->prevOffset[2] = seqState->prevOffset[1];
-			seqState->prevOffset[1] = seqState->prevOffset[0];
-			seqState->prevOffset[0] = offset;
-		}
-		seq.offset = offset;
-	}
-
-	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() && (mlBits + llBits > 24))
-		BIT_reloadDStream(&seqState->DStream);
-
-	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-		BIT_reloadDStream(&seqState->DStream);
-
-	/* ANS state update */
-	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-	if (ZSTD_32bits())
-		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
-	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-	seq.match = NULL;
-
-	return seq;
-}
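
The repeat-offset branch above is dense; a worked trace may help (history values assumed). With prevOffset = {11, 7, 3}:

	/*  ofCode 0, llCode != 0 : offset stays 0 -> reuse prevOffset[0] = 11,
	 *                          history unchanged (the common repeat case);
	 *  ofCode 0, llCode == 0 : offset becomes 1 -> temp = prevOffset[1] = 7,
	 *                          history rotates to {7, 11, 3};
	 *  offset == 3 (reachable only with ofCode == 1 and llCode == 0) :
	 *                          temp = prevOffset[0] - 1 = 10,
	 *                          history becomes {10, 11, 7}.
	 */
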
-
-FORCE_INLINE
-size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			 const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = oLitEnd - sequence.offset;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd > oend_w)
-		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-	/* copy Literals */
-	ZSTD_copy8(op, *litPtr);
-	if (sequence.litLength > 8)
-		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-	op = oLitEnd;
-	*litPtr = iLitEnd; /* update for next sequence */
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		match = dictEnd + (match - base);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-			if (op > oend_w || sequence.matchLength < MINMATCH) {
-				U32 i;
-				for (i = 0; i < sequence.matchLength; ++i)
-					op[i] = match[i];
-				return sequenceLength;
-			}
-		}
-	}
-	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-	/* match within prefix */
-	if (sequence.offset < 8) {
-		/* close range match, overlap */
-		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-		int const sub2 = dec64table[sequence.offset];
-		op[0] = match[0];
-		op[1] = match[1];
-		op[2] = match[2];
-		op[3] = match[3];
-		match += dec32table[sequence.offset];
-		ZSTD_copy4(op + 4, match);
-		match -= sub2;
-	} else {
-		ZSTD_copy8(op, match);
-	}
-	op += 8;
-	match += 8;
-
-	if (oMatchEnd > oend - (16 - MINMATCH)) {
-		if (op < oend_w) {
-			ZSTD_wildcopy(op, match, oend_w - op);
-			match += oend_w - op;
-			op = oend_w;
-		}
-		while (op < oMatchEnd)
-			*op++ = *match++;
-	} else {
-		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-	}
-	return sequenceLength;
-}
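
The dec32table/dec64table overlap trick above also deserves a trace (offset assumed to be 2, i.e. a repeating two-byte pattern "ab"):

	/* match starts at op - 2, so the four byte copies write op[0..3] = "abab";
	 * match += dec32table[2] (= 2) then points at op[0], so ZSTD_copy4(op + 4,
	 * match) duplicates "abab" into op[4..7] without reading past what has
	 * already been written; match -= dec64table[2] (= 8) re-establishes
	 * "match trails op by 8", so the 8-byte wildcopy strides below stay safe.
	 */
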
-
-static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-	const BYTE *ip = (const BYTE *)seqStart;
-	const BYTE *const iend = ip + seqSize;
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + maxDstSize;
-	BYTE *op = ostart;
-	const BYTE *litPtr = dctx->litPtr;
-	const BYTE *const litEnd = litPtr + dctx->litSize;
-	const BYTE *const base = (const BYTE *)(dctx->base);
-	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-	int nbSeq;
-
-	/* Build Decoding Tables */
-	{
-		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-		if (ZSTD_isError(seqHSize))
-			return seqHSize;
-		ip += seqHSize;
-	}
-
-	/* Regen sequences */
-	if (nbSeq) {
-		seqState_t seqState;
-		dctx->fseEntropy = 1;
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				seqState.prevOffset[i] = dctx->entropy.rep[i];
-		}
-		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) {
-			nbSeq--;
-			{
-				seq_t const sequence = ZSTD_decodeSequence(&seqState);
-				size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
-				if (ZSTD_isError(oneSeqSize))
-					return oneSeqSize;
-				op += oneSeqSize;
-			}
-		}
-
-		/* check if reached exact end */
-		if (nbSeq)
-			return ERROR(corruption_detected);
-		/* save reps for next block */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-		}
-	}
-
-	/* last literal segment */
-	{
-		size_t const lastLLSize = litEnd - litPtr;
-		if (lastLLSize > (size_t)(oend - op))
-			return ERROR(dstSize_tooSmall);
-		memcpy(op, litPtr, lastLLSize);
-		op += lastLLSize;
-	}
-
-	return op - ostart;
-}
-
-FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets)
-{
-	seq_t seq;
-
-	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-	U32 const llBits = LL_bits[llCode];
-	U32 const mlBits = ML_bits[mlCode];
-	U32 const ofBits = ofCode;
-	U32 const totalBits = llBits + mlBits + ofBits;
-
-	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-	/* sequence */
-	{
-		size_t offset;
-		if (!ofCode)
-			offset = 0;
-		else {
-			if (longOffsets) {
-				int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
-				offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-				if (ZSTD_32bits() || extraBits)
-					BIT_reloadDStream(&seqState->DStream);
-				if (extraBits)
-					offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-			} else {
-				offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-				if (ZSTD_32bits())
-					BIT_reloadDStream(&seqState->DStream);
-			}
-		}
-
-		if (ofCode <= 1) {
-			offset += (llCode == 0);
-			if (offset) {
-				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-				if (offset != 1)
-					seqState->prevOffset[2] = seqState->prevOffset[1];
-				seqState->prevOffset[1] = seqState->prevOffset[0];
-				seqState->prevOffset[0] = offset = temp;
-			} else {
-				offset = seqState->prevOffset[0];
-			}
-		} else {
-			seqState->prevOffset[2] = seqState->prevOffset[1];
-			seqState->prevOffset[1] = seqState->prevOffset[0];
-			seqState->prevOffset[0] = offset;
-		}
-		seq.offset = offset;
-	}
-
-	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() && (mlBits + llBits > 24))
-		BIT_reloadDStream(&seqState->DStream);
-
-	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-		BIT_reloadDStream(&seqState->DStream);
-
-	{
-		size_t const pos = seqState->pos + seq.litLength;
-		seq.match = seqState->base + pos - seq.offset; /* single memory segment */
-		if (seq.offset > pos)
-			seq.match += seqState->gotoDict; /* separate memory segment */
-		seqState->pos = pos + seq.matchLength;
-	}
-
-	/* ANS state update */
-	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-	if (ZSTD_32bits())
-		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
-	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-	return seq;
-}
-
-static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize)
-{
-	if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
-		return ZSTD_decodeSequenceLong_generic(seqState, 1);
-	} else {
-		return ZSTD_decodeSequenceLong_generic(seqState, 0);
-	}
-}
-
-FORCE_INLINE
-size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			     const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = sequence.match;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd > oend_w)
-		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-	/* copy Literals */
-	ZSTD_copy8(op, *litPtr);
-	if (sequence.litLength > 8)
-		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-	op = oLitEnd;
-	*litPtr = iLitEnd; /* update for next sequence */
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-			if (op > oend_w || sequence.matchLength < MINMATCH) {
-				U32 i;
-				for (i = 0; i < sequence.matchLength; ++i)
-					op[i] = match[i];
-				return sequenceLength;
-			}
-		}
-	}
-	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-	/* match within prefix */
-	if (sequence.offset < 8) {
-		/* close range match, overlap */
-		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-		int const sub2 = dec64table[sequence.offset];
-		op[0] = match[0];
-		op[1] = match[1];
-		op[2] = match[2];
-		op[3] = match[3];
-		match += dec32table[sequence.offset];
-		ZSTD_copy4(op + 4, match);
-		match -= sub2;
-	} else {
-		ZSTD_copy8(op, match);
-	}
-	op += 8;
-	match += 8;
-
-	if (oMatchEnd > oend - (16 - MINMATCH)) {
-		if (op < oend_w) {
-			ZSTD_wildcopy(op, match, oend_w - op);
-			match += oend_w - op;
-			op = oend_w;
-		}
-		while (op < oMatchEnd)
-			*op++ = *match++;
-	} else {
-		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-	}
-	return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-	const BYTE *ip = (const BYTE *)seqStart;
-	const BYTE *const iend = ip + seqSize;
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + maxDstSize;
-	BYTE *op = ostart;
-	const BYTE *litPtr = dctx->litPtr;
-	const BYTE *const litEnd = litPtr + dctx->litSize;
-	const BYTE *const base = (const BYTE *)(dctx->base);
-	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-	unsigned const windowSize = dctx->fParams.windowSize;
-	int nbSeq;
-
-	/* Build Decoding Tables */
-	{
-		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-		if (ZSTD_isError(seqHSize))
-			return seqHSize;
-		ip += seqHSize;
-	}
-
-	/* Regen sequences */
-	if (nbSeq) {
-#define STORED_SEQS 4
-#define STOSEQ_MASK (STORED_SEQS - 1)
-#define ADVANCED_SEQS 4
-		seq_t *sequences = (seq_t *)dctx->entropy.workspace;
-		int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
-		seqState_t seqState;
-		int seqNb;
-		ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS);
-		dctx->fseEntropy = 1;
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				seqState.prevOffset[i] = dctx->entropy.rep[i];
-		}
-		seqState.base = base;
-		seqState.pos = (size_t)(op - base);
-		seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
-		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-		/* prepare in advance */
-		for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) {
-			sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
-		}
-		if (seqNb < seqAdvance)
-			return ERROR(corruption_detected);
-
-		/* decode and decompress */
-		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) {
-			seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
-			size_t const oneSeqSize =
-			    ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-			if (ZSTD_isError(oneSeqSize))
-				return oneSeqSize;
-			ZSTD_PREFETCH(sequence.match);
-			sequences[seqNb & STOSEQ_MASK] = sequence;
-			op += oneSeqSize;
-		}
-		if (seqNb < nbSeq)
-			return ERROR(corruption_detected);
-
-		/* finish queue */
-		seqNb -= seqAdvance;
-		for (; seqNb < nbSeq; seqNb++) {
-			size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-			if (ZSTD_isError(oneSeqSize))
-				return oneSeqSize;
-			op += oneSeqSize;
-		}
-
-		/* save reps for next block */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-		}
-	}
-
-	/* last literal segment */
-	{
-		size_t const lastLLSize = litEnd - litPtr;
-		if (lastLLSize > (size_t)(oend - op))
-			return ERROR(dstSize_tooSmall);
-		memcpy(op, litPtr, lastLLSize);
-		op += lastLLSize;
-	}
-
-	return op - ostart;
-}
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{ /* blockType == blockCompressed */
-	const BYTE *ip = (const BYTE *)src;
-
-	if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-		return ERROR(srcSize_wrong);
-
-	/* Decode literals section */
-	{
-		size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-		if (ZSTD_isError(litCSize))
-			return litCSize;
-		ip += litCSize;
-		srcSize -= litCSize;
-	}
-	/* Do not enable prefetching on 32-bit x86, as it is detrimental to
-	 * performance, likely because of register pressure.
-	 * If that is the correct cause, 32-bit ARM should be affected
-	 * differently; it would be good to test this on real ARM hardware,
-	 * to see if the prefetch version improves speed. */
-	if (sizeof(size_t) > 4)
-		if (dctx->fParams.windowSize > (1 << 23))
-			return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
-	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst)
-{
-	if (dst != dctx->previousDstEnd) { /* not contiguous */
-		dctx->dictEnd = dctx->previousDstEnd;
-		dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-		dctx->base = dst;
-		dctx->previousDstEnd = dst;
-	}
-}
-
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t dSize;
-	ZSTD_checkContinuity(dctx, dst);
-	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
-	dctx->previousDstEnd = (char *)dst + dSize;
-	return dSize;
-}
-
-/** ZSTD_insertBlock() :
-	insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize)
-{
-	ZSTD_checkContinuity(dctx, blockStart);
-	dctx->previousDstEnd = (const char *)blockStart + blockSize;
-	return blockSize;
-}
-
-size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length)
-{
-	if (length > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memset(dst, byte, length);
-	return length;
-}
-
-/** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
-{
-	if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-		return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4);
-	} else {
-		const BYTE *ip = (const BYTE *)src;
-		const BYTE *const ipstart = ip;
-		size_t remainingSize = srcSize;
-		ZSTD_frameParams fParams;
-
-		size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
-		if (ZSTD_isError(headerSize))
-			return headerSize;
-
-		/* Frame Header */
-		{
-			size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize);
-			if (ZSTD_isError(ret))
-				return ret;
-			if (ret > 0)
-				return ERROR(srcSize_wrong);
-		}
-
-		ip += headerSize;
-		remainingSize -= headerSize;
-
-		/* Loop on each block */
-		while (1) {
-			blockProperties_t blockProperties;
-			size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-			if (ZSTD_isError(cBlockSize))
-				return cBlockSize;
-
-			if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
-				return ERROR(srcSize_wrong);
-
-			ip += ZSTD_blockHeaderSize + cBlockSize;
-			remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
-
-			if (blockProperties.lastBlock)
-				break;
-		}
-
-		if (fParams.checksumFlag) { /* Frame content checksum */
-			if (remainingSize < 4)
-				return ERROR(srcSize_wrong);
-			ip += 4;
-			remainingSize -= 4;
-		}
-
-		return ip - ipstart;
-	}
-}
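
This helper is what lets a caller walk a buffer of concatenated frames; a sketch (cursor handling and errno are assumptions):

	while (srcSize >= ZSTD_frameHeaderSize_prefix) {
		size_t const fSize = ZSTD_findFrameCompressedSize(src, srcSize);

		if (ZSTD_isError(fSize))
			return -EINVAL;
		/* ... hand one whole frame of fSize bytes to the decoder ... */
		src = (const BYTE *)src + fSize;
		srcSize -= fSize;
	}
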
-
-/*! ZSTD_decompressFrame() :
-*   @dctx must be properly initialized */
-static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr)
-{
-	const BYTE *ip = (const BYTE *)(*srcPtr);
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + dstCapacity;
-	BYTE *op = ostart;
-	size_t remainingSize = *srcSizePtr;
-
-	/* check */
-	if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize)
-		return ERROR(srcSize_wrong);
-
-	/* Frame Header */
-	{
-		size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
-		if (ZSTD_isError(frameHeaderSize))
-			return frameHeaderSize;
-		if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize)
-			return ERROR(srcSize_wrong);
-		CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
-		ip += frameHeaderSize;
-		remainingSize -= frameHeaderSize;
-	}
-
-	/* Loop on each block */
-	while (1) {
-		size_t decodedSize;
-		blockProperties_t blockProperties;
-		size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-		if (ZSTD_isError(cBlockSize))
-			return cBlockSize;
-
-		ip += ZSTD_blockHeaderSize;
-		remainingSize -= ZSTD_blockHeaderSize;
-		if (cBlockSize > remainingSize)
-			return ERROR(srcSize_wrong);
-
-		switch (blockProperties.blockType) {
-		case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break;
-		case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break;
-		case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break;
-		case bt_reserved:
-		default: return ERROR(corruption_detected);
-		}
-
-		if (ZSTD_isError(decodedSize))
-			return decodedSize;
-		if (dctx->fParams.checksumFlag)
-			xxh64_update(&dctx->xxhState, op, decodedSize);
-		op += decodedSize;
-		ip += cBlockSize;
-		remainingSize -= cBlockSize;
-		if (blockProperties.lastBlock)
-			break;
-	}
-
-	if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
-		U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
-		U32 checkRead;
-		if (remainingSize < 4)
-			return ERROR(checksum_wrong);
-		checkRead = ZSTD_readLE32(ip);
-		if (checkRead != checkCalc)
-			return ERROR(checksum_wrong);
-		ip += 4;
-		remainingSize -= 4;
-	}
-
-	/* Allow caller to get size read */
-	*srcPtr = ip;
-	*srcSizePtr = remainingSize;
-	return op - ostart;
-}
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict);
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict);
-
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-					const ZSTD_DDict *ddict)
-{
-	void *const dststart = dst;
-
-	if (ddict) {
-		if (dict) {
-			/* programmer error, these two cases should be mutually exclusive */
-			return ERROR(GENERIC);
-		}
-
-		dict = ZSTD_DDictDictContent(ddict);
-		dictSize = ZSTD_DDictDictSize(ddict);
-	}
-
-	while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-		U32 magicNumber;
-
-		magicNumber = ZSTD_readLE32(src);
-		if (magicNumber != ZSTD_MAGICNUMBER) {
-			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-				size_t skippableSize;
-				if (srcSize < ZSTD_skippableHeaderSize)
-					return ERROR(srcSize_wrong);
-				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-				if (srcSize < skippableSize) {
-					return ERROR(srcSize_wrong);
-				}
-
-				src = (const BYTE *)src + skippableSize;
-				srcSize -= skippableSize;
-				continue;
-			} else {
-				return ERROR(prefix_unknown);
-			}
-		}
-
-		if (ddict) {
-			/* we were called from ZSTD_decompress_usingDDict */
-			ZSTD_refDDict(dctx, ddict);
-		} else {
-			/* this initializes correctly with no dict when dict == NULL, so
-			 * use it in all cases except ddict */
-			CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
-		}
-		ZSTD_checkContinuity(dctx, dst);
-
-		{
-			const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize);
-			if (ZSTD_isError(res))
-				return res;
-			/* don't need to bounds check this, ZSTD_decompressFrame will have
-			 * already */
-			dst = (BYTE *)dst + res;
-			dstCapacity -= res;
-		}
-	}
-
-	if (srcSize)
-		return ERROR(srcSize_wrong); /* input not entirely consumed */
-
-	return (BYTE *)dst - (BYTE *)dststart;
-}
-
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize)
-{
-	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
-}
-
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
-}
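
Putting the pieces together, single-frame decompression with a known output size reduces to this sketch (wksp/wkspSize as in the ZSTD_initDCtx example; errno choices assumed):

	ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wkspSize);
	size_t dSize;

	if (!dctx)
		return -EINVAL;
	dSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
	if (ZSTD_isError(dSize))
		return -EIO;
	/* dSize bytes were written to dst */
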
-
-/*-**************************************
-*   Advanced Streaming Decompression API
-*   Bufferless and synchronous
-****************************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; }
-
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx)
-{
-	switch (dctx->stage) {
-	default: /* should not happen */
-	case ZSTDds_getFrameHeaderSize:
-	case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader;
-	case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader;
-	case ZSTDds_decompressBlock: return ZSTDnit_block;
-	case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock;
-	case ZSTDds_checkChecksum: return ZSTDnit_checksum;
-	case ZSTDds_decodeSkippableHeader:
-	case ZSTDds_skipFrame: return ZSTDnit_skippableFrame;
-	}
-}
-
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */
-
-/** ZSTD_decompressContinue() :
-*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	/* Sanity check */
-	if (srcSize != dctx->expected)
-		return ERROR(srcSize_wrong);
-	if (dstCapacity)
-		ZSTD_checkContinuity(dctx, dst);
-
-	switch (dctx->stage) {
-	case ZSTDds_getFrameHeaderSize:
-		if (srcSize != ZSTD_frameHeaderSize_prefix)
-			return ERROR(srcSize_wrong);					/* impossible */
-		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
-			memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-			dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */
-			dctx->stage = ZSTDds_decodeSkippableHeader;
-			return 0;
-		}
-		dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
-		if (ZSTD_isError(dctx->headerSize))
-			return dctx->headerSize;
-		memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-		if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) {
-			dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix;
-			dctx->stage = ZSTDds_decodeFrameHeader;
-			return 0;
-		}
-		dctx->expected = 0; /* not necessary to copy more */
-		fallthrough;
-
-	case ZSTDds_decodeFrameHeader:
-		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-		CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
-		dctx->expected = ZSTD_blockHeaderSize;
-		dctx->stage = ZSTDds_decodeBlockHeader;
-		return 0;
-
-	case ZSTDds_decodeBlockHeader: {
-		blockProperties_t bp;
-		size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
-		if (ZSTD_isError(cBlockSize))
-			return cBlockSize;
-		dctx->expected = cBlockSize;
-		dctx->bType = bp.blockType;
-		dctx->rleSize = bp.origSize;
-		if (cBlockSize) {
-			dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
-			return 0;
-		}
-		/* empty block */
-		if (bp.lastBlock) {
-			if (dctx->fParams.checksumFlag) {
-				dctx->expected = 4;
-				dctx->stage = ZSTDds_checkChecksum;
-			} else {
-				dctx->expected = 0; /* end of frame */
-				dctx->stage = ZSTDds_getFrameHeaderSize;
-			}
-		} else {
-			dctx->expected = 3; /* go directly to next header */
-			dctx->stage = ZSTDds_decodeBlockHeader;
-		}
-		return 0;
-	}
-	case ZSTDds_decompressLastBlock:
-	case ZSTDds_decompressBlock: {
-		size_t rSize;
-		switch (dctx->bType) {
-		case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break;
-		case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break;
-		case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break;
-		case bt_reserved: /* should never happen */
-		default: return ERROR(corruption_detected);
-		}
-		if (ZSTD_isError(rSize))
-			return rSize;
-		if (dctx->fParams.checksumFlag)
-			xxh64_update(&dctx->xxhState, dst, rSize);
-
-		if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
-			if (dctx->fParams.checksumFlag) {	/* another round for frame checksum */
-				dctx->expected = 4;
-				dctx->stage = ZSTDds_checkChecksum;
-			} else {
-				dctx->expected = 0; /* ends here */
-				dctx->stage = ZSTDds_getFrameHeaderSize;
-			}
-		} else {
-			dctx->stage = ZSTDds_decodeBlockHeader;
-			dctx->expected = ZSTD_blockHeaderSize;
-			dctx->previousDstEnd = (char *)dst + rSize;
-		}
-		return rSize;
-	}
-	case ZSTDds_checkChecksum: {
-		U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
-		U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
-		if (check32 != h32)
-			return ERROR(checksum_wrong);
-		dctx->expected = 0;
-		dctx->stage = ZSTDds_getFrameHeaderSize;
-		return 0;
-	}
-	case ZSTDds_decodeSkippableHeader: {
-		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-		dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4);
-		dctx->stage = ZSTDds_skipFrame;
-		return 0;
-	}
-	case ZSTDds_skipFrame: {
-		dctx->expected = 0;
-		dctx->stage = ZSTDds_getFrameHeaderSize;
-		return 0;
-	}
-	default:
-		return ERROR(GENERIC); /* impossible */
-	}
-}
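To illustrate the calling convention documented above: ask the decoder how many
bytes it expects next, then feed exactly that many. A minimal sketch of such a
driver loop, assuming hypothetical contiguous `in`/`out` buffers owned by the
caller:

static size_t decode_bufferless(ZSTD_DCtx *dctx, const void *in, size_t inSize,
				void *out, size_t outCap)
{
	const char *ip = (const char *)in;
	char *op = (char *)out;
	size_t need;

	CHECK_F(ZSTD_decompressBegin(dctx));
	while ((need = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
		size_t ret;

		if (need > inSize - (size_t)(ip - (const char *)in))
			return ERROR(srcSize_wrong);
		/* output must stay contiguous from one call to the next,
		 * and each call needs room for up to one full block */
		ret = ZSTD_decompressContinue(dctx, op, outCap - (size_t)(op - (char *)out), ip, need);
		if (ZSTD_isError(ret))
			return ret;
		ip += need;
		op += ret;
	}
	return (size_t)(op - (char *)out); /* total decompressed size */
}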
-
-static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	dctx->dictEnd = dctx->previousDstEnd;
-	dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-	dctx->base = dict;
-	dctx->previousDstEnd = (const char *)dict + dictSize;
-	return 0;
-}
-
-/* ZSTD_loadEntropy() :
- * dict : must point at beginning of a valid zstd dictionary
- * @return : size of entropy tables read */
-static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize)
-{
-	const BYTE *dictPtr = (const BYTE *)dict;
-	const BYTE *const dictEnd = dictPtr + dictSize;
-
-	if (dictSize <= 8)
-		return ERROR(dictionary_corrupted);
-	dictPtr += 8; /* skip header = magic + dictID */
-
-	{
-		size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace));
-		if (HUF_isError(hSize))
-			return ERROR(dictionary_corrupted);
-		dictPtr += hSize;
-	}
-
-	{
-		short offcodeNCount[MaxOff + 1];
-		U32 offcodeMaxValue = MaxOff, offcodeLog;
-		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(offcodeHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (offcodeLog > OffFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += offcodeHeaderSize;
-	}
-
-	{
-		short matchlengthNCount[MaxML + 1];
-		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(matchlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (matchlengthLog > MLFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += matchlengthHeaderSize;
-	}
-
-	{
-		short litlengthNCount[MaxLL + 1];
-		unsigned litlengthMaxValue = MaxLL, litlengthLog;
-		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(litlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (litlengthLog > LLFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += litlengthHeaderSize;
-	}
-
-	if (dictPtr + 12 > dictEnd)
-		return ERROR(dictionary_corrupted);
-	{
-		int i;
-		size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12));
-		for (i = 0; i < 3; i++) {
-			U32 const rep = ZSTD_readLE32(dictPtr);
-			dictPtr += 4;
-			if (rep == 0 || rep >= dictContentSize)
-				return ERROR(dictionary_corrupted);
-			entropy->rep[i] = rep;
-		}
-	}
-
-	return dictPtr - (const BYTE *)dict;
-}
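For orientation, the on-disk layout ZSTD_loadEntropy() walks above is, in order:

	magic(4) | dictID(4) | HUF literals table | FSE offset codes
	| FSE match lengths | FSE literal lengths | rep offsets (3 x 4 bytes)
	| raw dictionary content ...

The three repeat offsets are validated against the remaining content size, and
the returned byte count covers everything up to and including them, so the
caller can reference whatever follows as plain content.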
-
-static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	if (dictSize < 8)
-		return ZSTD_refDictContent(dctx, dict, dictSize);
-	{
-		U32 const magic = ZSTD_readLE32(dict);
-		if (magic != ZSTD_DICT_MAGIC) {
-			return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
-		}
-	}
-	dctx->dictID = ZSTD_readLE32((const char *)dict + 4);
-
-	/* load entropy tables */
-	{
-		size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
-		if (ZSTD_isError(eSize))
-			return ERROR(dictionary_corrupted);
-		dict = (const char *)dict + eSize;
-		dictSize -= eSize;
-	}
-	dctx->litEntropy = dctx->fseEntropy = 1;
-
-	/* reference dictionary content */
-	return ZSTD_refDictContent(dctx, dict, dictSize);
-}
-
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	CHECK_F(ZSTD_decompressBegin(dctx));
-	if (dict && dictSize)
-		CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
-	return 0;
-}
-
-/* ======   ZSTD_DDict   ====== */
-
-struct ZSTD_DDict_s {
-	void *dictBuffer;
-	const void *dictContent;
-	size_t dictSize;
-	ZSTD_entropyTables_t entropy;
-	U32 dictID;
-	U32 entropyPresent;
-	ZSTD_customMem cMem;
-}; /* typedef'd to ZSTD_DDict within "zstd.h" */
-
-size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); }
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; }
-
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; }
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict)
-{
-	ZSTD_decompressBegin(dstDCtx); /* init */
-	if (ddict) {		       /* support refDDict on NULL */
-		dstDCtx->dictID = ddict->dictID;
-		dstDCtx->base = ddict->dictContent;
-		dstDCtx->vBase = ddict->dictContent;
-		dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize;
-		dstDCtx->previousDstEnd = dstDCtx->dictEnd;
-		if (ddict->entropyPresent) {
-			dstDCtx->litEntropy = 1;
-			dstDCtx->fseEntropy = 1;
-			dstDCtx->LLTptr = ddict->entropy.LLTable;
-			dstDCtx->MLTptr = ddict->entropy.MLTable;
-			dstDCtx->OFTptr = ddict->entropy.OFTable;
-			dstDCtx->HUFptr = ddict->entropy.hufTable;
-			dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
-			dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
-			dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
-		} else {
-			dstDCtx->litEntropy = 0;
-			dstDCtx->fseEntropy = 0;
-		}
-	}
-}
-
-static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict)
-{
-	ddict->dictID = 0;
-	ddict->entropyPresent = 0;
-	if (ddict->dictSize < 8)
-		return 0;
-	{
-		U32 const magic = ZSTD_readLE32(ddict->dictContent);
-		if (magic != ZSTD_DICT_MAGIC)
-			return 0; /* pure content mode */
-	}
-	ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4);
-
-	/* load entropy tables */
-	CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted);
-	ddict->entropyPresent = 1;
-	return 0;
-}
-
-static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
-{
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	{
-		ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-		if (!ddict)
-			return NULL;
-		ddict->cMem = customMem;
-
-		if ((byReference) || (!dict) || (!dictSize)) {
-			ddict->dictBuffer = NULL;
-			ddict->dictContent = dict;
-		} else {
-			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-			if (!internalBuffer) {
-				ZSTD_freeDDict(ddict);
-				return NULL;
-			}
-			memcpy(internalBuffer, dict, dictSize);
-			ddict->dictBuffer = internalBuffer;
-			ddict->dictContent = internalBuffer;
-		}
-		ddict->dictSize = dictSize;
-		ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-		/* parse dictionary content */
-		{
-			size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
-			if (ZSTD_isError(errorCode)) {
-				ZSTD_freeDDict(ddict);
-				return NULL;
-			}
-		}
-
-		return ddict;
-	}
-}
-
-/*! ZSTD_initDDict() :
-*   Create a digested dictionary, to start decompression without startup delay.
-*   `dict` content is referenced, not copied, into the DDict (byReference == 1 below).
-*   Consequently, `dict` must remain valid for the lifetime of the `ZSTD_DDict` */
-ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem);
-}
-
-size_t ZSTD_freeDDict(ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = ddict->cMem;
-		ZSTD_free(ddict->dictBuffer, cMem);
-		ZSTD_free(ddict, cMem);
-		return 0;
-	}
-}
-
-/*! ZSTD_getDictID_fromDict() :
- *  Provides the dictID stored within a dictionary.
- *  If @return == 0, the dictionary is not conformant with the Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize)
-{
-	if (dictSize < 8)
-		return 0;
-	if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC)
-		return 0;
-	return ZSTD_readLE32((const char *)dict + 4);
-}
-
-/*! ZSTD_getDictID_fromDDict() :
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to the Zstandard specification, or is empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0;
-	return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-}
-
-/*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompress the frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could be for one of the following reasons :
- *  - The frame does not require a dictionary to be decoded (most common case).
- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- *  - This is not a Zstandard frame.
- *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize)
-{
-	ZSTD_frameParams zfp = {0, 0, 0, 0};
-	size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize);
-	if (ZSTD_isError(hError))
-		return 0;
-	return zfp.dictID;
-}
-
-/*! ZSTD_decompress_usingDDict() :
-*   Decompression using a pre-digested Dictionary
-*   Use dictionary without significant overhead. */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict)
-{
-	/* pass content and size in case legacy frames are encountered */
-	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict);
-}
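As a usage sketch of the two helpers above: pick a pre-digested dictionary from
the frame's dictID before decoding. `lookup_ddict()` is a hypothetical
application-side table, not part of this API:

static size_t decode_with_ddict(ZSTD_DCtx *dctx, void *dst, size_t dstCap,
				const void *src, size_t srcSize)
{
	unsigned const id = ZSTD_getDictID_fromFrame(src, srcSize);
	/* id == 0 : no usable dictID in the frame header; decode without one */
	const ZSTD_DDict *ddict = id ? lookup_ddict(id) : NULL;

	return ZSTD_decompress_usingDDict(dctx, dst, dstCap, src, srcSize, ddict);
}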
-
-/*=====================================
-*   Streaming decompression
-*====================================*/
-
-typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-/* *** Resource management *** */
-struct ZSTD_DStream_s {
-	ZSTD_DCtx *dctx;
-	ZSTD_DDict *ddictLocal;
-	const ZSTD_DDict *ddict;
-	ZSTD_frameParams fParams;
-	ZSTD_dStreamStage stage;
-	char *inBuff;
-	size_t inBuffSize;
-	size_t inPos;
-	size_t maxWindowSize;
-	char *outBuff;
-	size_t outBuffSize;
-	size_t outStart;
-	size_t outEnd;
-	size_t blockSize;
-	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */
-	size_t lhSize;
-	ZSTD_customMem customMem;
-	void *legacyContext;
-	U32 previousLegacyVersion;
-	U32 legacyVersion;
-	U32 hostageByte;
-}; /* typedef'd to ZSTD_DStream within "zstd.h" */
-
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize)
-{
-	size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-	size_t const inBuffSize = blockSize;
-	size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-	return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_DStream *zds;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
-	if (zds == NULL)
-		return NULL;
-	memset(zds, 0, sizeof(ZSTD_DStream));
-	memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
-	zds->dctx = ZSTD_createDCtx_advanced(customMem);
-	if (zds->dctx == NULL) {
-		ZSTD_freeDStream(zds);
-		return NULL;
-	}
-	zds->stage = zdss_init;
-	zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-	return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem);
-	if (!zds) {
-		return NULL;
-	}
-
-	zds->maxWindowSize = maxWindowSize;
-	zds->stage = zdss_loadHeader;
-	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-	ZSTD_freeDDict(zds->ddictLocal);
-	zds->ddictLocal = NULL;
-	zds->ddict = zds->ddictLocal;
-	zds->legacyVersion = 0;
-	zds->hostageByte = 0;
-
-	{
-		size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-		size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-
-		zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem);
-		zds->inBuffSize = blockSize;
-		zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem);
-		zds->outBuffSize = neededOutSize;
-		if (zds->inBuff == NULL || zds->outBuff == NULL) {
-			ZSTD_freeDStream(zds);
-			return NULL;
-		}
-	}
-	return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize)
-{
-	ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize);
-	if (zds) {
-		zds->ddict = ddict;
-	}
-	return zds;
-}
-
-size_t ZSTD_freeDStream(ZSTD_DStream *zds)
-{
-	if (zds == NULL)
-		return 0; /* support free on null */
-	{
-		ZSTD_customMem const cMem = zds->customMem;
-		ZSTD_freeDCtx(zds->dctx);
-		zds->dctx = NULL;
-		ZSTD_freeDDict(zds->ddictLocal);
-		zds->ddictLocal = NULL;
-		ZSTD_free(zds->inBuff, cMem);
-		zds->inBuff = NULL;
-		ZSTD_free(zds->outBuff, cMem);
-		zds->outBuff = NULL;
-		ZSTD_free(zds, cMem);
-		return 0;
-	}
-}
-
-/* *** Initialization *** */
-
-size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-
-size_t ZSTD_resetDStream(ZSTD_DStream *zds)
-{
-	zds->stage = zdss_loadHeader;
-	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-	zds->legacyVersion = 0;
-	zds->hostageByte = 0;
-	return ZSTD_frameHeaderSize_prefix;
-}
-
-/* *****   Decompression   ***** */
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const length = MIN(dstCapacity, srcSize);
-	memcpy(dst, src, length);
-	return length;
-}
-
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-	const char *const istart = (const char *)(input->src) + input->pos;
-	const char *const iend = (const char *)(input->src) + input->size;
-	const char *ip = istart;
-	char *const ostart = (char *)(output->dst) + output->pos;
-	char *const oend = (char *)(output->dst) + output->size;
-	char *op = ostart;
-	U32 someMoreWork = 1;
-
-	while (someMoreWork) {
-		switch (zds->stage) {
-		case zdss_init:
-			ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
-			fallthrough;
-
-		case zdss_loadHeader: {
-			size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
-			if (ZSTD_isError(hSize))
-				return hSize;
-			if (hSize != 0) {				   /* need more input */
-				size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
-				if (toLoad > (size_t)(iend - ip)) {	/* not enough input to load full header */
-					memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip);
-					zds->lhSize += iend - ip;
-					input->pos = input->size;
-					return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) +
-					       ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
-				}
-				memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad);
-				zds->lhSize = hSize;
-				ip += toLoad;
-				break;
-			}
-
-			/* check for single-pass mode opportunity */
-			if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
-			    && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) {
-				size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart);
-				if (cSize <= (size_t)(iend - istart)) {
-					size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict);
-					if (ZSTD_isError(decompressedSize))
-						return decompressedSize;
-					ip = istart + cSize;
-					op += decompressedSize;
-					zds->dctx->expected = 0;
-					zds->stage = zdss_init;
-					someMoreWork = 0;
-					break;
-				}
-			}
-
-			/* Consume header */
-			ZSTD_refDDict(zds->dctx, zds->ddict);
-			{
-				size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
-				CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
-				{
-					size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-					CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size));
-				}
-			}
-
-			zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-			if (zds->fParams.windowSize > zds->maxWindowSize)
-				return ERROR(frameParameter_windowTooLarge);
-
-			/* Buffers are preallocated, but double check */
-			{
-				size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-				size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-				if (zds->inBuffSize < blockSize) {
-					return ERROR(GENERIC);
-				}
-				if (zds->outBuffSize < neededOutSize) {
-					return ERROR(GENERIC);
-				}
-				zds->blockSize = blockSize;
-			}
-			zds->stage = zdss_read;
-		}
-			fallthrough;
-
-		case zdss_read: {
-			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-			if (neededInSize == 0) { /* end of frame */
-				zds->stage = zdss_init;
-				someMoreWork = 0;
-				break;
-			}
-			if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */
-				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart,
-										   (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize);
-				if (ZSTD_isError(decodedSize))
-					return decodedSize;
-				ip += neededInSize;
-				if (!decodedSize && !isSkipFrame)
-					break; /* this was just a header */
-				zds->outEnd = zds->outStart + decodedSize;
-				zds->stage = zdss_flush;
-				break;
-			}
-			if (ip == iend) {
-				someMoreWork = 0;
-				break;
-			} /* no more input */
-			zds->stage = zdss_load;
-			/* pass-through */
-		}
-			fallthrough;
-
-		case zdss_load: {
-			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-			size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */
-			size_t loadedSize;
-			if (toLoad > zds->inBuffSize - zds->inPos)
-				return ERROR(corruption_detected); /* should never happen */
-			loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip);
-			ip += loadedSize;
-			zds->inPos += loadedSize;
-			if (loadedSize < toLoad) {
-				someMoreWork = 0;
-				break;
-			} /* not enough input, wait for more */
-
-			/* decode loaded input */
-			{
-				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
-										   zds->inBuff, neededInSize);
-				if (ZSTD_isError(decodedSize))
-					return decodedSize;
-				zds->inPos = 0; /* input is consumed */
-				if (!decodedSize && !isSkipFrame) {
-					zds->stage = zdss_read;
-					break;
-				} /* this was just a header */
-				zds->outEnd = zds->outStart + decodedSize;
-				zds->stage = zdss_flush;
-				/* pass-through */
-			}
-		}
-			fallthrough;
-
-		case zdss_flush: {
-			size_t const toFlushSize = zds->outEnd - zds->outStart;
-			size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize);
-			op += flushedSize;
-			zds->outStart += flushedSize;
-			if (flushedSize == toFlushSize) { /* flush completed */
-				zds->stage = zdss_read;
-				if (zds->outStart + zds->blockSize > zds->outBuffSize)
-					zds->outStart = zds->outEnd = 0;
-				break;
-			}
-			/* cannot complete flush */
-			someMoreWork = 0;
-			break;
-		}
-		default:
-			return ERROR(GENERIC); /* impossible */
-		}
-	}
-
-	/* result */
-	input->pos += (size_t)(ip - istart);
-	output->pos += (size_t)(op - ostart);
-	{
-		size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-		if (!nextSrcSizeHint) {			    /* frame fully decoded */
-			if (zds->outEnd == zds->outStart) { /* output fully flushed */
-				if (zds->hostageByte) {
-					if (input->pos >= input->size) {
-						zds->stage = zdss_read;
-						return 1;
-					}	     /* can't release hostage (not present) */
-					input->pos++; /* release hostage */
-				}
-				return 0;
-			}
-			if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
-				input->pos--;    /* note : pos > 0, otherwise, impossible to finish reading last block */
-				zds->hostageByte = 1;
-			}
-			return 1;
-		}
-		nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */
-		if (zds->inPos > nextSrcSizeHint)
-			return ERROR(GENERIC); /* should never happen */
-		nextSrcSizeHint -= zds->inPos; /* already loaded*/
-		return nextSrcSizeHint;
-	}
-}
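Tying the streaming entry points together, a minimal single-frame sketch; the
refill policy is a placeholder, and the workspace is assumed to satisfy
ZSTD_DStreamWorkspaceBound(maxWindowSize):

static size_t stream_decode_frame(size_t maxWindowSize, void *wksp, size_t wkspSize,
				  ZSTD_inBuffer *in, ZSTD_outBuffer *out)
{
	ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, wksp, wkspSize);
	size_t ret = 1;

	if (!zds)
		return ERROR(memory_allocation);
	while (ret != 0) { /* 0 : frame fully decoded and flushed */
		ret = ZSTD_decompressStream(zds, out, in);
		if (ZSTD_isError(ret))
			break;
		if (in->pos == in->size)
			break; /* a real caller would refill `in` or drain `out` here */
	}
	ZSTD_freeDStream(zds);
	return ret; /* 0, an error code, or a hint of how many bytes to feed next */
}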
-
-EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDCtx);
-EXPORT_SYMBOL(ZSTD_decompressDCtx);
-EXPORT_SYMBOL(ZSTD_decompress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDDict);
-EXPORT_SYMBOL(ZSTD_decompress_usingDDict);
-
-EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDStream);
-EXPORT_SYMBOL(ZSTD_initDStream_usingDDict);
-EXPORT_SYMBOL(ZSTD_resetDStream);
-EXPORT_SYMBOL(ZSTD_decompressStream);
-EXPORT_SYMBOL(ZSTD_DStreamInSize);
-EXPORT_SYMBOL(ZSTD_DStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_findFrameCompressedSize);
-EXPORT_SYMBOL(ZSTD_getFrameContentSize);
-EXPORT_SYMBOL(ZSTD_findDecompressedSize);
-
-EXPORT_SYMBOL(ZSTD_isFrame);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromFrame);
-
-EXPORT_SYMBOL(ZSTD_getFrameParams);
-EXPORT_SYMBOL(ZSTD_decompressBegin);
-EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_copyDCtx);
-EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress);
-EXPORT_SYMBOL(ZSTD_decompressContinue);
-EXPORT_SYMBOL(ZSTD_nextInputType);
-
-EXPORT_SYMBOL(ZSTD_decompressBlock);
-EXPORT_SYMBOL(ZSTD_insertBlock);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
new file mode 100644
index 0000000..5105e59
--- /dev/null
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -0,0 +1,1206 @@
+/* ******************************************************************
+ * huff0 Huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"  /* BIT_* */
+#include "../common/fse.h"        /* to compress headers */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+/* **************************************************************
+*  Macros
+****************************************************************/
+
+/* Each of these two optional macros forces the use of one of the two
+ * Huffman decompression implementations. They cannot both be defined
+ * at the same time.
+ */
+#if defined(HUF_FORCE_DECOMPRESS_X1) && \
+    defined(HUF_FORCE_DECOMPRESS_X2)
+#error "Cannot force the use of the X1 and X2 decoders at the same time!"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+
+
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/* **************************************************************
+*  BMI2 Variant Wrappers
+****************************************************************/
+#if DYNAMIC_BMI2
+
+#define HUF_DGEN(fn)                                                        \
+                                                                            \
+    static size_t fn##_default(                                             \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        if (bmi2) {                                                         \
+            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+        }                                                                   \
+        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+    }
+
+#else
+
+#define HUF_DGEN(fn)                                                        \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        (void)bmi2;                                                         \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }
+
+#endif
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc dtd;
+    ZSTD_memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+
+/*
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+    U64 D4;
+    if (MEM_isLittleEndian()) {
+        D4 = symbol + (nbBits << 8);
+    } else {
+        D4 = (symbol << 8) + nbBits;
+    }
+    D4 *= 0x0001000100010001ULL;
+    return D4;
+}
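Concretely, with assumed inputs symbol = 0x41 and nbBits = 3 on a little-endian
target, the packed 16-bit lane is 0x41 + (3 << 8) = 0x0341, and the multiply
broadcasts it into all four lanes:

    U64 const d4 = HUF_DEltX1_set4(0x41, 3);
    /* d4 == 0x0341034103410341ULL : four {byte = 0x41, nbBits = 3} entries,
     * ready to be stored with a single MEM_write64() into four dt slots */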
+
+typedef struct {
+        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
+
+    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzers complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE)tableLog;
+        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Compute symbols and rankStart given rankVal:
+     *
+     * rankVal already contains the number of values of each weight.
+     *
+     * symbols contains the symbols ordered by weight. First are the rankVal[0]
+     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+     * symbols[0] is filled (but unused) to avoid a branch.
+     *
+     * rankStart contains the offset where each rank belongs in the DTable.
+     * rankStart[0] is not filled because there are no entries in the table for
+     * weight 0.
+     */
+    {
+        int n;
+        int nextRankStart = 0;
+        int const unroll = 4;
+        int const nLimit = (int)nbSymbols - unroll + 1;
+        for (n=0; n<(int)tableLog+1; n++) {
+            U32 const curr = nextRankStart;
+            nextRankStart += wksp->rankVal[n];
+            wksp->rankStart[n] = curr;
+        }
+        for (n=0; n < nLimit; n += unroll) {
+            int u;
+            for (u=0; u < unroll; ++u) {
+                size_t const w = wksp->huffWeight[n+u];
+                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+            }
+        }
+        for (; n < (int)nbSymbols; ++n) {
+            size_t const w = wksp->huffWeight[n];
+            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+        }
+    }
+
+    /* fill DTable
+     * We fill all entries of each weight in order.
+     * That way length is a constant for each iteration of the outer loop.
+     * We can switch based on the length to a different inner loop which is
+     * optimized for that particular case.
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
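To make the table construction above concrete, a worked instance with assumed
stats tableLog = 2, nbSymbols = 3 and huffWeight = {1, 1, 2}: rankVal = {0, 2, 1},
so the first pass produces rankStart = {0, 0, 2} and the bucket sort yields
symbols = {0, 1, 2}. The fill loop then gives the weight-1 symbols nbBits = 2 and
length = 1, and the weight-2 symbol nbBits = 1 and length = 2, i.e.
dt[0] = {0, 2 bits}, dt[1] = {1, 2 bits}, dt[2] = dt[3] = {2, 1 bit}. Each weight w
claims (1 << w) >> 1 consecutive entries, which is why the switch can specialize
on a per-weight constant length.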
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - 3;
+        const void* const dtPtr = DTable + 1;
+        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+        U32 endSignal = 1;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+        for ( ; (endSignal) & (op4 < olimit) ; ) {
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+        }
+
+        /* check corruption */
+        /* note : should not be necessary : the op# pointers advance in lock step, and we control op4.
+         *        But curiously, binaries generated by gcc 7.2 & 7.3 with -mbmi2 run faster when at least one test is present */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
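For the jump-table arithmetic above, an assumed example: with cSrcSize = 100 and
encoded lengths 20, 25 and 30, the fourth stream gets
length4 = 100 - (20 + 25 + 30 + 6) = 19, and the four streams start at offsets
6, 26, 51 and 81. The `length4 > cSrcSize` test catches the unsigned underflow
that a corrupted jump table would otherwise produce.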
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+                                               const void *cSrc,
+                                               size_t cSrcSize,
+                                               const HUF_DTable *DTable);
+
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
+
+
+
+size_t HUF_decompress1X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+    HUF_DEltX2 DElt;
+    U32* rankVal = wksp;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits = (BYTE)(nbBits + consumed);
+            DElt.length = 2;
+            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+            rankVal[weight] += length;
+    }   }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzers complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of the sorted list */
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is the beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = wksp->rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+    }   }
+    return 1;
+}
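Note the asymmetry between the two helpers above. HUF_decodeSymbolX2() always
copies two bytes but advances the caller by dt[val].length, so for a
single-symbol entry op[1] is scratch: the next symbol is written at op + 1 and
overwrites the spill. HUF_decodeLastSymbolX2() copies a single byte precisely
because, at the final output position, no later write exists to cover a spill
past the end of the buffer.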
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+                const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BIT_DStream_t bitD;
+
+    /* Init */
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    /* decode */
+    {   BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+    }
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - (sizeof(size_t)-1);
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal = 1;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = (U32)LIKELY((U32)
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
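+
+/* A worked example of the 4-stream framing decoded above, with made-up
+ * numbers: for cSrcSize = 106 and a 6-byte jump table holding
+ * {length1 = 30, length2 = 25, length3 = 20}, the last stream length is
+ * inferred as length4 = 106 - (30 + 25 + 20 + 6) = 25, and the four streams
+ * begin at istart+6, istart+36, istart+61 and istart+81. Each of the first
+ * three streams regenerates segmentSize = (dstSize+3)/4 bytes; the fourth
+ * runs to oend. */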
+
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+#endif
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    assert(dstSize > 0);
+    assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 1;
+#else
+    /* decoder timing evaluation */
+    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
+        U32 const D256 = (U32)(dstSize >> 8);
+        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
+        return DTime1 < DTime0;
+    }
+#endif
+}
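+
+/* A worked example of the heuristic above, with hypothetical sizes: for
+ * dstSize = 64 KB and cSrcSize = 24 KB, Q = 24*16/64 = 6 and D256 = 256, so
+ * DTime0 = 883 + 128*256 = 33651 while DTime1 = 1437 + 74*256 = 20381,
+ * raised by 1/8th to 22928. Since 22928 < 33651, the double-symbol decoder
+ * (HUF_decompress4X2) is selected. */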
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                            cSrcSize, workSpace, wkspSize):
+                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize):
+                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+#endif
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#endif
+    }
+}
+
diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
new file mode 100644
index 0000000..dbbc791
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_ddict.c :
+ * concentrates all logic that needs to know the internals of the ZSTD_DDict object */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_decompress_internal.h"
+#include "zstd_ddict.h"
+
+
+
+
+/*-*******************************************************
+*  Types
+*********************************************************/
+struct ZSTD_DDict_s {
+    void* dictBuffer;
+    const void* dictContent;
+    size_t dictSize;
+    ZSTD_entropyDTables_t entropy;
+    U32 dictID;
+    U32 entropyPresent;
+    ZSTD_customMem cMem;
+};  /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictContent;
+}
+
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictSize;
+}
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_copyDDictParameters");
+    assert(dctx != NULL);
+    assert(ddict != NULL);
+    dctx->dictID = ddict->dictID;
+    dctx->prefixStart = ddict->dictContent;
+    dctx->virtualStart = ddict->dictContent;
+    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+    dctx->previousDstEnd = dctx->dictEnd;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    if (ddict->entropyPresent) {
+        dctx->litEntropy = 1;
+        dctx->fseEntropy = 1;
+        dctx->LLTptr = ddict->entropy.LLTable;
+        dctx->MLTptr = ddict->entropy.MLTable;
+        dctx->OFTptr = ddict->entropy.OFTable;
+        dctx->HUFptr = ddict->entropy.hufTable;
+        dctx->entropy.rep[0] = ddict->entropy.rep[0];
+        dctx->entropy.rep[1] = ddict->entropy.rep[1];
+        dctx->entropy.rep[2] = ddict->entropy.rep[2];
+    } else {
+        dctx->litEntropy = 0;
+        dctx->fseEntropy = 0;
+    }
+}
+
+
+static size_t
+ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
+                           ZSTD_dictContentType_e dictContentType)
+{
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+    if (ddict->dictSize < 8) {
+        if (dictContentType == ZSTD_dct_fullDict)
+            return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+        return 0;   /* pure content mode */
+    }
+    {   U32 const magic = MEM_readLE32(ddict->dictContent);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            if (dictContentType == ZSTD_dct_fullDict)
+                return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+            return 0;   /* pure content mode */
+        }
+    }
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+        dictionary_corrupted, "");
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType)
+{
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
+        ddict->dictBuffer = NULL;
+        ddict->dictContent = dict;
+        if (!dict) dictSize = 0;
+    } else {
+        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
+        ddict->dictBuffer = internalBuffer;
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        ZSTD_memcpy(internalBuffer, dict, dictSize);
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+
+    return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
+        if (ddict == NULL) return NULL;
+        ddict->cMem = customMem;
+        {   size_t const initResult = ZSTD_initDDict_internal(ddict,
+                                            dict, dictSize,
+                                            dictLoadMethod, dictContentType);
+            if (ZSTD_isError(initResult)) {
+                ZSTD_freeDDict(ddict);
+                return NULL;
+        }   }
+        return ddict;
+    }
+}
+
+/*! ZSTD_createDDict() :
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
+ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
+}
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, to start decompression without startup delay.
+ *  Dictionary content is simply referenced; it will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
+}
+
+
+const ZSTD_DDict* ZSTD_initStaticDDict(
+                                void* sBuffer, size_t sBufferSize,
+                                const void* dict, size_t dictSize,
+                                ZSTD_dictLoadMethod_e dictLoadMethod,
+                                ZSTD_dictContentType_e dictContentType)
+{
+    size_t const neededSpace = sizeof(ZSTD_DDict)
+                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
+    assert(sBuffer != NULL);
+    assert(dict != NULL);
+    if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
+    if (sBufferSize < neededSpace) return NULL;
+    if (dictLoadMethod == ZSTD_dlm_byCopy) {
+        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
+        dict = ddict+1;
+    }
+    if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
+                                              dict, dictSize,
+                                              ZSTD_dlm_byRef, dictContentType) ))
+        return NULL;
+    return ddict;
+}
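+
+/* A minimal usage sketch for the static DDict path (hypothetical caller,
+ * not part of the original sources):
+ *
+ *   size_t const need = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
+ *   const ZSTD_DDict* const ddict =
+ *       ZSTD_initStaticDDict(wksp, wkspSize, dict, dictSize,
+ *                            ZSTD_dlm_byCopy, ZSTD_dct_auto);
+ *   if (ddict == NULL) { handle wksp too small, or not 8-aligned }
+ *
+ * where `wksp` must be 8-byte aligned and provide at least `need` bytes. */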
+
+
+size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = ddict->cMem;
+        ZSTD_customFree(ddict->dictBuffer, cMem);
+        ZSTD_customFree(ddict, cMem);
+        return 0;
+    }
+}
+
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate the amount of memory that will be needed to create a dictionary for decompression.
+ *  Note : dictionaries created by reference (ZSTD_dlm_byRef) are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+}
+
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support sizeof on NULL */
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to the Zstandard specification, or is empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
new file mode 100644
index 0000000..8c1a79d
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>     /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
new file mode 100644
index 0000000..b4d81d8
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -0,0 +1,2085 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Selects how the default decompression function ZSTD_decompress() allocates its context:
+ * on the stack (0), or on the heap (1, default; requires malloc()).
+ * Note that functions with an explicit context, such as ZSTD_decompressDCtx(), are unaffected.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+*  LEGACY_SUPPORT :
+*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+
+/*!
+ *  MAXWINDOWSIZE_DEFAULT :
+ *  maximum window size accepted by DStream __by default__.
+ *  Frames requiring more memory will be rejected.
+ *  It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ *  NO_FORWARD_PROGRESS_MAX :
+ *  maximum allowed number of calls to ZSTD_decompressStream()
+ *  without any forward progress
+ *  (defined as: no byte read from input, and no byte flushed to output)
+ *  before triggering an error.
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+#  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+#include "../common/zstd_internal.h"  /* blockProperties_t */
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
+
+
+
+
+/* ***********************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index in [0, hashSet->ddictPtrTableSize)
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = xxh64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a power of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
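+
+/* For illustration (hypothetical hash value): with the base table size of 64
+ * the mask is 63, so a hash of 0x1a7 maps to starting index 0x1a7 & 63 = 39. */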
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx &= idxRangeMask;
+        idx++;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands the hash table by a factor of DDICT_HASHSET_RESIZE_FACTOR:
+ * allocates a new table, rehashes all values into it, then frees the old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
+            idx++;
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    if (!ret)
+        return NULL;
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
+        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+    }
+    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+    return 0;
+}
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    return sizeof(*dctx)
+           + ZSTD_sizeof_DDict(dctx->ddictLocal)
+           + dctx->inBuffSize + dctx->outBuffSize;
+}
+
+size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
+
+
+static size_t ZSTD_startingInputLength(ZSTD_format_e format)
+{
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
+    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
+    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
+    return startingInputLength;
+}
+
+static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+{
+    assert(dctx->streamStage == zdss_init);
+    dctx->format = ZSTD_f_zstd1;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->outBufferMode = ZSTD_bm_buffered;
+    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+}
+
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    dctx->staticSize  = 0;
+    dctx->ddict       = NULL;
+    dctx->ddictLocal  = NULL;
+    dctx->dictEnd     = NULL;
+    dctx->ddictIsCold = 0;
+    dctx->dictUses = ZSTD_dont_use;
+    dctx->inBuff      = NULL;
+    dctx->inBuffSize  = 0;
+    dctx->outBuffSize = 0;
+    dctx->streamStage = zdss_init;
+    dctx->legacyContext = NULL;
+    dctx->previousLegacyVersion = 0;
+    dctx->noForwardProgress = 0;
+    dctx->oversizedDuration = 0;
+    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    dctx->ddictSet = NULL;
+    ZSTD_DCtx_resetParameters(dctx);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentEndForFuzzing = NULL;
+#endif
+}
+
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
+    return dctx;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
+        if (!dctx) return NULL;
+        dctx->customMem = customMem;
+        ZSTD_initDCtx_internal(dctx);
+        return dctx;
+    }
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    DEBUGLOG(3, "ZSTD_createDCtx");
+    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+    ZSTD_freeDDict(dctx->ddictLocal);
+    dctx->ddictLocal = NULL;
+    dctx->ddict = NULL;
+    dctx->dictUses = ZSTD_dont_use;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
+    {   ZSTD_customMem const cMem = dctx->customMem;
+        ZSTD_clearDict(dctx);
+        ZSTD_customFree(dctx->inBuff, cMem);
+        dctx->inBuff = NULL;
+        if (dctx->ddictSet) {
+            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+            dctx->ddictSet = NULL;
+        }
+        ZSTD_customFree(dctx, cMem);
+        return 0;
+    }
+}
+
+/* no longer useful */
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
+}
+
+/* Given a dctx with digested frame parameters, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+    assert(dctx->refMultipleDDicts && dctx->ddictSet);
+    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+    if (dctx->ddict) {
+        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+        if (frameDDict) {
+            DEBUGLOG(4, "DDict found!");
+            ZSTD_clearDict(dctx);
+            dctx->dictID = dctx->fParams.dictID;
+            dctx->ddict = frameDDict;
+            dctx->dictUses = ZSTD_use_indefinitely;
+        }
+    }
+}
+
+
+/*-*************************************************************
+ *   Frame header decoding
+ ***************************************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
+
+/* ZSTD_frameHeaderSize_internal() :
+ *  srcSize must be large enough to reach header size fields.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ *           or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
+        U32 const dictID= fhd & 3;
+        U32 const singleSegment = (fhd >> 5) & 1;
+        U32 const fcsId = fhd >> 6;
+        return minInputSize + !singleSegment
+             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+             + (singleSegment && !fcsId);
+    }
+}
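+
+/* A worked example (hypothetical header byte): fhd = 0xC2 gives
+ * dictID code = 2 (2-byte dictID field), singleSegment = 0 (adding a 1-byte
+ * window descriptor) and fcsId = 3 (8-byte frame content size), so with
+ * format ZSTD_f_zstd1 (minInputSize = 5) the header size is 5+1+2+8 = 16. */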
+
+/* ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameHeader_advanced() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, the return value is the wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+
+    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzers do not understand that zfhPtr will only be read if the return value is zero, since those are 2 different signals */
+    if (srcSize < minInputSize) return minInputSize;
+    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
+
+    if ( (format != ZSTD_f_zstd1_magicless)
+      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            /* skippable frame */
+            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
+                return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
+            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
+            zfhPtr->frameType = ZSTD_skippableFrame;
+            return 0;
+        }
+        RETURN_ERROR(prefix_unknown, "");
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
+        if (srcSize < fhsize) return fhsize;
+        zfhPtr->headerSize = (U32)fhsize;
+    }
+
+    {   BYTE const fhdByte = ip[minInputSize-1];
+        size_t pos = minInputSize;
+        U32 const dictIDSizeCode = fhdByte&3;
+        U32 const checksumFlag = (fhdByte>>2)&1;
+        U32 const singleSegment = (fhdByte>>5)&1;
+        U32 const fcsID = fhdByte>>6;
+        U64 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
+        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+                        "reserved bits, must be zero");
+
+        if (!singleSegment) {
+            BYTE const wlByte = ip[pos++];
+            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
+            windowSize = (1ULL << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);
+        }
+        switch(dictIDSizeCode)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (singleSegment) windowSize = frameContentSize;
+
+        zfhPtr->frameType = ZSTD_frame;
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
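+
+/* Continuing the hypothetical example: a header byte fhdByte = 0x20 decodes
+ * as dictIDSizeCode = 0, checksumFlag = 0, singleSegment = 1 and fcsID = 0,
+ * so frameContentSize is the single byte at ip[pos] and windowSize is then
+ * set equal to it, per the singleSegment branch above. */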
+
+/* ZSTD_getFrameHeader() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : this function does not consume input, it only reads it.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, the return value is the wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameContentSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+    {   ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame) {
+            return 0;
+        } else {
+            return zfh.frameContentSize;
+    }   }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+    U32 sizeU32;
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
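+
+/* For example (made-up size field): a skippable frame whose LE32 size field
+ * reads 16 occupies skippableHeaderSize + 16 = 8 + 16 = 24 bytes in total. */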
+
+/* ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+        U32 const magicNumber = MEM_readLE32(src);
+
+        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+            if (ZSTD_isError(skippableSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+            assert(skippableSize <= srcSize);
+
+            src = (const BYTE *)src + skippableSize;
+            srcSize -= skippableSize;
+            continue;
+        }
+
+        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+        }
+        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(frameSrcSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + frameSrcSize;
+            srcSize -= frameSrcSize;
+        }
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
+
+    return totalDstSize;
+}
+
+/* ZSTD_getDecompressedSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size if known, 0 otherwise
+             note : 0 can mean any of the following :
+                   - frame content is empty
+                   - decompressed size field is not present in frame header
+                   - frame header unknown / not supported
+                   - frame header not complete (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
+    return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
+}
+
+
+/* ZSTD_decodeFrameHeader() :
+ * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, this will also choose the correct DDict to use.
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
+{
+    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
+    if (ZSTD_isError(result)) return result;    /* invalid header */
+    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+    /* Reference DDict requested by frame if dctx references multiple ddicts */
+    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+        ZSTD_DCtx_selectFrameDDict(dctx);
+    }
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
+    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+                    dictionary_wrong, "");
+#endif
+    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
+    if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0);
+    dctx->processedCSize += headerSize;
+    return 0;
+}
+
+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    frameSizeInfo.compressedSize = ret;
+    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    return frameSizeInfo;
+}
+
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+
+    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
+        return frameSizeInfo;
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        size_t nbBlocks = 0;
+        ZSTD_frameHeader zfh;
+
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+            if (ZSTD_isError(ret))
+                return ZSTD_errorFrameSizeInfo(ret);
+            if (ret > 0)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+        }
+
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
+
+        /* Iterate over each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize))
+                return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+            nbBlocks++;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        /* Final frame content checksum */
+        if (zfh.checksumFlag) {
+            if (remainingSize < 4)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+            ip += 4;
+        }
+
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                        ? zfh.frameContentSize
+                                        : nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}
+
+/* ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame it contains
+ *  @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+    return frameSizeInfo.compressedSize;
+}
+
+/* ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippable frame
+ *  `srcSize` must be at least as large as the frame it contains
+ *  @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        bound += decompressedBound;
+    }
+    return bound;
+}
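+
+/* Usage sketch (illustrative only; `alloc` stands for a caller-provided
+ * allocator) : sizing a destination buffer for a possibly multi-frame source
+ * with ZSTD_decompressBound(), then decompressing in one shot :
+ *
+ *   unsigned long long const bound = ZSTD_decompressBound(src, srcSize);
+ *   if (bound == ZSTD_CONTENTSIZE_ERROR) return ERROR(srcSize_wrong);
+ *   {   void* const dst = alloc((size_t)bound);   // assumes bound fits in size_t
+ *       size_t const dSize = ZSTD_decompress(dst, (size_t)bound, src, srcSize);
+ *       if (ZSTD_isError(dSize)) return dSize;
+ *   }
+ */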
+
+
+/*-*************************************************************
+ *   Frame decoding
+ ***************************************************************/
+
+/* ZSTD_insertBlock() :
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
+    ZSTD_checkContinuity(dctx, blockStart, blockSize);
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_copyRawBlock");
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (srcSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                               BYTE b,
+                               size_t regenSize)
+{
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (regenSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memset(dst, b, regenSize);
+    return regenSize;
+}
+
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+    (void)dctx;
+    (void)uncompressedSize;
+    (void)compressedSize;
+    (void)streaming;
+}
+
+
+/*! ZSTD_decompressFrame() :
+ * @dctx must be properly initialized
+ *  updates *srcPtr and *srcSizePtr
+ *  to make *srcPtr progress by one frame. */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+                                   void* dst, size_t dstCapacity,
+                             const void** srcPtr, size_t *srcSizePtr)
+{
+    const BYTE* const istart = (const BYTE*)(*srcPtr);
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
+    BYTE* op = ostart;
+    size_t remainingSrcSize = *srcSizePtr;
+
+    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
+
+    /* check */
+    RETURN_ERROR_IF(
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+        srcSize_wrong, "");
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+                        srcSize_wrong, "");
+        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
+        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSrcSize -= ZSTD_blockHeaderSize;
+        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+            break;
+        case bt_reserved :
+        default:
+            RETURN_ERROR(corruption_detected, "invalid block type");
+        }
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        if (dctx->validateChecksum)
+            xxh64_update(&dctx->xxhState, op, decodedSize);
+        if (decodedSize != 0)
+            op += decodedSize;
+        assert(ip != NULL);
+        ip += cBlockSize;
+        remainingSrcSize -= cBlockSize;
+        if (blockProperties.lastBlock) break;
+    }
+
+    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+                        corruption_detected, "");
+    }
+    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
+        if (!dctx->forceIgnoreChecksum) {
+            U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
+            U32 checkRead;
+            checkRead = MEM_readLE32(ip);
+            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        }
+        ip += 4;
+        remainingSrcSize -= 4;
+    }
+    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSrcSize;
+    return (size_t)(op-ostart);
+}
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const void* dict, size_t dictSize,
+                                  const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+    int moreThan1Frame = 0;
+
+    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
+    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
+
+    if (ddict) {
+        dict = ZSTD_DDict_dictContent(ddict);
+        dictSize = ZSTD_DDict_dictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+
+        {   U32 const magicNumber = MEM_readLE32(src);
+            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+                assert(skippableSize <= srcSize);
+
+                src = (const BYTE *)src + skippableSize;
+                srcSize -= skippableSize;
+                continue;
+        }   }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
+        } else {
+            /* this initializes correctly with no dictionary when dict == NULL,
+             * so use it in all cases except ddict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
+        }
+        ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            RETURN_ERROR_IF(
+                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+             && (moreThan1Frame==1),
+                srcSize_wrong,
+                "At least one frame successfully completed, "
+                "but following bytes are garbage: "
+                "it's more likely to be a srcSize error, "
+                "specifying more input bytes than size of frame(s). "
+                "Note: one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic bytes. "
+                "But this is _much_ less likely than a srcSize field error.");
+            if (ZSTD_isError(res)) return res;
+            assert(res <= dstCapacity);
+            if (res != 0)
+                dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+        moreThan1Frame = 1;
+    }  /* while (srcSize >= ZSTD_startingInputLength(dctx->format)) */
+
+    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
+
+    return (size_t)((BYTE*)dst - (BYTE*)dststart);
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           const void* dict, size_t dictSize)
+{
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
+}
+
+
+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
+{
+    switch (dctx->dictUses) {
+    default:
+        assert(0 /* Impossible */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_dont_use:
+        ZSTD_clearDict(dctx);
+        return NULL;
+    case ZSTD_use_indefinitely:
+        return dctx->ddict;
+    case ZSTD_use_once:
+        dctx->dictUses = ZSTD_dont_use;
+        return dctx->ddict;
+    }
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
+    size_t regenSize;
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
+    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTD_DCtx dctx;
+    ZSTD_initDCtx_internal(&dctx);
+    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
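+
+/* Usage sketch (illustrative only) : the simplest one-shot call, when
+ * `dstCapacity` is known to be large enough (e.g. from
+ * ZSTD_getFrameContentSize()) :
+ *
+ *   size_t const dSize = ZSTD_decompress(dst, dstCapacity, src, srcSize);
+ *   if (ZSTD_isError(dSize)) return dSize;   // dSize is an error code here
+ */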
+
+
+/*-**************************************
+*   Advanced Streaming Decompression API
+*   Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+
+/*
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
+ * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
+ * be streamed.
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+        return dctx->expected;
+    if (dctx->bType != bt_raw)
+        return dctx->expected;
+    return MIN(MAX(inputSize, 1), dctx->expected);
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+        assert(0);
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_getFrameHeaderSize:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;
+    }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
+
+/* ZSTD_decompressContinue() :
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
+ *            or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+    /* Sanity check */
+    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        assert(src != NULL);
+        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
+            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
+            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
+                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
+                dctx->stage = ZSTDds_decodeSkippableHeader;
+                return 0;
+        }   }
+        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+        dctx->expected = dctx->headerSize - srcSize;
+        dctx->stage = ZSTDds_decodeFrameHeader;
+        return 0;
+
+    case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
+        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+        dctx->expected = ZSTD_blockHeaderSize;
+        dctx->stage = ZSTDds_decodeBlockHeader;
+        return 0;
+
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+                return 0;
+            }
+            /* empty block */
+            if (bp.lastBlock) {
+                if (dctx->fParams.checksumFlag) {
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0; /* end of frame */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
+                dctx->stage = ZSTDds_decodeBlockHeader;
+            }
+            return 0;
+        }
+
+    case ZSTDds_decompressLastBlock:
+    case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_raw :
+                assert(srcSize <= dctx->expected);
+                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+                assert(rSize == srcSize);
+                dctx->expected -= rSize;
+                break;
+            case bt_rle :
+                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_reserved :   /* should never happen */
+            default:
+                RETURN_ERROR(corruption_detected, "invalid block type");
+            }
+            FORWARD_IF_ERROR(rSize, "");
+            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
+            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+            dctx->decodedSize += rSize;
+            if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize);
+            dctx->previousDstEnd = (char*)dst + rSize;
+
+            /* Stay on the same stage until we are finished streaming the block. */
+            if (dctx->expected > 0) {
+                return rSize;
+            }
+
+            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
+                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
+                RETURN_ERROR_IF(
+                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && dctx->decodedSize != dctx->fParams.frameContentSize,
+                    corruption_detected, "");
+                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+                    dctx->expected = 0;   /* ends here */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->stage = ZSTDds_decodeBlockHeader;
+                dctx->expected = ZSTD_blockHeaderSize;
+            }
+            return rSize;
+        }
+
+    case ZSTDds_checkChecksum:
+        assert(srcSize == 4);  /* guaranteed by dctx->expected */
+        {
+            if (dctx->validateChecksum) {
+                U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
+                U32 const check32 = MEM_readLE32(src);
+                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+            }
+            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+            dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+
+    case ZSTDds_decodeSkippableHeader:
+        assert(src != NULL);
+        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
+        dctx->stage = ZSTDds_skipFrame;
+        return 0;
+
+    case ZSTDds_skipFrame:
+        dctx->expected = 0;
+        dctx->stage = ZSTDds_getFrameHeaderSize;
+        return 0;
+
+    default:
+        assert(0);   /* impossible */
+        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require a default case */
+    }
+}
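+
+/* Usage sketch (illustrative only; `readExact`, `inBuff`, `op` and `oend` are
+ * placeholders) : the bufferless loop implied by the contract above. Feed
+ * exactly the number of bytes announced by ZSTD_nextSrcSizeToDecompress()
+ * on every round; window/continuity rules for `dst` are omitted here :
+ *
+ *   FORWARD_IF_ERROR(ZSTD_decompressBegin(dctx), "");
+ *   while (1) {
+ *       size_t const toRead = ZSTD_nextSrcSizeToDecompress(dctx);
+ *       if (toRead == 0) break;   // frame fully decoded
+ *       readExact(inBuff, toRead);
+ *       {   size_t const produced = ZSTD_decompressContinue(dctx, op, (size_t)(oend-op), inBuff, toRead);
+ *           FORWARD_IF_ERROR(produced, "");
+ *           op += produced;
+ *       }
+ *   }
+ */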
+
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+    dctx->prefixStart = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    return 0;
+}
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at the beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read */
+size_t
+ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                  const void* const dict, size_t const dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+
+    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
+    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
+    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
+    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
+        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
+#ifdef HUF_FORCE_DECOMPRESS_X1
+        /* in minimal Huffman mode, we always use the X1 variants */
+        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+                                                dictPtr, dictEnd - dictPtr,
+                                                workspace, workspaceSize);
+#else
+        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+                                                dictPtr, (size_t)(dictEnd - dictPtr),
+                                                workspace, workspaceSize);
+#endif
+        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+        dictPtr += hSize;
+    }
+
+    {   short offcodeNCount[MaxOff+1];
+        unsigned offcodeMaxValue = MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->OFTable,
+                            offcodeNCount, offcodeMaxValue,
+                            OF_base, OF_bits,
+                            offcodeLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */0);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->MLTable,
+                            matchlengthNCount, matchlengthMaxValue,
+                            ML_base, ML_bits,
+                            matchlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->LLTable,
+                            litlengthNCount, litlengthMaxValue,
+                            LL_base, LL_bits,
+                            litlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+                            dictionary_corrupted, "");
+            entropy->rep[i] = rep;
+    }   }
+
+    return (size_t)(dictPtr - (const BYTE*)dict);
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
+    {   U32 const magic = MEM_readLE32(dict);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
+    }   }
+    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
+        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
+        dict = (const char*)dict + eSize;
+        dictSize -= eSize;
+    }
+    dctx->litEntropy = dctx->fseEntropy = 1;
+
+    /* reference dictionary content */
+    return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+{
+    assert(dctx != NULL);
+    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->processedCSize = 0;
+    dctx->decodedSize = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->prefixStart = NULL;
+    dctx->virtualStart = NULL;
+    dctx->dictEnd = NULL;
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->litEntropy = dctx->fseEntropy = 0;
+    dctx->dictID = 0;
+    dctx->bType = bt_reserved;
+    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
+    return 0;
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (dict && dictSize)
+        RETURN_ERROR_IF(
+            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+            dictionary_corrupted, "");
+    return 0;
+}
+
+
+/* ======   ZSTD_DDict   ====== */
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
+    assert(dctx != NULL);
+    if (ddict) {
+        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
+        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
+        const void* const dictEnd = dictStart + dictSize;
+        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
+        DEBUGLOG(4, "DDict is %s",
+                    dctx->ddictIsCold ? "~cold~" : "hot!");
+    }
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
+        ZSTD_copyDDictParameters(dctx, ddict);
+    }
+    return 0;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within the dictionary.
+ *  If @return == 0, the dictionary is not conformant with the Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This can happen for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    The needed dictionary is hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;
+    return zfp.dictID;
+}
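+
+/* Usage sketch (illustrative only) : matching a frame to a dictionary by
+ * comparing the two dictID sources above :
+ *
+ *   unsigned const wantID = ZSTD_getDictID_fromFrame(src, srcSize);
+ *   unsigned const haveID = ZSTD_getDictID_fromDict(dict, dictSize);
+ *   if (wantID != 0 && wantID != haveID) {
+ *       // this dictionary will not decode this frame
+ *   }
+ */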
+
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict)
+{
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
+}
+
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createDStream");
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+    return ZSTD_freeDCtx(zds);
+}
+
+
+/* ***  Initialization  *** */
+
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+                                   const void* dict, size_t dictSize,
+                                         ZSTD_dictLoadMethod_e dictLoadMethod,
+                                         ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {
+        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
+    return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
+    dctx->dictUses = ZSTD_use_once;
+    return 0;
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+    return ZSTD_startingInputLength(zds->format);
+}
+
+/* note : this variant can't fail */
+size_t ZSTD_initDStream(ZSTD_DStream* zds)
+{
+    DEBUGLOG(4, "ZSTD_initDStream");
+    return ZSTD_initDStream_usingDDict(zds, NULL);
+}
+
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive the decompression session
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+{
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+    return ZSTD_startingInputLength(dctx->format);
+}
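+
+/* Usage sketch (illustrative only; readChunk/flush and the chunk buffers are
+ * placeholders) : the standard streaming loop built on the init functions
+ * above and ZSTD_decompressStream() (defined further below) :
+ *
+ *   ZSTD_initDStream(zds);
+ *   for (;;) {
+ *       size_t const srcChunkSize = readChunk(srcChunk);   // placeholder reader
+ *       if (srcChunkSize == 0) break;                      // no more input
+ *       {   ZSTD_inBuffer in = { srcChunk, srcChunkSize, 0 };
+ *           while (in.pos < in.size) {
+ *               ZSTD_outBuffer out = { dstChunk, dstChunkCapacity, 0 };
+ *               size_t const hint = ZSTD_decompressStream(zds, &out, &in);
+ *               FORWARD_IF_ERROR(hint, "");
+ *               flush(out.dst, out.pos);   // placeholder sink
+ *           }
+ *       }
+ *   }
+ */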
+
+
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (ddict) {
+        dctx->ddict = ddict;
+        dctx->dictUses = ZSTD_use_indefinitely;
+        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+            if (dctx->ddictSet == NULL) {
+                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+                if (!dctx->ddictSet) {
+                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+                }
+            }
+            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+        }
+    }
+    return 0;
+}
+
+/* ZSTD_DCtx_setMaxWindowSize() :
+ * note : no direct equivalent in ZSTD_DCtx_setParameter(),
+ * since this version sets windowSize, and the other sets windowLog */
+size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+    size_t const min = (size_t)1 << bounds.lowerBound;
+    size_t const max = (size_t)1 << bounds.upperBound;
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
+    dctx->maxWindowSize = maxWindowSize;
+    return 0;
+}
+
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
+{
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
+}
+
+ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+            return bounds;
+        case ZSTD_d_format:
+            bounds.lowerBound = (int)ZSTD_f_zstd1;
+            bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
+            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+            return bounds;
+        case ZSTD_d_stableOutBuffer:
+            bounds.lowerBound = (int)ZSTD_bm_buffered;
+            bounds.upperBound = (int)ZSTD_bm_stable;
+            return bounds;
+        case ZSTD_d_forceIgnoreChecksum:
+            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
+            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
+            return bounds;
+        case ZSTD_d_refMultipleDDicts:
+            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+            return bounds;
+        default:;
+    }
+    bounds.error = ERROR(parameter_unsupported);
+    return bounds;
+}
+
+/* ZSTD_dParam_withinBounds:
+ * @return 1 if value is within dParam bounds,
+ * 0 otherwise */
+static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+#define CHECK_DBOUNDS(p,v) {                \
+    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
+}
+
+size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
+{
+    switch (param) {
+        case ZSTD_d_windowLogMax:
+            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
+            return 0;
+        case ZSTD_d_format:
+            *value = (int)dctx->format;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            *value = (int)dctx->outBufferMode;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            *value = (int)dctx->forceIgnoreChecksum;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            *value = (int)dctx->refMultipleDDicts;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
+            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
+            dctx->maxWindowSize = ((size_t)1) << value;
+            return 0;
+        case ZSTD_d_format:
+            CHECK_DBOUNDS(ZSTD_d_format, value);
+            dctx->format = (ZSTD_format_e)value;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
+            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+            if (dctx->staticSize != 0) {
+                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+            }
+            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
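+
+/* Usage sketch (illustrative only) : typical parameter setup before
+ * streaming, staying within the advertised bounds :
+ *
+ *   ZSTD_bounds const b = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+ *   int const wlog = MIN(27, b.upperBound);   // cap window at 128 MB
+ *   FORWARD_IF_ERROR(ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, wlog), "");
+ */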
+
+size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        dctx->streamStage = zdss_init;
+        dctx->noForwardProgress = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+        ZSTD_clearDict(dctx);
+        ZSTD_DCtx_resetParameters(dctx);
+    }
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+{
+    return ZSTD_sizeof_DCtx(dctx);
+}
+
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+    size_t const minRBSize = (size_t) neededSize;
+    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+                    frameParameter_windowTooLarge, "");
+    return minRBSize;
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    size_t const inBuffSize = blockSize;  /* no block can be larger */
+    size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
+    ZSTD_frameHeader zfh;
+    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
+    if (ZSTD_isError(err)) return err;
+    RETURN_ERROR_IF(err>0, srcSize_wrong, "");
+    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+                    frameParameter_windowTooLarge, "");
+    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
+}
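+
+/* Usage sketch (illustrative only; `alloc` is a placeholder allocator) :
+ * sizing a static DStream for a known maximum window size :
+ *
+ *   size_t const need = ZSTD_estimateDStreamSize((size_t)1 << 20);   // 1 MB window
+ *   void* const workspace = alloc(need);
+ *   ZSTD_DStream* const zds = ZSTD_initStaticDStream(workspace, need);
+ *   if (zds == NULL) return ERROR(memory_allocation);
+ */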
+
+
+/* *****   Decompression   ***** */
+
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
+        zds->oversizedDuration++;
+    else
+        zds->oversizedDuration = 0;
+}
+
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+    return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/* Checks that the output buffer hasn't changed if ZSTD_bm_stable is used. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+    ZSTD_outBuffer const expect = zds->expectedOutBuffer;
+    /* No requirement when ZSTD_bm_stable is not enabled. */
+    if (zds->outBufferMode != ZSTD_bm_stable)
+        return 0;
+    /* Any buffer is allowed in zdss_init; after that, the same buffer must be
+     * provided on every call until the context is reset.
+     */
+    if (zds->streamStage == zdss_init)
+        return 0;
+    /* The buffer must match our expectation exactly. */
+    if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
+        return 0;
+    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
+}
+
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ */
+static size_t ZSTD_decompressContinueStream(
+            ZSTD_DStream* zds, char** op, char* oend,
+            void const* src, size_t srcSize) {
+    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+    if (zds->outBufferMode == ZSTD_bm_buffered) {
+        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+        size_t const decodedSize = ZSTD_decompressContinue(zds,
+                zds->outBuff + zds->outStart, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        if (!decodedSize && !isSkipFrame) {
+            zds->streamStage = zdss_read;
+        } else {
+            zds->outEnd = zds->outStart + decodedSize;
+            zds->streamStage = zdss_flush;
+        }
+    } else {
+        /* Write directly into the output buffer */
+        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
+        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        *op += decodedSize;
+        /* Flushing is not needed. */
+        zds->streamStage = zdss_read;
+        assert(*op <= oend);
+        assert(zds->outBufferMode == ZSTD_bm_stable);
+    }
+    return 0;
+}
+
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    const char* const src = (const char*)input->src;
+    const char* const istart = input->pos != 0 ? src + input->pos : src;
+    const char* const iend = input->size != 0 ? src + input->size : src;
+    const char* ip = istart;
+    char* const dst = (char*)output->dst;
+    char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+    char* const oend = output->size != 0 ? dst + output->size : dst;
+    char* op = ostart;
+    U32 someMoreWork = 1;
+
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    RETURN_ERROR_IF(
+        input->pos > input->size,
+        srcSize_wrong,
+        "forbidden. in: pos: %u   vs size: %u",
+        (U32)input->pos, (U32)input->size);
+    RETURN_ERROR_IF(
+        output->pos > output->size,
+        dstSize_tooSmall,
+        "forbidden. out: pos: %u   vs size: %u",
+        (U32)output->pos, (U32)output->size);
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+    FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
+
+    while (someMoreWork) {
+        switch(zds->streamStage)
+        {
+        case zdss_init :
+            DEBUGLOG(5, "stage zdss_init => transparent reset ");
+            zds->streamStage = zdss_loadHeader;
+            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+            zds->legacyVersion = 0;
+            zds->hostageByte = 0;
+            zds->expectedOutBuffer = *output;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_loadHeader :
+            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+            {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                if (zds->refMultipleDDicts && zds->ddictSet) {
+                    ZSTD_DCtx_selectFrameDDict(zds);
+                }
+                DEBUGLOG(5, "header size : %u", (U32)hSize);
+                if (ZSTD_isError(hSize)) {
+                    return hSize;   /* error */
+                }
+                if (hSize != 0) {   /* need more input */
+                    size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
+                    size_t const remainingInput = (size_t)(iend-ip);
+                    assert(iend >= ip);
+                    if (toLoad > remainingInput) {   /* not enough input to load full header */
+                        if (remainingInput > 0) {
+                            ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+                            zds->lhSize += remainingInput;
+                        }
+                        input->pos = input->size;
+                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    assert(ip != NULL);
+                    ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+                    break;
+            }   }
+
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+                if (cSize <= (size_t)(iend-istart)) {
+                    /* shortcut : using single-pass mode */
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+            }   }
+
+            /* Check that the output buffer is large enough for ZSTD_bm_stable. */
+            if (zds->outBufferMode == ZSTD_bm_stable
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+                RETURN_ERROR(dstSize_tooSmall, "ZSTD_d_stableOutBuffer enabled but ZSTD_outBuffer is too small");
+            }
+
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+
+            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
+                        (U32)(zds->fParams.windowSize >>10),
+                        (U32)(zds->maxWindowSize >> 10) );
+            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                            frameParameter_windowTooLarge, "");
+
+            /* Adapt buffer sizes to frame header instructions */
+            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
+                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+                        : 0;
+
+                ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+                {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+                    int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+
+                    if (tooSmall || tooLarge) {
+                        size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+                        DEBUGLOG(4, "inBuff  : from %u to %u",
+                                    (U32)zds->inBuffSize, (U32)neededInBuffSize);
+                        DEBUGLOG(4, "outBuff : from %u to %u",
+                                    (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+                        if (zds->staticSize) {  /* static DCtx */
+                            DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                            assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                            RETURN_ERROR_IF(
+                                bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+                                memory_allocation, "");
+                        } else {
+                            ZSTD_customFree(zds->inBuff, zds->customMem);
+                            zds->inBuffSize = 0;
+                            zds->outBuffSize = 0;
+                            zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
+                            RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+                        }
+                        zds->inBuffSize = neededInBuffSize;
+                        zds->outBuff = zds->inBuff + zds->inBuffSize;
+                        zds->outBuffSize = neededOutBuffSize;
+            }   }   }
+            zds->streamStage = zdss_read;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_read:
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
+                if (neededInSize==0) {  /* end of frame */
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
+                    ip += neededInSize;
+                    /* Function modifies the stage so we must break */
+                    break;
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_load:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                size_t const toLoad = neededInSize - zds->inPos;
+                int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                size_t loadedSize;
+                /* At this point we shouldn't be decompressing a block that we can stream. */
+                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
+                if (isSkipFrame) {
+                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
+                } else {
+                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+                                    corruption_detected,
+                                    "should never happen");
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
+                }
+                ip += loadedSize;
+                zds->inPos += loadedSize;
+                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                zds->inPos = 0;   /* input is consumed */
+                FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+                /* Function modifies the stage so we must break */
+                break;
+            }
+        case zdss_flush:
+            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
+                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
+                op += flushedSize;
+                zds->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {  /* flush completed */
+                    zds->streamStage = zdss_read;
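+                    /* outBuff may be a rolling buffer, smaller than frameContentSize : if the room left cannot hold a full block, wrap back to the start */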
+                    if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+                        DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+                                (int)(zds->outBuffSize - zds->outStart),
+                                (U32)zds->fParams.blockSizeMax);
+                        zds->outStart = zds->outEnd = 0;
+                    }
+                    break;
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
+        default:
+            assert(0);    /* impossible */
+            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compilers require default to do something */
+    }   }
+
+    /* result */
+    input->pos = (size_t)(ip - (const char*)(input->src));
+    output->pos = (size_t)(op - (char*)(output->dst));
+
+    /* Update the expected output buffer for ZSTD_bm_stable. */
+    zds->expectedOutBuffer = *output;
+
+    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
+        zds->noForwardProgress ++;
+        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
+            assert(0);
+        }
+    } else {
+        zds->noForwardProgress = 0;
+    }
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
+        if (!nextSrcSizeHint) {   /* frame fully decoded */
+            if (zds->outEnd == zds->outStart) {  /* output fully flushed */
+                if (zds->hostageByte) {
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
+                    input->pos++;  /* release hostage */
+                }   /* zds->hostageByte */
+                return 0;
+            }  /* zds->outEnd == zds->outStart */
+            if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+                input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
+                zds->hostageByte=1;
+            }
+            return 1;
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
+        assert(zds->inPos <= nextSrcSizeHint);
+        nextSrcSizeHint -= zds->inPos;   /* part already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_decompressStream() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
new file mode 100644
index 0000000..2d101d9
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.c
@@ -0,0 +1,1540 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_decompress_block :
+ * this module takes care of decompressing _compressed_ blocks */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h"    /* prefetch */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"
+
+/*_*******************************************************
+*  Macros
+**********************************************************/
+
+/* These two optional macros force the use of one or the other of the two
+ * ZSTD_decompressSequences implementations. You can't force both directions
+ * at the same time.
+ */
+#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
+#endif
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+ *   Block decoding
+ ***************************************************************/
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
+{
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
+
+    {   U32 const cBlockHeader = MEM_readLE24(src);
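+        /* 24-bit LE block header : bit 0 = lastBlock flag, bits 1-2 = blockType, bits 3-23 = block size */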
+        U32 const cSize = cBlockHeader >> 3;
+        bpPtr->lastBlock = cBlockHeader & 1;
+        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
+        bpPtr->origSize = cSize;   /* only useful for RLE */
+        if (bpPtr->blockType == bt_rle) return 1;
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
+        return cSize;
+    }
+}
+
+
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize);
+/*! ZSTD_decodeLiteralsBlock() :
+ * @return : nb of bytes read from src (< srcSize)
+ *  note : symbol not declared in a header, but exposed for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
+
+    {   const BYTE* const istart = (const BYTE*) src;
+        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+        switch(litEncType)
+        {
+        case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+            ZSTD_FALLTHROUGH;
+
+        case set_compressed:
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+            {   size_t lhSize, litSize, litCSize;
+                U32 singleStream=0;
+                U32 const lhlCode = (istart[0] >> 2) & 3;
+                U32 const lhc = MEM_readLE32(istart);
+                size_t hufSuccess;
+                switch(lhlCode)
+                {
+                case 0: case 1: default:   /* note : default is impossible, since lhlCode is in [0..3] */
+                    /* 2 - 2 - 10 - 10 */
+                    singleStream = !lhlCode;
+                    lhSize = 3;
+                    litSize  = (lhc >> 4) & 0x3FF;
+                    litCSize = (lhc >> 14) & 0x3FF;
+                    break;
+                case 2:
+                    /* 2 - 2 - 14 - 14 */
+                    lhSize = 4;
+                    litSize  = (lhc >> 4) & 0x3FFF;
+                    litCSize = lhc >> 18;
+                    break;
+                case 3:
+                    /* 2 - 2 - 18 - 18 */
+                    lhSize = 5;
+                    litSize  = (lhc >> 4) & 0x3FFFF;
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+
+                /* prefetch huffman table if cold */
+                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
+                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+                }
+
+                if (litEncType==set_repeat) {
+                    if (singleStream) {
+                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    } else {
+                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    }
+                } else {
+                    if (singleStream) {
+#if defined(HUF_FORCE_DECOMPRESS_X2)
+                        hufSuccess = HUF_decompress1X_DCtx_wksp(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace));
+#else
+                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+#endif
+                    } else {
+                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+                    }
+                }
+
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                dctx->litEntropy = 1;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return litCSize + lhSize;
+            }
+
+        case set_basic:
+            {   size_t litSize, lhSize;
+                U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    break;
+                }
+
+                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                    ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    dctx->litPtr = dctx->litBuffer;
+                    dctx->litSize = litSize;
+                    ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                    return lhSize+litSize;
+                }
+                /* direct reference into compressed stream */
+                dctx->litPtr = istart+lhSize;
+                dctx->litSize = litSize;
+                return lhSize+litSize;
+            }
+
+        case set_rle:
+            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t litSize, lhSize;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode is in [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
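+                /* RLE literals : one byte (istart[lhSize]) repeated litSize times; the wildcopy margin is filled with it too */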
+                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                return lhSize+1;
+            }
+        default:
+            RETURN_ERROR(corruption_detected, "impossible");
+        }
+    }
+}
+
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with the following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - print out the content of tables
+ * - prettify output, reported below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+     /* nextState, nbAddBits, nbBits, baseVal */
+     {  0,  0,  4,    0},  { 16,  0,  4,    0},
+     { 32,  0,  5,    1},  {  0,  0,  5,    3},
+     {  0,  0,  5,    4},  {  0,  0,  5,    6},
+     {  0,  0,  5,    7},  {  0,  0,  5,    9},
+     {  0,  0,  5,   10},  {  0,  0,  5,   12},
+     {  0,  0,  6,   14},  {  0,  1,  5,   16},
+     {  0,  1,  5,   20},  {  0,  1,  5,   22},
+     {  0,  2,  5,   28},  {  0,  3,  5,   32},
+     {  0,  4,  5,   48},  { 32,  6,  5,   64},
+     {  0,  7,  5,  128},  {  0,  8,  6,  256},
+     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
+     { 32,  0,  4,    0},  {  0,  0,  4,    1},
+     {  0,  0,  5,    2},  { 32,  0,  5,    4},
+     {  0,  0,  5,    5},  { 32,  0,  5,    7},
+     {  0,  0,  5,    8},  { 32,  0,  5,   10},
+     {  0,  0,  5,   11},  {  0,  0,  6,   13},
+     { 32,  1,  5,   16},  {  0,  1,  5,   18},
+     { 32,  1,  5,   22},  {  0,  2,  5,   24},
+     { 32,  3,  5,   32},  {  0,  3,  5,   40},
+     {  0,  6,  4,   64},  { 16,  6,  4,   64},
+     { 32,  7,  5,  128},  {  0,  9,  6,  512},
+     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
+     { 16,  0,  4,    1},  { 32,  0,  5,    2},
+     { 32,  0,  5,    3},  { 32,  0,  5,    5},
+     { 32,  0,  5,    6},  { 32,  0,  5,    8},
+     { 32,  0,  5,    9},  { 32,  0,  5,   11},
+     { 32,  0,  5,   12},  {  0,  0,  6,   15},
+     { 32,  1,  5,   18},  { 32,  1,  5,   20},
+     { 32,  2,  5,   24},  { 32,  2,  5,   28},
+     { 32,  3,  5,   40},  { 32,  4,  5,   48},
+     {  0, 16,  6,65536},  {  0, 15,  6,32768},
+     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
+};   /* LL_defaultDTable */
+
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  5,    0},     {  0,  6,  4,   61},
+    {  0,  9,  5,  509},     {  0, 15,  5,32765},
+    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
+    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
+    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
+    {  0,  5,  5,   29},     {  0,  8,  4,  253},
+    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
+    {  0,  2,  5,    1},     { 16,  7,  4,  125},
+    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
+    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
+    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
+    {  0, 19,  5,524285},    {  0,  1,  5,    1},
+    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
+    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
+    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
+    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
+};   /* OF_defaultDTable */
+
+
+/* Default FSE distribution table for Match Lengths */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  6,    3},  {  0,  0,  4,    4},
+    { 32,  0,  5,    5},  {  0,  0,  5,    6},
+    {  0,  0,  5,    8},  {  0,  0,  5,    9},
+    {  0,  0,  5,   11},  {  0,  0,  6,   13},
+    {  0,  0,  6,   16},  {  0,  0,  6,   19},
+    {  0,  0,  6,   22},  {  0,  0,  6,   25},
+    {  0,  0,  6,   28},  {  0,  0,  6,   31},
+    {  0,  0,  6,   34},  {  0,  1,  6,   37},
+    {  0,  1,  6,   41},  {  0,  2,  6,   47},
+    {  0,  3,  6,   59},  {  0,  4,  6,   83},
+    {  0,  7,  6,  131},  {  0,  9,  6,  515},
+    { 16,  0,  4,    4},  {  0,  0,  4,    5},
+    { 32,  0,  5,    6},  {  0,  0,  5,    7},
+    { 32,  0,  5,    9},  {  0,  0,  5,   10},
+    {  0,  0,  6,   12},  {  0,  0,  6,   15},
+    {  0,  0,  6,   18},  {  0,  0,  6,   21},
+    {  0,  0,  6,   24},  {  0,  0,  6,   27},
+    {  0,  0,  6,   30},  {  0,  0,  6,   33},
+    {  0,  1,  6,   35},  {  0,  1,  6,   39},
+    {  0,  2,  6,   43},  {  0,  3,  6,   51},
+    {  0,  4,  6,   67},  {  0,  5,  6,   99},
+    {  0,  8,  6,  259},  { 32,  0,  4,    4},
+    { 48,  0,  4,    4},  { 16,  0,  4,    5},
+    { 32,  0,  5,    7},  { 32,  0,  5,    8},
+    { 32,  0,  5,   10},  { 32,  0,  5,   11},
+    {  0,  0,  6,   14},  {  0,  0,  6,   17},
+    {  0,  0,  6,   20},  {  0,  0,  6,   23},
+    {  0,  0,  6,   26},  {  0,  0,  6,   29},
+    {  0,  0,  6,   32},  {  0, 16,  6,65539},
+    {  0, 15,  6,32771},  {  0, 14,  6,16387},
+    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
+    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
+};   /* ML_defaultDTable */
+
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+    void* ptr = dt;
+    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+    ZSTD_seqSymbol* const cell = dt + 1;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->nbBits = 0;
+    cell->nextState = 0;
+    assert(nbAddBits < 255);
+    cell->nbAdditionalBits = (BYTE)nbAddBits;
+    cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage */
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_seqSymbol* const tableDecode = dt+1;
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
+
+    /* Sanity Checks */
+    assert(maxSymbolValue <= MaxSeq);
+    assert(tableLog <= MaxFSELog);
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
+    /* Init, lay down lowprob symbols */
+    {   ZSTD_seqSymbol_header DTableH;
+        DTableH.tableLog = tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].baseValue = s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols so that this
+     * fast case is hit, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
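+            /* each byte of sv equals the symbol value s : sv == s * 0x0101010101010101 at iteration s */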
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
+                tableDecode[position].baseValue = s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {
+        U32 u;
+        for (u=0; u<tableSize; u++) {
+            U32 const symbol = tableDecode[u].baseValue;
+            U32 const nextState = symbolNext[symbol]++;
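+            /* standard FSE construction : each occurrence of a symbol gets an increasing state value; rarer symbols end up reading more bits per step */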
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+            assert(nbAdditionalBits[symbol] < 255);
+            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].baseValue = baseValue[symbol];
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ * @return : nb bytes read from src,
+ *           or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
+{
+    switch(type)
+    {
+    case set_rle :
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+        {   U32 const symbol = *(const BYTE*)src;
+            U32 const baseline = baseValue[symbol];
+            U32 const nbBits = nbAdditionalBits[symbol];
+            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+        }
+        *DTablePtr = DTableSpace;
+        return 1;
+    case set_basic :
+        *DTablePtr = defaultTable;
+        return 0;
+    case set_repeat:
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+        /* prefetch FSE table if used */
+        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+            const void* const pStart = *DTablePtr;
+            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+            PREFETCH_AREA(pStart, pSize);
+        }
+        return 0;
+    case set_compressed :
+        {   unsigned tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
+            *DTablePtr = DTableSpace;
+            return headerSize;
+        }
+    default :
+        assert(0);
+        RETURN_ERROR(GENERIC, "impossible");
+    }
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+    int nbSeq;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+    /* check */
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+    /* SeqHead */
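+    /* nbSeq is encoded in 1-3 bytes : values 0-127 fit in one byte; a first byte in 0x80-0xFE adds a second byte; 0xFF adds two LE bytes plus LONGNBSEQ */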
+    nbSeq = *ip++;
+    if (!nbSeq) {
+        *nbSeqPtr=0;
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+        return 1;
+    }
+    if (nbSeq > 0x7F) {
+        if (nbSeq == 0xFF) {
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
+        } else {
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+        }
+    }
+    *nbSeqPtr = nbSeq;
+
+    /* FSE table descriptors */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+        ip++;
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
+                                                      LLtype, MaxLL, LLFSELog,
+                                                      ip, iend-ip,
+                                                      LL_base, LL_bits,
+                                                      LL_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += llhSize;
+        }
+
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
+                                                      OFtype, MaxOff, OffFSELog,
+                                                      ip, iend-ip,
+                                                      OF_base, OF_bits,
+                                                      OF_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += ofhSize;
+        }
+
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
+                                                      MLtype, MaxML, MLFSELog,
+                                                      ip, iend-ip,
+                                                      ML_base, ML_bits,
+                                                      ML_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += mlhSize;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+    const BYTE* match;
+} seq_t;
+
+typedef struct {
+    size_t state;
+    const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    ZSTD_fseState stateLL;
+    ZSTD_fseState stateOffb;
+    ZSTD_fseState stateML;
+    size_t prevOffset[ZSTD_REP_NUM];
+    const BYTE* prefixStart;
+    const BYTE* dictEnd;
+    size_t pos;
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
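+        /* after the fixup below, (*op - *ip) == offset - dec32table[offset] + dec64table[offset], which is >= 8 for every offset in 1..7 */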
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLENGTH of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+    const void* ptr = dt;
+    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+                (U32)DStatePtr->state, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bits we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
+    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
+        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
+        : 0)
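+/* With ZSTD_WINDOWLOG_MAX_32 (30) and STREAM_ACCUMULATOR_MIN_32 (25), this evaluates to 5, as checked by the static assert in ZSTD_decodeSequence(). */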
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+{
+    seq_t seq;
+    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+    U32 const llBase = llDInfo.baseValue;
+    U32 const mlBase = mlDInfo.baseValue;
+    U32 const ofBase = ofDInfo.baseValue;
+    BYTE const llBits = llDInfo.nbAdditionalBits;
+    BYTE const mlBits = mlDInfo.nbAdditionalBits;
+    BYTE const ofBits = ofDInfo.nbAdditionalBits;
+    BYTE const totalBits = llBits+mlBits+ofBits;
+
+    /* sequence */
+    {   size_t offset;
+        if (ofBits > 1) {
+            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
+            } else {
+                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        } else {
+            U32 const ll0 = (llBase == 0);
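+            /* small offset codes select repeat offsets; when litLength == 0 (ll0), the repeat-offset indices are shifted by one, as the format requires */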
+            if (LIKELY((ofBits == 0))) {
+                if (LIKELY(!ll0))
+                    offset = seqState->prevOffset[0];
+                else {
+                    offset = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset;
+                }
+            } else {
+                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset = temp;
+        }   }   }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = mlBase;
+    if (mlBits > 0)
+        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+    seq.litLength = llBase;
+    if (llBits > 0)
+        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    if (prefetch == ZSTD_p_prefetch) {
+        size_t const pos = seqState->pos + seq.litLength;
+        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update
+     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+     * better option, so it is the default for other compilers. But, if you
+     * measure that it is worse, please put up a pull request.
+     */
+    {
+#if !defined(__clang__)
+        const int kUseUpdateFseState = 1;
+#else
+        const int kUseUpdateFseState = 0;
+#endif
+        if (kUseUpdateFseState) {
+            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+        } else {
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+        }
+    }
+
+    return seq;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    size_t const windowSize = dctx->fParams.windowSize;
+    /* No dictionary used. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* Dictionary is our prefix. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Dictionary is not our ext-dict. */
+    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+    /* Dictionary is not within our window size. */
+    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+    /* Dictionary is active. */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        size_t error = 0;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't, we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * I've been able to reproduce this issue on the following CPUs:
+         *   - Kabylake: MacBook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUs:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
+         *   - Skylake
+         *
+         * If you are seeing performance instability, this script can help test.
+         * It tests 4 commits in zstd where I saw the performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 4");
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            BIT_reloadDStream(&(seqState.DStream));
+            op += oneSeqSize;
+            /* gcc and clang both dislike early returns in this loop,
+             * so break instead and check for an error after the loop.
+             */
+            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
+                error = oneSeqSize;
+                break;
+            }
+            if (UNLIKELY(!--nbSeq)) break;
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        if (ZSTD_isError(error)) return error;
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 4
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        seqState.prefixStart = prefixStart;
+        seqState.pos = (size_t)(op-prefixStart);
+        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+        }
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
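+
+/* The long-offset decoder above pipelines its work through a small ring
+ * buffer: each steady-state iteration decodes sequence N, executes sequence
+ * N-ADVANCED_SEQS (whose match was prefetched ADVANCED_SEQS iterations
+ * earlier), and prefetches the match of sequence N. A minimal sketch of that
+ * pattern, with hypothetical decode()/execute()/prefetch() helpers that are
+ * not part of zstd, assuming nbItems >= RING (zstd handles shorter runs via
+ * seqAdvance):
+ *
+ *   #define RING 4
+ *   item_t ring[RING];
+ *   int n;
+ *   for (n = 0; n < RING; n++) {                  // fill the pipeline
+ *       ring[n] = decode(n);
+ *       prefetch(ring[n].matchAddr);              // start hiding latency
+ *   }
+ *   for (; n < nbItems; n++) {                    // steady state
+ *       item_t const cur = decode(n);
+ *       execute(ring[(n - RING) & (RING - 1)]);   // match is in cache now
+ *       prefetch(cur.matchAddr);
+ *       ring[n & (RING - 1)] = cur;
+ *   }
+ *   for (n -= RING; n < nbItems; n++)             // drain the pipeline
+ *       execute(ring[n & (RING - 1)]);
+ */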
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * i.e. out of cache.
+ * note : "long" is overloaded here, sometimes meaning "wider than the bitstream register",
+ * and sometimes meaning "farther than the memory cache distance".
+ * This function tries to mitigate main memory latency through prefetching. */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
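+
+/* Worked example for the share computation above (illustrative numbers only):
+ * with tableLog = 6 the table holds max = 64 entries; if 3 of them carry
+ * nbAdditionalBits > 22, total = 3, scaled by << (OffFSELog - 6) = << 2,
+ * giving a share of 12 out of a possible (1 << OffFSELog) = 256. Against the
+ * minShare heuristic used below (7 on 64-bit, 20 on 32-bit), 12 >= 7 would
+ * select the prefetching decoder on 64-bit targets. */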
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
+         */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+        int nbSeq;
+        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+        srcSize -= seqHSize;
+
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if ( !usePrefetchDecoder
+          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+            usePrefetchDecoder = (shareLongOffsets >= minShare);
+        }
+#endif
+
+        dctx->ddictIsCold = 0;
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+        /* else */
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+    }
+}
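+
+/* Dispatch summary for ZSTD_decompressBlock_internal() (sketch of the logic
+ * above): with ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT defined, the regular
+ * decoder is always used; with ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG defined,
+ * the prefetching decoder is always used; with neither, the prefetching
+ * decoder is chosen when the ddict is cold, or when the block is standalone
+ * or the window exceeds 16 MB (1<<24), more than ADVANCED_SEQS sequences are
+ * present, and the long-offset share reaches minShare. */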
+
+
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
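+
+/* Illustrative caller sketch (hypothetical buffers and counts, error paths
+ * elided; not a real call site). Decompressing consecutive blocks into one
+ * contiguous buffer lets ZSTD_checkContinuity() keep the history window
+ * usable across calls:
+ *
+ *   size_t pos = 0;
+ *   for (i = 0; i < nbBlocks; i++) {
+ *       size_t const r = ZSTD_decompressBlock(dctx, dst + pos,
+ *                                             dstCapacity - pos,
+ *                                             cBlock[i], cBlockSize[i]);
+ *       if (ZSTD_isError(r))
+ *           return r;
+ *       pos += r;   // dst + pos now equals previousDstEnd
+ *   }
+ */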
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
new file mode 100644
index 0000000..e7f5f66
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>    /* DCtx, and some public functions */
+#include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
+
+
+/* ===   Prototypes   === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+             const short* normalizedCounter, unsigned maxSymbolValue,
+             const U32* baseValue, const U32* nbAdditionalBits,
+                   unsigned tableLog, void* wksp, size_t wkspSize,
+                   int bmi2);
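+
+/* Illustrative call sketch (hypothetical locals, not a real call site):
+ *
+ *   U32 wksp[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];   // a U32[] satisfies the
+ *                                                   // 4-byte alignment rule
+ *   ZSTD_buildFSETable(entropy->LLTable, norm, maxSymbol,
+ *                      LL_base, LL_bits, tableLog,
+ *                      wksp, sizeof(wksp), bmi2);
+ */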
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
new file mode 100644
index 0000000..4b9052f
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_internal.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+#ifndef ZSTD_DECOMPRESS_INTERNAL_H
+#define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/mem.h"             /* BYTE, U16, U32 */
+#include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
+
+
+
+/*-*******************************************************
+ *  Constants
+ *********************************************************/
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
+                 0,    1,    2,     3,     4,     5,     6,      7,
+                 8,    9,   10,    11,    12,    13,    14,     15,
+                16,   18,   20,    22,    24,    28,    32,     40,
+                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                0x2000, 0x4000, 0x8000, 0x10000 };
+
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
+                     0,  1,  2,  3,  4,  5,  6,  7,
+                     8,  9, 10, 11, 12, 13, 14, 15,
+                    16, 17, 18, 19, 20, 21, 22, 23,
+                    24, 25, 26, 27, 28, 29, 30, 31 };
+
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
+                     3,  4,  5,    6,     7,     8,     9,    10,
+                    11, 12, 13,   14,    15,    16,    17,    18,
+                    19, 20, 21,   22,    23,    24,    25,    26,
+                    27, 28, 29,   30,    31,    32,    33,    34,
+                    35, 37, 39,   41,    43,    47,    51,    59,
+                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
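+
+/* These base tables pair with the *_bits tables (OF_bits above; LL_bits and
+ * ML_bits live in zstd_internal.h): a decoded value is base[code] plus the
+ * next bits[code] bits read from the bitstream. Worked example: offset code 5
+ * has OF_base[5] = 0x1D = 29 and OF_bits[5] = 5, covering values 29..60, and
+ * code 6 starts at OF_base[6] = 0x3D = 61, so the ranges tile exactly. */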
+
+
+/*-*******************************************************
+ *  Decompression types
+ *********************************************************/
+typedef struct {
+    U32 fastMode;
+    U32 tableLog;
+} ZSTD_seqSymbol_header;
+
+typedef struct {
+    U16  nextState;
+    BYTE nbAdditionalBits;
+    BYTE nbBits;
+    U32  baseValue;
+} ZSTD_seqSymbol;
+
+#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
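+
+/* Assuming the usual zstd constants from zstd_internal.h (MaxSeq = 52,
+ * MaxFSELog = 9), the workspace size works out to
+ * 2*(52+1) + (1<<9) + 8 = 106 + 512 + 8 = 626 bytes, i.e. 157 U32 slots
+ * once rounded up. */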
+
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 rep[ZSTD_REP_NUM];
+    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
+} ZSTD_entropyDTables_t;
+
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;
+    size_t ddictPtrTableSize;
+    size_t ddictPtrCount;
+} ZSTD_DDictHashSet;
+
+struct ZSTD_DCtx_s
+{
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
+    const void* previousDstEnd;   /* detect continuity */
+    const void* prefixStart;      /* start of current segment */
+    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 processedCSize;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    struct xxh64_state xxhState;
+    size_t headerSize;
+    ZSTD_format_e format;
+    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+
+    /* dictionary */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+    U32 dictID;
+    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
+
+    /* streaming */
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+    int noForwardProgress;
+    ZSTD_bufferMode_e outBufferMode;
+    ZSTD_outBuffer expectedOutBuffer;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+
+    size_t oversizedDuration;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    void const* dictContentBeginForFuzzing;
+    void const* dictContentEndForFuzzing;
+#endif
+
+    /* Tracing */
+};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+
+/*-*******************************************************
+ *  Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                   const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ *  check if next `dst` follows previous position, where decompression ended.
+ *  If yes, do nothing (continue on current segment).
+ *  If not, classify previous segment as "external dictionary", and start a new segment.
+ *  This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
new file mode 100644
index 0000000..0fbec50
--- /dev/null
+++ b/lib/zstd/decompress_sources.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file includes every .c file needed for decompression.
+ * It is used by lib/decompress_unzstd.c to include the decompression
+ * source into the translation unit, so it can be used for kernel
+ * decompression.
+ */
+
+#include "common/debug.c"
+#include "common/entropy_common.c"
+#include "common/error_private.c"
+#include "common/fse_decompress.c"
+#include "common/zstd_common.c"
+#include "decompress/huf_decompress.c"
+#include "decompress/zstd_ddict.c"
+#include "decompress/zstd_decompress.c"
+#include "decompress/zstd_decompress_block.c"
+#include "zstd_decompress_module.c"
diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c
deleted file mode 100644
index 2b0a643..0000000
--- a/lib/zstd/entropy_common.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Common functions of New Generation Entropy library
- * Copyright (C) 2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include "error_private.h" /* ERR_*, ERROR */
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-
-/*===   Version   ===*/
-unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
-
-/*===   Error Management   ===*/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
-{
-	const BYTE *const istart = (const BYTE *)headerBuffer;
-	const BYTE *const iend = istart + hbSize;
-	const BYTE *ip = istart;
-	int nbBits;
-	int remaining;
-	int threshold;
-	U32 bitStream;
-	int bitCount;
-	unsigned charnum = 0;
-	int previous0 = 0;
-
-	if (hbSize < 4)
-		return ERROR(srcSize_wrong);
-	bitStream = ZSTD_readLE32(ip);
-	nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
-	if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
-		return ERROR(tableLog_tooLarge);
-	bitStream >>= 4;
-	bitCount = 4;
-	*tableLogPtr = nbBits;
-	remaining = (1 << nbBits) + 1;
-	threshold = 1 << nbBits;
-	nbBits++;
-
-	while ((remaining > 1) & (charnum <= *maxSVPtr)) {
-		if (previous0) {
-			unsigned n0 = charnum;
-			while ((bitStream & 0xFFFF) == 0xFFFF) {
-				n0 += 24;
-				if (ip < iend - 5) {
-					ip += 2;
-					bitStream = ZSTD_readLE32(ip) >> bitCount;
-				} else {
-					bitStream >>= 16;
-					bitCount += 16;
-				}
-			}
-			while ((bitStream & 3) == 3) {
-				n0 += 3;
-				bitStream >>= 2;
-				bitCount += 2;
-			}
-			n0 += bitStream & 3;
-			bitCount += 2;
-			if (n0 > *maxSVPtr)
-				return ERROR(maxSymbolValue_tooSmall);
-			while (charnum < n0)
-				normalizedCounter[charnum++] = 0;
-			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-				ip += bitCount >> 3;
-				bitCount &= 7;
-				bitStream = ZSTD_readLE32(ip) >> bitCount;
-			} else {
-				bitStream >>= 2;
-			}
-		}
-		{
-			int const max = (2 * threshold - 1) - remaining;
-			int count;
-
-			if ((bitStream & (threshold - 1)) < (U32)max) {
-				count = bitStream & (threshold - 1);
-				bitCount += nbBits - 1;
-			} else {
-				count = bitStream & (2 * threshold - 1);
-				if (count >= threshold)
-					count -= max;
-				bitCount += nbBits;
-			}
-
-			count--;				 /* extra accuracy */
-			remaining -= count < 0 ? -count : count; /* -1 means +1 */
-			normalizedCounter[charnum++] = (short)count;
-			previous0 = !count;
-			while (remaining < threshold) {
-				nbBits--;
-				threshold >>= 1;
-			}
-
-			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-				ip += bitCount >> 3;
-				bitCount &= 7;
-			} else {
-				bitCount -= (int)(8 * (iend - 4 - ip));
-				ip = iend - 4;
-			}
-			bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
-		}
-	} /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
-	if (remaining != 1)
-		return ERROR(corruption_detected);
-	if (bitCount > 32)
-		return ERROR(corruption_detected);
-	*maxSVPtr = charnum - 1;
-
-	ip += (bitCount + 7) >> 3;
-	return ip - istart;
-}
-
-/*! HUF_readStats() :
-	Read compact Huffman tree, saved by HUF_writeCTable().
-	`huffWeight` is destination buffer.
-	`rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
-	@return : size read from `src` , or an error Code .
-	Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
-*/
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 weightTotal;
-	const BYTE *ip = (const BYTE *)src;
-	size_t iSize;
-	size_t oSize;
-
-	if (!srcSize)
-		return ERROR(srcSize_wrong);
-	iSize = ip[0];
-	/* memset(huffWeight, 0, hwSize);   */ /* is not necessary, even though some analyzer complain ... */
-
-	if (iSize >= 128) { /* special header */
-		oSize = iSize - 127;
-		iSize = ((oSize + 1) / 2);
-		if (iSize + 1 > srcSize)
-			return ERROR(srcSize_wrong);
-		if (oSize >= hwSize)
-			return ERROR(corruption_detected);
-		ip += 1;
-		{
-			U32 n;
-			for (n = 0; n < oSize; n += 2) {
-				huffWeight[n] = ip[n / 2] >> 4;
-				huffWeight[n + 1] = ip[n / 2] & 15;
-			}
-		}
-	} else {						 /* header compressed with FSE (normal case) */
-		if (iSize + 1 > srcSize)
-			return ERROR(srcSize_wrong);
-		oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
-		if (FSE_isError(oSize))
-			return oSize;
-	}
-
-	/* collect weight stats */
-	memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
-	weightTotal = 0;
-	{
-		U32 n;
-		for (n = 0; n < oSize; n++) {
-			if (huffWeight[n] >= HUF_TABLELOG_MAX)
-				return ERROR(corruption_detected);
-			rankStats[huffWeight[n]]++;
-			weightTotal += (1 << huffWeight[n]) >> 1;
-		}
-	}
-	if (weightTotal == 0)
-		return ERROR(corruption_detected);
-
-	/* get last non-null symbol weight (implied, total must be 2^n) */
-	{
-		U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-		if (tableLog > HUF_TABLELOG_MAX)
-			return ERROR(corruption_detected);
-		*tableLogPtr = tableLog;
-		/* determine last weight */
-		{
-			U32 const total = 1 << tableLog;
-			U32 const rest = total - weightTotal;
-			U32 const verif = 1 << BIT_highbit32(rest);
-			U32 const lastWeight = BIT_highbit32(rest) + 1;
-			if (verif != rest)
-				return ERROR(corruption_detected); /* last value must be a clean power of 2 */
-			huffWeight[oSize] = (BYTE)lastWeight;
-			rankStats[lastWeight]++;
-		}
-	}
-
-	/* check tree construction validity */
-	if ((rankStats[1] < 2) || (rankStats[1] & 1))
-		return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
-	/* results */
-	*nbSymbolsPtr = (U32)(oSize + 1);
-	return iSize + 1;
-}
diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h
deleted file mode 100644
index 1a60b31..0000000
--- a/lib/zstd/error_private.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-/* ****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t */
-#include <linux/zstd.h>  /* enum list */
-
-/* ****************************************
-*  Compiler-specific
-******************************************/
-#define ERR_STATIC static __attribute__((unused))
-
-/*-****************************************
-*  Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-/*-****************************************
-*  Error codes handling
-******************************************/
-#define ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
-{
-	if (!ERR_isError(code))
-		return (ERR_enum)0;
-	return (ERR_enum)(0 - code);
-}
-
-#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h
deleted file mode 100644
index 7460ab0..0000000
--- a/lib/zstd/fse.h
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * FSE : Finite State Entropy codec
- * Public Prototypes declaration
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef FSE_H
-#define FSE_H
-
-/*-*****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t, ptrdiff_t */
-
-/*-*****************************************
-*  FSE_PUBLIC_API : control library symbols visibility
-******************************************/
-#define FSE_PUBLIC_API
-
-/*------   Version   ------*/
-#define FSE_VERSION_MAJOR 0
-#define FSE_VERSION_MINOR 9
-#define FSE_VERSION_RELEASE 0
-
-#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
-#define FSE_QUOTE(str) #str
-#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
-#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
-
-#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE)
-FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
-
-/*-*****************************************
-*  Tool functions
-******************************************/
-FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
-
-/* Error Management */
-FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-
-/*-*****************************************
-*  FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-/*! FSE_optimalTableLog():
-	dynamically downsize 'tableLog' when conditions are met.
-	It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-	@return : recommended tableLog (necessarily <= 'maxTableLog') */
-FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
-	normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
-	'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-	@return : tableLog,
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_NCountWriteBound():
-	Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
-	Typically useful for allocation purpose. */
-FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_writeNCount():
-	Compactly save 'normalizedCounter' into 'buffer'.
-	@return : size of the compressed table,
-			  or an errorCode, which can be tested using FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! Constructor and Destructor of FSE_CTable.
-	Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
-
-/*! FSE_compress_usingCTable():
-	Compress `src` using `ct` into `dst` which must be already allocated.
-	@return : size of compressed data (<= `dstCapacity`),
-			  or 0 if compressed data could not fit into `dst`,
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct);
-
-/*!
-Tutorial :
-----------
-The first step is to count all symbols. FSE_count() does this job very fast.
-Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
-'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
-maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
-FSE_count() will return the number of occurrence of the most frequent symbol.
-This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-The next step is to normalize the frequencies.
-FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
-It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
-You can use 'tableLog'==0 to mean "use default tableLog value".
-If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
-which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
-
-The result of FSE_normalizeCount() will be saved into a table,
-called 'normalizedCounter', which is a table of signed short.
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
-The return value is tableLog if everything proceeded as expected.
-It is 0 if there is a single symbol within distribution.
-If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
-'buffer' must be already allocated.
-For guaranteed success, buffer size must be at least FSE_headerBound().
-The result of the function is the number of bytes written into 'buffer'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
-
-'normalizedCounter' can then be used to create the compression table 'CTable'.
-The space required by 'CTable' must be already allocated, using FSE_createCTable().
-You can then use FSE_buildCTable() to fill 'CTable'.
-If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
-
-'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
-Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
-If it returns '0', compressed data could not fit into 'dst'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-*/
-
-/* *** DECOMPRESSION *** */
-
-/*! FSE_readNCount():
-	Read compactly saved 'normalizedCounter' from 'rBuffer'.
-	@return : size read from 'rBuffer',
-			  or an errorCode, which can be tested using FSE_isError().
-			  maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize);
-
-/*! Constructor and Destructor of FSE_DTable.
-	Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-/*! FSE_buildDTable():
-	Builds 'dt', which must be already allocated, using FSE_createDTable().
-	return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize);
-
-/*! FSE_decompress_usingDTable():
-	Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
-	into `dst` which must be already allocated.
-	@return : size of regenerated data (necessarily <= `dstCapacity`),
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
-`cSrcSize` must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
-
-/* *** Dependency *** */
-#include "bitstream.h"
-
-/* *****************************************
-*  Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size >> 7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog))
-
-/* *****************************************
-*  FSE advanced API
-*******************************************/
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned
- */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace);
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of minimum `1024` unsigned
- */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace);
-
-/*! FSE_count_simple
- * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
- * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize);
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
-/**< same as FSE_optimalTableLog(), which used `minus==2` */
-
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
-
-size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue);
-/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `(1<<tableLog)`.
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize);
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
-/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
-
-/* *****************************************
-*  FSE symbol compression API
-*******************************************/
-/*!
-   This API consists of small unitary functions, which highly benefit from being inlined.
-   Hence their body are included in next section.
-*/
-typedef struct {
-	ptrdiff_t value;
-	const void *stateTable;
-	const void *symbolTT;
-	unsigned stateLog;
-} FSE_CState_t;
-
-static void FSE_initCState(FSE_CState_t *CStatePtr, const FSE_CTable *ct);
-
-static void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *CStatePtr, unsigned symbol);
-
-static void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *CStatePtr);
-
-/**<
-These functions are inner components of FSE_compress_usingCTable().
-They allow the creation of custom streams, mixing multiple tables and bit sources.
-
-A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
-So the first symbol you will encode is the last you will decode, like a LIFO stack.
-
-You will need a few variables to track your CStream. They are :
-
-FSE_CTable    ct;         // Provided by FSE_buildCTable()
-BIT_CStream_t bitStream;  // bitStream tracking structure
-FSE_CState_t  state;      // State tracking structure (can have several)
-
-
-The first thing to do is to init bitStream and state.
-	size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
-	FSE_initCState(&state, ct);
-
-Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
-You can then encode your input data, byte after byte.
-FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
-Remember decoding will be done in reverse direction.
-	FSE_encodeSymbol(&bitStream, &state, symbol);
-
-At any time, you can also add any bit sequence.
-Note : maximum allowed nbBits is 25, for compatibility with 32-bit decoders
-	BIT_addBits(&bitStream, bitField, nbBits);
-
-The above methods don't commit data to memory, they just store it into a local register, for speed.
-The local register size is 64 bits on 64-bit systems, 32 bits on 32-bit systems (it is a size_t).
-Writing data to memory is a manual operation, performed by the flushBits function.
-	BIT_flushBits(&bitStream);
-
-Your last FSE encoding operation shall be to flush your last state value(s).
-	FSE_flushState(&bitStream, &state);
-
-Finally, you must close the bitStream.
-The function returns the size of CStream in bytes.
-If the data couldn't fit into dstBuffer, it will return 0 (== not compressible).
-If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
-	size_t size = BIT_closeCStream(&bitStream);
-*/
-
-/* *****************************************
-*  FSE symbol decompression API
-*******************************************/
-typedef struct {
-	size_t state;
-	const void *table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-static void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr);
-
-/**<
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream;    // Stream context
-FSE_DState_t  DState;     // State context. Multiple ones are possible
-FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
-	errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
-	errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information, the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
-	unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you previously stored into the bitStream (in reverse order).
-Note : maximum allowed nbBits is 25, for 32-bit compatibility
-	size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
-	endSignal = BIT_reloadDStream(&DStream);
-
-BIT_reloadDStream()'s result tells if there is still more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left in the DStream.
-BIT_DStream_endOfBuffer : DStream reached the end of its buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : DStream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : DStream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
-	BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
-	BIT_endOfDStream(&DStream);
-Also check the states. There might be some symbols left in them, if some high-probability symbols (>50%) are possible.
-	FSE_endOfDState(&DState);
-*/
-
-/* *****************************************
-*  FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-/* *****************************************
-*  Implementation of inlined functions
-*******************************************/
-typedef struct {
-	int deltaFindState;
-	U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct)
-{
-	const void *ptr = ct;
-	const U16 *u16ptr = (const U16 *)ptr;
-	const U32 tableLog = ZSTD_read16(ptr);
-	statePtr->value = (ptrdiff_t)1 << tableLog;
-	statePtr->stateTable = u16ptr + 2;
-	statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1));
-	statePtr->stateLog = tableLog;
-}
-
-/*! FSE_initCState2() :
-*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
-*   uses the smallest state value possible, saving the cost of this symbol */
-ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol)
-{
-	FSE_initCState(statePtr, ct);
-	{
-		const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-		const U16 *stateTable = (const U16 *)(statePtr->stateTable);
-		U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16);
-		statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
-		statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-	}
-}
-
-ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol)
-{
-	const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-	const U16 *const stateTable = (const U16 *)(statePtr->stateTable);
-	U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-	BIT_addBits(bitC, statePtr->value, nbBitsOut);
-	statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-}
-
-ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr)
-{
-	BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
-	BIT_flushBits(bitC);
-}
-
-/* ======    Decompression    ====== */
-
-typedef struct {
-	U16 tableLog;
-	U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct {
-	unsigned short newState;
-	unsigned char symbol;
-	unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt)
-{
-	const void *ptr = dt;
-	const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr;
-	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
-	BIT_reloadDStream(bitD);
-	DStatePtr->table = dt + 1;
-}
-
-ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	return DInfo.symbol;
-}
-
-ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	size_t const lowBits = BIT_readBits(bitD, nbBits);
-	DStatePtr->state = DInfo.newState + lowBits;
-}
-
-ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	BYTE const symbol = DInfo.symbol;
-	size_t const lowBits = BIT_readBits(bitD, nbBits);
-
-	DStatePtr->state = DInfo.newState + lowBits;
-	return symbol;
-}
-
-/*! FSE_decodeSymbolFast() :
-	unsafe, only works if no symbol has a probability > 50% */
-ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	BYTE const symbol = DInfo.symbol;
-	size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
-
-	DStatePtr->state = DInfo.newState + lowBits;
-	return symbol;
-}
-
-ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; }
-
-/* **************************************************************
-*  Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-*  Increasing memory usage improves compression ratio
-*  Reduced memory usage can improve speed, due to cache effect
-*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#ifndef FSE_MAX_MEMORY_USAGE
-#define FSE_MAX_MEMORY_USAGE 14
-#endif
-#ifndef FSE_DEFAULT_MEMORY_USAGE
-#define FSE_DEFAULT_MEMORY_USAGE 13
-#endif
-
-/*!FSE_MAX_SYMBOL_VALUE :
-*  Maximum symbol value authorized.
-*  Required for proper stack allocation */
-#ifndef FSE_MAX_SYMBOL_VALUE
-#define FSE_MAX_SYMBOL_VALUE 255
-#endif
-
-/* **************************************************************
-*  template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-/* ***************************************************************
-*  Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2)
-#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3)
-
-#endif /* FSE_H */
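
The two comment blocks in the removed fse.h above describe a symmetric streaming flow: an encoder state feeding a bit sink, and a decoder state draining a bit source, both operating in reverse order. A minimal sketch of that flow, assuming the FSE/BIT declarations from this header; the function names are illustrative, and CHECK_F and ERROR() are the error helpers used elsewhere in the kernel's zstd sources:

	/* Encode len bytes from src into dst with a single FSE state.
	 * Encoding runs back-to-front, so decoding returns symbols in
	 * the original order. */
	static size_t example_fse_stream_encode(void *dst, size_t dstCapacity,
						const unsigned char *src, size_t len,
						const FSE_CTable *ct)
	{
		BIT_CStream_t bitStream;
		FSE_CState_t state;
		size_t const initError = BIT_initCStream(&bitStream, dst, dstCapacity);
		if (FSE_isError(initError))
			return initError; /* dst too small to hold a bitstream */
		FSE_initCState(&state, ct);
		while (len-- > 0) {
			FSE_encodeSymbol(&bitStream, &state, src[len]);
			BIT_flushBits(&bitStream); /* commit the local register */
		}
		FSE_flushCState(&bitStream, &state);
		return BIT_closeCStream(&bitStream); /* 0 == not compressible */
	}

	/* Decode n symbols back out, in original order. */
	static size_t example_fse_stream_decode(unsigned char *dst, size_t n,
						const void *cSrc, size_t cSrcSize,
						const FSE_DTable *dt)
	{
		BIT_DStream_t DStream;
		FSE_DState_t DState;
		size_t i;
		CHECK_F(BIT_initDStream(&DStream, cSrc, cSrcSize));
		FSE_initDState(&DState, &DStream, dt);
		for (i = 0; i < n; i++) {
			dst[i] = FSE_decodeSymbol(&DState, &DStream);
			BIT_reloadDStream(&DStream); /* refill the local register */
		}
		if (!BIT_endOfDStream(&DStream) || !FSE_endOfDState(&DState))
			return ERROR(GENERIC); /* stream and state must both be drained */
		return n;
	}
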
diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c
deleted file mode 100644
index ef3d174..0000000
--- a/lib/zstd/fse_compress.c
+++ /dev/null
@@ -1,795 +0,0 @@
-/*
- * FSE : Finite State Entropy encoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/math64.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * wkspSize should be sized to handle the worst case, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)`
- * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-	U32 const tableSize = 1 << tableLog;
-	U32 const tableMask = tableSize - 1;
-	void *const ptr = ct;
-	U16 *const tableU16 = ((U16 *)ptr) + 2;
-	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
-	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-	U32 const step = FSE_TABLESTEP(tableSize);
-	U32 highThreshold = tableSize - 1;
-
-	U32 *cumul;
-	FSE_FUNCTION_TYPE *tableSymbol;
-	size_t spaceUsed32 = 0;
-
-	cumul = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
-	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* CTable header */
-	tableU16[-2] = (U16)tableLog;
-	tableU16[-1] = (U16)maxSymbolValue;
-
-	/* For explanations on how to distribute symbol values over the table :
-	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-
-	/* symbol start positions */
-	{
-		U32 u;
-		cumul[0] = 0;
-		for (u = 1; u <= maxSymbolValue + 1; u++) {
-			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
-				cumul[u] = cumul[u - 1] + 1;
-				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
-			} else {
-				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
-			}
-		}
-		cumul[maxSymbolValue + 1] = tableSize + 1;
-	}
-
-	/* Spread symbols */
-	{
-		U32 position = 0;
-		U32 symbol;
-		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
-			int nbOccurrences;
-			for (nbOccurrences = 0; nbOccurrences < normalizedCounter[symbol]; nbOccurrences++) {
-				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-				position = (position + step) & tableMask;
-				while (position > highThreshold)
-					position = (position + step) & tableMask; /* Low proba area */
-			}
-		}
-
-		if (position != 0)
-			return ERROR(GENERIC); /* Must have gone through all positions */
-	}
-
-	/* Build table */
-	{
-		U32 u;
-		for (u = 0; u < tableSize; u++) {
-			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
-			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
-		}
-	}
-
-	/* Build Symbol Transformation Table */
-	{
-		unsigned total = 0;
-		unsigned s;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			switch (normalizedCounter[s]) {
-			case 0: break;
-
-			case -1:
-			case 1:
-				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
-				symbolTT[s].deltaFindState = total - 1;
-				total++;
-				break;
-			default: {
-				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
-				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
-				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-				symbolTT[s].deltaFindState = total - normalizedCounter[s];
-				total += normalizedCounter[s];
-			}
-			}
-		}
-	}
-
-	return 0;
-}
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-{
-	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
-	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
-}
-
-static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
-				      unsigned writeIsSafe)
-{
-	BYTE *const ostart = (BYTE *)header;
-	BYTE *out = ostart;
-	BYTE *const oend = ostart + headerBufferSize;
-	int nbBits;
-	const int tableSize = 1 << tableLog;
-	int remaining;
-	int threshold;
-	U32 bitStream;
-	int bitCount;
-	unsigned charnum = 0;
-	int previous0 = 0;
-
-	bitStream = 0;
-	bitCount = 0;
-	/* Table Size */
-	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
-	bitCount += 4;
-
-	/* Init */
-	remaining = tableSize + 1; /* +1 for extra accuracy */
-	threshold = tableSize;
-	nbBits = tableLog + 1;
-
-	while (remaining > 1) { /* stops at 1 */
-		if (previous0) {
-			unsigned start = charnum;
-			while (!normalizedCounter[charnum])
-				charnum++;
-			while (charnum >= start + 24) {
-				start += 24;
-				bitStream += 0xFFFFU << bitCount;
-				if ((!writeIsSafe) && (out > oend - 2))
-					return ERROR(dstSize_tooSmall); /* Buffer overflow */
-				out[0] = (BYTE)bitStream;
-				out[1] = (BYTE)(bitStream >> 8);
-				out += 2;
-				bitStream >>= 16;
-			}
-			while (charnum >= start + 3) {
-				start += 3;
-				bitStream += 3 << bitCount;
-				bitCount += 2;
-			}
-			bitStream += (charnum - start) << bitCount;
-			bitCount += 2;
-			if (bitCount > 16) {
-				if ((!writeIsSafe) && (out > oend - 2))
-					return ERROR(dstSize_tooSmall); /* Buffer overflow */
-				out[0] = (BYTE)bitStream;
-				out[1] = (BYTE)(bitStream >> 8);
-				out += 2;
-				bitStream >>= 16;
-				bitCount -= 16;
-			}
-		}
-		{
-			int count = normalizedCounter[charnum++];
-			int const max = (2 * threshold - 1) - remaining;
-			remaining -= count < 0 ? -count : count;
-			count++; /* +1 for extra accuracy */
-			if (count >= threshold)
-				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-			bitStream += count << bitCount;
-			bitCount += nbBits;
-			bitCount -= (count < max);
-			previous0 = (count == 1);
-			if (remaining < 1)
-				return ERROR(GENERIC);
-			while (remaining < threshold)
-				nbBits--, threshold >>= 1;
-		}
-		if (bitCount > 16) {
-			if ((!writeIsSafe) && (out > oend - 2))
-				return ERROR(dstSize_tooSmall); /* Buffer overflow */
-			out[0] = (BYTE)bitStream;
-			out[1] = (BYTE)(bitStream >> 8);
-			out += 2;
-			bitStream >>= 16;
-			bitCount -= 16;
-		}
-	}
-
-	/* flush remaining bitStream */
-	if ((!writeIsSafe) && (out > oend - 2))
-		return ERROR(dstSize_tooSmall); /* Buffer overflow */
-	out[0] = (BYTE)bitStream;
-	out[1] = (BYTE)(bitStream >> 8);
-	out += (bitCount + 7) / 8;
-
-	if (charnum > maxSymbolValue + 1)
-		return ERROR(GENERIC);
-
-	return (out - ostart);
-}
-
-size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge); /* Unsupported */
-	if (tableLog < FSE_MIN_TABLELOG)
-		return ERROR(GENERIC); /* Unsupported */
-
-	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
-		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
-
-	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
-}
-
-/*-**************************************************************
-*  Counting histogram
-****************************************************************/
-/*! FSE_count_simple
-	This function counts byte values within `src`, and stores the histogram into table `count`.
-	It doesn't use any additional memory.
-	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
-	For this reason, prefer using a table `count` with 256 elements.
-	@return : count of most numerous element
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
-{
-	const BYTE *ip = (const BYTE *)src;
-	const BYTE *const end = ip + srcSize;
-	unsigned maxSymbolValue = *maxSymbolValuePtr;
-	unsigned max = 0;
-
-	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
-	if (srcSize == 0) {
-		*maxSymbolValuePtr = 0;
-		return 0;
-	}
-
-	while (ip < end)
-		count[*ip++]++;
-
-	while (!count[maxSymbolValue])
-		maxSymbolValue--;
-	*maxSymbolValuePtr = maxSymbolValue;
-
-	{
-		U32 s;
-		for (s = 0; s <= maxSymbolValue; s++)
-			if (count[s] > max)
-				max = count[s];
-	}
-
-	return (size_t)max;
-}
-
-/* FSE_count_parallel_wksp() :
- * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)` */
-static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
-				      unsigned *const workSpace)
-{
-	const BYTE *ip = (const BYTE *)source;
-	const BYTE *const iend = ip + sourceSize;
-	unsigned maxSymbolValue = *maxSymbolValuePtr;
-	unsigned max = 0;
-	U32 *const Counting1 = workSpace;
-	U32 *const Counting2 = Counting1 + 256;
-	U32 *const Counting3 = Counting2 + 256;
-	U32 *const Counting4 = Counting3 + 256;
-
-	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
-
-	/* safety checks */
-	if (!sourceSize) {
-		memset(count, 0, maxSymbolValue + 1);
-		*maxSymbolValuePtr = 0;
-		return 0;
-	}
-	if (!maxSymbolValue)
-		maxSymbolValue = 255; /* 0 == default */
-
-	/* by stripes of 16 bytes */
-	{
-		U32 cached = ZSTD_read32(ip);
-		ip += 4;
-		while (ip < iend - 15) {
-			U32 c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-		}
-		ip -= 4;
-	}
-
-	/* finish last symbols */
-	while (ip < iend)
-		Counting1[*ip++]++;
-
-	if (checkMax) { /* verify stats will fit into destination table */
-		U32 s;
-		for (s = 255; s > maxSymbolValue; s--) {
-			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-			if (Counting1[s])
-				return ERROR(maxSymbolValue_tooSmall);
-		}
-	}
-
-	{
-		U32 s;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-			if (count[s] > max)
-				max = count[s];
-		}
-	}
-
-	while (!count[maxSymbolValue])
-		maxSymbolValue--;
-	*maxSymbolValuePtr = maxSymbolValue;
-	return (size_t)max;
-}
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of at least `1024` unsigned */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-	if (sourceSize < 1500)
-		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
-}
-
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of at least `1024` unsigned */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-	if (*maxSymbolValuePtr < 255)
-		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
-	*maxSymbolValuePtr = 255;
-	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
-}
-
-/*-**************************************************************
-*  FSE Compression Code
-****************************************************************/
-/*! FSE_sizeof_CTable() :
-	FSE_CTable is a variable size structure which contains :
-	`U16 tableLog;`
-	`U16 maxSymbolValue;`
-	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
-	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
-Allocation is manual (C standard does not support variable-size structures).
-*/
-size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
-{
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge);
-	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
-}
-
-/* provides the minimum logSize to safely represent a distribution */
-static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
-{
-	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
-	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-	return minBits;
-}
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
-{
-	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
-	U32 tableLog = maxTableLog;
-	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
-	if (tableLog == 0)
-		tableLog = FSE_DEFAULT_TABLELOG;
-	if (maxBitsSrc < tableLog)
-		tableLog = maxBitsSrc; /* Accuracy can be reduced */
-	if (minBits > tableLog)
-		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
-	if (tableLog < FSE_MIN_TABLELOG)
-		tableLog = FSE_MIN_TABLELOG;
-	if (tableLog > FSE_MAX_TABLELOG)
-		tableLog = FSE_MAX_TABLELOG;
-	return tableLog;
-}
-
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
-}
-
-/* Secondary normalization method.
-   To be used when primary method fails. */
-
-static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
-{
-	short const NOT_YET_ASSIGNED = -2;
-	U32 s;
-	U32 distributed = 0;
-	U32 ToDistribute;
-
-	/* Init */
-	U32 const lowThreshold = (U32)(total >> tableLog);
-	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
-
-	for (s = 0; s <= maxSymbolValue; s++) {
-		if (count[s] == 0) {
-			norm[s] = 0;
-			continue;
-		}
-		if (count[s] <= lowThreshold) {
-			norm[s] = -1;
-			distributed++;
-			total -= count[s];
-			continue;
-		}
-		if (count[s] <= lowOne) {
-			norm[s] = 1;
-			distributed++;
-			total -= count[s];
-			continue;
-		}
-
-		norm[s] = NOT_YET_ASSIGNED;
-	}
-	ToDistribute = (1 << tableLog) - distributed;
-
-	if ((total / ToDistribute) > lowOne) {
-		/* risk of rounding to zero */
-		lowOne = (U32)((total * 3) / (ToDistribute * 2));
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
-				norm[s] = 1;
-				distributed++;
-				total -= count[s];
-				continue;
-			}
-		}
-		ToDistribute = (1 << tableLog) - distributed;
-	}
-
-	if (distributed == maxSymbolValue + 1) {
-		/* all values are pretty poor;
-		   probably incompressible data (should have already been detected);
-		   find max, then give all remaining points to max */
-		U32 maxV = 0, maxC = 0;
-		for (s = 0; s <= maxSymbolValue; s++)
-			if (count[s] > maxC)
-				maxV = s, maxC = count[s];
-		norm[maxV] += (short)ToDistribute;
-		return 0;
-	}
-
-	if (total == 0) {
-		/* all of the symbols were low enough for the lowOne or lowThreshold */
-		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
-			if (norm[s] > 0)
-				ToDistribute--, norm[s]++;
-		return 0;
-	}
-
-	{
-		U64 const vStepLog = 62 - tableLog;
-		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
-		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
-		U64 tmpTotal = mid;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if (norm[s] == NOT_YET_ASSIGNED) {
-				U64 const end = tmpTotal + (count[s] * rStep);
-				U32 const sStart = (U32)(tmpTotal >> vStepLog);
-				U32 const sEnd = (U32)(end >> vStepLog);
-				U32 const weight = sEnd - sStart;
-				if (weight < 1)
-					return ERROR(GENERIC);
-				norm[s] = (short)weight;
-				tmpTotal = end;
-			}
-		}
-	}
-
-	return 0;
-}
-
-size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
-{
-	/* Sanity checks */
-	if (tableLog == 0)
-		tableLog = FSE_DEFAULT_TABLELOG;
-	if (tableLog < FSE_MIN_TABLELOG)
-		return ERROR(GENERIC); /* Unsupported size */
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge); /* Unsupported size */
-	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
-		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
-
-	{
-		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
-		U64 const scale = 62 - tableLog;
-		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
-		U64 const vStep = 1ULL << (scale - 20);
-		int stillToDistribute = 1 << tableLog;
-		unsigned s;
-		unsigned largest = 0;
-		short largestP = 0;
-		U32 lowThreshold = (U32)(total >> tableLog);
-
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if (count[s] == total)
-				return 0; /* rle special case */
-			if (count[s] == 0) {
-				normalizedCounter[s] = 0;
-				continue;
-			}
-			if (count[s] <= lowThreshold) {
-				normalizedCounter[s] = -1;
-				stillToDistribute--;
-			} else {
-				short proba = (short)((count[s] * step) >> scale);
-				if (proba < 8) {
-					U64 restToBeat = vStep * rtbTable[proba];
-					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
-				}
-				if (proba > largestP)
-					largestP = proba, largest = s;
-				normalizedCounter[s] = proba;
-				stillToDistribute -= proba;
-			}
-		}
-		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
-			/* corner case, need another normalization method */
-			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
-			if (FSE_isError(errorCode))
-				return errorCode;
-		} else
-			normalizedCounter[largest] += (short)stillToDistribute;
-	}
-
-	return tableLog;
-}
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
-{
-	const unsigned tableSize = 1 << nbBits;
-	const unsigned tableMask = tableSize - 1;
-	const unsigned maxSymbolValue = tableMask;
-	void *const ptr = ct;
-	U16 *const tableU16 = ((U16 *)ptr) + 2;
-	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
-	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-	unsigned s;
-
-	/* Sanity checks */
-	if (nbBits < 1)
-		return ERROR(GENERIC); /* min size */
-
-	/* header */
-	tableU16[-2] = (U16)nbBits;
-	tableU16[-1] = (U16)maxSymbolValue;
-
-	/* Build table */
-	for (s = 0; s < tableSize; s++)
-		tableU16[s] = (U16)(tableSize + s);
-
-	/* Build Symbol Transformation Table */
-	{
-		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-		for (s = 0; s <= maxSymbolValue; s++) {
-			symbolTT[s].deltaNbBits = deltaNbBits;
-			symbolTT[s].deltaFindState = s - 1;
-		}
-	}
-
-	return 0;
-}
-
-/* fake FSE_CTable, for rle input (always same symbol) */
-size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
-{
-	void *ptr = ct;
-	U16 *tableU16 = ((U16 *)ptr) + 2;
-	void *FSCTptr = (U32 *)ptr + 2;
-	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
-
-	/* header */
-	tableU16[-2] = (U16)0;
-	tableU16[-1] = (U16)symbolValue;
-
-	/* Build table */
-	tableU16[0] = 0;
-	tableU16[1] = 0; /* just in case */
-
-	/* Build Symbol Transformation Table */
-	symbolTT[symbolValue].deltaNbBits = 0;
-	symbolTT[symbolValue].deltaFindState = 0;
-
-	return 0;
-}
-
-static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
-{
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *ip = iend;
-
-	BIT_CStream_t bitC;
-	FSE_CState_t CState1, CState2;
-
-	/* init */
-	if (srcSize <= 2)
-		return 0;
-	{
-		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
-		if (FSE_isError(initError))
-			return 0; /* not enough space available to write a bitstream */
-	}
-
-#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
-
-	if (srcSize & 1) {
-		FSE_initCState2(&CState1, ct, *--ip);
-		FSE_initCState2(&CState2, ct, *--ip);
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		FSE_FLUSHBITS(&bitC);
-	} else {
-		FSE_initCState2(&CState2, ct, *--ip);
-		FSE_initCState2(&CState1, ct, *--ip);
-	}
-
-	/* join to mod 4 */
-	srcSize -= 2;
-	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
-		FSE_encodeSymbol(&bitC, &CState2, *--ip);
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		FSE_FLUSHBITS(&bitC);
-	}
-
-	/* 2 or 4 encoding per loop */
-	while (ip > istart) {
-
-		FSE_encodeSymbol(&bitC, &CState2, *--ip);
-
-		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
-			FSE_FLUSHBITS(&bitC);
-
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-
-		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
-			FSE_encodeSymbol(&bitC, &CState2, *--ip);
-			FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		}
-
-		FSE_FLUSHBITS(&bitC);
-	}
-
-	FSE_flushCState(&bitC, &CState2);
-	FSE_flushCState(&bitC, &CState1);
-	return BIT_closeCStream(&bitC);
-}
-
-size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
-{
-	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
-
-	if (fast)
-		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
-	else
-		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
-}
-
-size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
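
Taken together, the functions removed above form a one-block pipeline: histogram the input, size the table, normalize the counts, build the CTable, then write the NCount header followed by the encoded payload. A sketch of that sequence, assuming the declarations from the removed fse.h and the CHECK_F helper from the kernel zstd internals; the function name and the 2048-word workspace are illustrative:

	static size_t example_fse_compress_block(void *dst, size_t dstCapacity,
						 const void *src, size_t srcSize)
	{
		unsigned count[FSE_MAX_SYMBOL_VALUE + 1];
		short norm[FSE_MAX_SYMBOL_VALUE + 1];
		unsigned wksp[2048]; /* covers counting (1024 U32) and table build */
		U32 ct[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
		unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
		unsigned tableLog;
		size_t hSize, cSize;
		size_t const maxCount = FSE_count_wksp(count, &maxSymbolValue, src, srcSize, wksp);

		if (FSE_isError(maxCount))
			return maxCount;
		if (maxCount == srcSize)
			return 0; /* single symbol : better handled as RLE */

		tableLog = FSE_optimalTableLog(FSE_MAX_TABLELOG, srcSize, maxSymbolValue);
		CHECK_F(FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue));

		hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);
		if (FSE_isError(hSize))
			return hSize;

		CHECK_F(FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, wksp, sizeof(wksp)));

		cSize = FSE_compress_usingCTable((BYTE *)dst + hSize, dstCapacity - hSize, src, srcSize, ct);
		if (FSE_isError(cSize) || cSize == 0)
			return cSize; /* error, or not compressible */
		return hSize + cSize;
	}
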
diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
deleted file mode 100644
index 0b35353..0000000
--- a/lib/zstd/fse_decompress.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * FSE : Finite State Entropy decoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include "zstd_internal.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_isError ERR_isError
-#define FSE_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-	void *const tdPtr = dt + 1; /* because *dt is unsigned, hence 32-bit aligned */
-	FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr);
-	U16 *symbolNext = (U16 *)workspace;
-
-	U32 const maxSV1 = maxSymbolValue + 1;
-	U32 const tableSize = 1 << tableLog;
-	U32 highThreshold = tableSize - 1;
-
-	/* Sanity Checks */
-	if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1))
-		return ERROR(tableLog_tooLarge);
-	if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE)
-		return ERROR(maxSymbolValue_tooLarge);
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge);
-
-	/* Init, lay down lowprob symbols */
-	{
-		FSE_DTableHeader DTableH;
-		DTableH.tableLog = (U16)tableLog;
-		DTableH.fastMode = 1;
-		{
-			S16 const largeLimit = (S16)(1 << (tableLog - 1));
-			U32 s;
-			for (s = 0; s < maxSV1; s++) {
-				if (normalizedCounter[s] == -1) {
-					tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
-					symbolNext[s] = 1;
-				} else {
-					if (normalizedCounter[s] >= largeLimit)
-						DTableH.fastMode = 0;
-					symbolNext[s] = normalizedCounter[s];
-				}
-			}
-		}
-		memcpy(dt, &DTableH, sizeof(DTableH));
-	}
-
-	/* Spread symbols */
-	{
-		U32 const tableMask = tableSize - 1;
-		U32 const step = FSE_TABLESTEP(tableSize);
-		U32 s, position = 0;
-		for (s = 0; s < maxSV1; s++) {
-			int i;
-			for (i = 0; i < normalizedCounter[s]; i++) {
-				tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-				position = (position + step) & tableMask;
-				while (position > highThreshold)
-					position = (position + step) & tableMask; /* lowprob area */
-			}
-		}
-		if (position != 0)
-			return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-	}
-
-	/* Build Decoding table */
-	{
-		U32 u;
-		for (u = 0; u < tableSize; u++) {
-			FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-			U16 nextState = symbolNext[symbol]++;
-			tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState));
-			tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize);
-		}
-	}
-
-	return 0;
-}
-
-/*-*******************************************************
-*  Decompression (Byte symbols)
-*********************************************************/
-size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue)
-{
-	void *ptr = dt;
-	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-	void *dPtr = dt + 1;
-	FSE_decode_t *const cell = (FSE_decode_t *)dPtr;
-
-	DTableH->tableLog = 0;
-	DTableH->fastMode = 0;
-
-	cell->newState = 0;
-	cell->symbol = symbolValue;
-	cell->nbBits = 0;
-
-	return 0;
-}
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits)
-{
-	void *ptr = dt;
-	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-	void *dPtr = dt + 1;
-	FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr;
-	const unsigned tableSize = 1 << nbBits;
-	const unsigned tableMask = tableSize - 1;
-	const unsigned maxSV1 = tableMask + 1;
-	unsigned s;
-
-	/* Sanity checks */
-	if (nbBits < 1)
-		return ERROR(GENERIC); /* min size */
-
-	/* Build Decoding Table */
-	DTableH->tableLog = (U16)nbBits;
-	DTableH->fastMode = 1;
-	for (s = 0; s < maxSV1; s++) {
-		dinfo[s].newState = 0;
-		dinfo[s].symbol = (BYTE)s;
-		dinfo[s].nbBits = (BYTE)nbBits;
-	}
-
-	return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt,
-						       const unsigned fast)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	BYTE *const omax = op + maxDstSize;
-	BYTE *const olimit = omax - 3;
-
-	BIT_DStream_t bitD;
-	FSE_DState_t state1;
-	FSE_DState_t state2;
-
-	/* Init */
-	CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
-
-	FSE_initDState(&state1, &bitD, dt);
-	FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
-	/* 4 symbols per loop */
-	for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) {
-		op[0] = FSE_GETSYMBOL(&state1);
-
-		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-			BIT_reloadDStream(&bitD);
-
-		op[1] = FSE_GETSYMBOL(&state2);
-
-		if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-		{
-			if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) {
-				op += 2;
-				break;
-			}
-		}
-
-		op[2] = FSE_GETSYMBOL(&state1);
-
-		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-			BIT_reloadDStream(&bitD);
-
-		op[3] = FSE_GETSYMBOL(&state2);
-	}
-
-	/* tail */
-	/* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer; ends at exactly BIT_DStream_completed */
-	while (1) {
-		if (op > (omax - 2))
-			return ERROR(dstSize_tooSmall);
-		*op++ = FSE_GETSYMBOL(&state1);
-		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-			*op++ = FSE_GETSYMBOL(&state2);
-			break;
-		}
-
-		if (op > (omax - 2))
-			return ERROR(dstSize_tooSmall);
-		*op++ = FSE_GETSYMBOL(&state2);
-		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-			*op++ = FSE_GETSYMBOL(&state1);
-			break;
-		}
-	}
-
-	return op - ostart;
-}
-
-size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt)
-{
-	const void *ptr = dt;
-	const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr;
-	const U32 fastMode = DTableH->fastMode;
-
-	/* select fast mode (static) */
-	if (fastMode)
-		return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-	return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize)
-{
-	const BYTE *const istart = (const BYTE *)cSrc;
-	const BYTE *ip = istart;
-	unsigned tableLog;
-	unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-	size_t NCountLength;
-
-	FSE_DTable *dt;
-	short *counting;
-	size_t spaceUsed32 = 0;
-
-	FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32));
-
-	dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog);
-	counting = (short *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* normal FSE decoding mode */
-	NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-	if (FSE_isError(NCountLength))
-		return NCountLength;
-	// if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining
-	// case : NCountLength==cSrcSize */
-	if (tableLog > maxLog)
-		return ERROR(tableLog_tooLarge);
-	ip += NCountLength;
-	cSrcSize -= NCountLength;
-
-	CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize));
-
-	return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */
-}
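
On the decode side, FSE_decompress_wksp() above only needs a caller-supplied scratch area holding the DTable, the 256 short counts, and the 256 U16 state counters used by FSE_buildDTable_wksp(). A hypothetical call site, with the workspace bound derived from those sizes rather than taken from a library constant:

	static size_t example_fse_decompress(void *dst, size_t dstCapacity,
					     const void *cSrc, size_t cSrcSize)
	{
		/* DTable, then 256 shorts (128 U32) + 256 U16s (128 U32) */
		U32 wksp[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG) + 128 + 128];

		return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize,
					   FSE_MAX_TABLELOG, wksp, sizeof(wksp));
	}
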
diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h
deleted file mode 100644
index 923218d..0000000
--- a/lib/zstd/huf.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Huffman coder, part of New Generation Entropy library
- * header file
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234
-
-/* *** Dependencies *** */
-#include <linux/types.h> /* size_t */
-
-/* ***   Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
-
-/* Error Management */
-unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
-
-/* ***   Advanced function   *** */
-
-/** HUF_compress4X_wksp() :
-*   Same as HUF_compress2(), but uses an externally allocated `workSpace`, which must be a table of at least 1024 unsigned */
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/* *** Dependencies *** */
-#include "mem.h" /* U32 */
-
-/* *** Constants *** */
-#define HUF_TABLELOG_MAX 12     /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
-
-#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
-#error "HUF_TABLELOG_MAX is too large !"
-#endif
-
-/* ****************************************
-*  Static allocation
-******************************************/
-/* HUF buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8)			 /* only true if incompressible data is pre-filtered with a fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* static allocation of HUF's Compression Table */
-#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-	U32 name##hb[maxSymbolValue + 1];              \
-	void *name##hv = &(name##hb);                  \
-	HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */
-
-/* static allocation of HUF's DTable */
-typedef U32 HUF_DTable;
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog)))
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)}
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)}
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10)
-#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10)
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* ****************************************
-*  Advanced decompression functions
-******************************************/
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				size_t workspaceSize);							       /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< double-symbols decoder */
-
-/* ****************************************
-*  HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable_wksp()
-5. encode the data stream using HUF_compress4X_usingCTable()
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize);
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-
-typedef enum {
-	HUF_repeat_none,  /**< Cannot use the previous table */
-	HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1,
-			     4}X_repeat */
-	HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
-} HUF_repeat;
-/** HUF_compress4X_repeat() :
-*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-byte boundaries, and be at least as large as a table of 1024 unsigned.
- */
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize);
-
-/*! HUF_readStats() :
-	Read compact Huffman tree, saved by HUF_writeCTable().
-	`huffWeight` is destination buffer.
-	@return : size read from `src` , or an error Code .
-	Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize,
-			  void *workspace, size_t workspaceSize);
-
-/** HUF_readCTable() :
-*   Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-/*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build the Huffman table from the saved header, using HUF_readDTableX{2,4}_wksp()
-3. decode 1 or 4 segments in parallel using HUF_decompress{1,4}X{2,4}_usingDTable()
-*/
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize);
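For instance, a caller owning a DTable and a scratch buffer can pair HUF_selectDecoder() with the *_DCtx_wksp entry points above. A minimal sketch (`pick_and_decompress` is hypothetical, and it assumes the HUF_CREATE_STATIC_DTABLEX4() initializer declared earlier in this header):

	size_t pick_and_decompress(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize,
				   void *wksp, size_t wkspSize)
	{
		HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); /* sized for either decoder */

		return HUF_selectDecoder(dstSize, cSrcSize)
			   ? HUF_decompress4X4_DCtx_wksp(DTable, dst, dstSize, cSrc, cSrcSize, wksp, wkspSize)
			   : HUF_decompress4X2_DCtx_wksp(DTable, dst, dstSize, cSrc, cSrcSize, wksp, wkspSize);
	}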
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-/* single stream variants */
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-/** HUF_compress1X_repeat() :
-*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize);
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< double-symbols decoder */
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize,
-				    const HUF_DTable *DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-#endif /* HUF_H_298734234 */
diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c
deleted file mode 100644
index fd32838..0000000
--- a/lib/zstd/huf_compress.c
+++ /dev/null
@@ -1,773 +0,0 @@
-/*
- * Huffman encoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h" /* header compression */
-#include "huf.h"
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-#define CHECK_V_F(e, f)     \
-	size_t const e = f; \
-	if (ERR_isError(e)) \
-	return e /* return the saved value; `return f` would evaluate the expression a second time */
-#define CHECK_F(f)                        \
-	{                                 \
-		CHECK_V_F(_var_err__, f); \
-	}
-
-/* **************************************************************
-*  Utils
-****************************************************************/
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-}
-
-/* *******************************************************
-*  HUF : Huffman block compression
-*********************************************************/
-/* HUF_compressWeights() :
- * Same as FSE_compress(), but dedicated to huff0's weights compression.
- * The use case needs much less stack memory.
- * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
- */
-#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	BYTE *const oend = ostart + dstSize;
-
-	U32 maxSymbolValue = HUF_TABLELOG_MAX;
-	U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-
-	FSE_CTable *CTable;
-	U32 *count;
-	S16 *norm;
-	size_t spaceUsed32 = 0;
-
-	HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
-
-	CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
-	count = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 1;
-	norm = (S16 *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* init conditions */
-	if (wtSize <= 1)
-		return 0; /* Not compressible */
-
-	/* Scan input and build symbol stats */
-	{
-		CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
-		if (maxCount == wtSize)
-			return 1; /* only a single symbol in src : rle */
-		if (maxCount == 1)
-			return 0; /* each symbol present maximum once => not compressible */
-	}
-
-	tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-	CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
-
-	/* Write table description header */
-	{
-		CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
-		op += hSize;
-	}
-
-	/* Compress */
-	CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
-	{
-		CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
-		if (cSize == 0)
-			return 0; /* not enough space for compressed data */
-		op += cSize;
-	}
-
-	return op - ostart;
-}
-
-struct HUF_CElt_s {
-	U16 val;
-	BYTE nbBits;
-}; /* typedef'd to HUF_CElt within "huf.h" */
-
-/*! HUF_writeCTable_wksp() :
-	`CTable` : Huffman tree to save, using huf representation.
-	@return : size of saved CTable */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
-{
-	BYTE *op = (BYTE *)dst;
-	U32 n;
-
-	BYTE *bitsToWeight;
-	BYTE *huffWeight;
-	size_t spaceUsed32 = 0;
-
-	bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* check conditions */
-	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-		return ERROR(maxSymbolValue_tooLarge);
-
-	/* convert to weight */
-	bitsToWeight[0] = 0;
-	for (n = 1; n < huffLog + 1; n++)
-		bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
-	for (n = 0; n < maxSymbolValue; n++)
-		huffWeight[n] = bitsToWeight[CTable[n].nbBits];
-
-	/* attempt weights compression by FSE */
-	{
-		CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
-		if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
-			op[0] = (BYTE)hSize;
-			return hSize + 1;
-		}
-	}
-
-	/* write raw values as 4-bits (max : 15) */
-	if (maxSymbolValue > (256 - 128))
-		return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
-	if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
-		return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
-	op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
-	huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
-	for (n = 0; n < maxSymbolValue; n += 2)
-		op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
-	return ((maxSymbolValue + 1) / 2) + 1;
-}
-
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 *rankVal;
-	BYTE *huffWeight;
-	U32 tableLog = 0;
-	U32 nbSymbols = 0;
-	size_t readSize;
-	size_t spaceUsed32 = 0;
-
-	rankVal = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* get symbol weights */
-	readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (ERR_isError(readSize))
-		return readSize;
-
-	/* check result */
-	if (tableLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	if (nbSymbols > maxSymbolValue + 1)
-		return ERROR(maxSymbolValue_tooSmall);
-
-	/* Prepare base value per rank */
-	{
-		U32 n, nextRankStart = 0;
-		for (n = 1; n <= tableLog; n++) {
-			U32 curr = nextRankStart;
-			nextRankStart += (rankVal[n] << (n - 1));
-			rankVal[n] = curr;
-		}
-	}
-
-	/* fill nbBits */
-	{
-		U32 n;
-		for (n = 0; n < nbSymbols; n++) {
-			const U32 w = huffWeight[n];
-			CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-		}
-	}
-
-	/* fill val */
-	{
-		U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
-		U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
-		{
-			U32 n;
-			for (n = 0; n < nbSymbols; n++)
-				nbPerRank[CTable[n].nbBits]++;
-		}
-		/* determine starting value per rank */
-		valPerRank[tableLog + 1] = 0; /* for w==0 */
-		{
-			U16 min = 0;
-			U32 n;
-			for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
-				valPerRank[n] = min;     /* get starting value within each rank */
-				min += nbPerRank[n];
-				min >>= 1;
-			}
-		}
-		/* assign value within rank, symbol order */
-		{
-			U32 n;
-			for (n = 0; n <= maxSymbolValue; n++)
-				CTable[n].val = valPerRank[CTable[n].nbBits]++;
-		}
-	}
-
-	return readSize;
-}
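A worked example of the val assignment above: with four symbols of code lengths {1, 2, 3, 3}, nbPerRank is [1]=1, [2]=1, [3]=2. Walking n from 3 down to 1: valPerRank[3] = 0, then min = (0 + 2) >> 1 = 1; valPerRank[2] = 1, min = (1 + 1) >> 1 = 1; valPerRank[1] = 1. Assigning within ranks in symbol order yields the codes 1, 01, 000, 001 — a canonical, prefix-free set.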
-
-typedef struct nodeElt_s {
-	U32 count;
-	U16 parent;
-	BYTE byte;
-	BYTE nbBits;
-} nodeElt;
-
-static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
-{
-	const U32 largestBits = huffNode[lastNonNull].nbBits;
-	if (largestBits <= maxNbBits)
-		return largestBits; /* early exit : no elt > maxNbBits */
-
-	/* several elements are too large (at least 2) */
-	{
-		int totalCost = 0;
-		const U32 baseCost = 1 << (largestBits - maxNbBits);
-		U32 n = lastNonNull;
-
-		while (huffNode[n].nbBits > maxNbBits) {
-			totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-			huffNode[n].nbBits = (BYTE)maxNbBits;
-			n--;
-		} /* n stops at huffNode[n].nbBits <= maxNbBits */
-		while (huffNode[n].nbBits == maxNbBits)
-			n--; /* n ends at the index of the smallest symbol using < maxNbBits */
-
-		/* renorm totalCost */
-		totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
-
-		/* repay normalized cost */
-		{
-			U32 const noSymbol = 0xF0F0F0F0;
-			U32 rankLast[HUF_TABLELOG_MAX + 2];
-			int pos;
-
-			/* Get pos of last (smallest) symbol per rank */
-			memset(rankLast, 0xF0, sizeof(rankLast));
-			{
-				U32 currNbBits = maxNbBits;
-				for (pos = n; pos >= 0; pos--) {
-					if (huffNode[pos].nbBits >= currNbBits)
-						continue;
-					currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
-					rankLast[maxNbBits - currNbBits] = pos;
-				}
-			}
-
-			while (totalCost > 0) {
-				U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
-				for (; nBitsToDecrease > 1; nBitsToDecrease--) {
-					U32 highPos = rankLast[nBitsToDecrease];
-					U32 lowPos = rankLast[nBitsToDecrease - 1];
-					if (highPos == noSymbol)
-						continue;
-					if (lowPos == noSymbol)
-						break;
-					{
-						U32 const highTotal = huffNode[highPos].count;
-						U32 const lowTotal = 2 * huffNode[lowPos].count;
-						if (highTotal <= lowTotal)
-							break;
-					}
-				}
-				/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-				/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
-				while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
-					nBitsToDecrease++;
-				totalCost -= 1 << (nBitsToDecrease - 1);
-				if (rankLast[nBitsToDecrease - 1] == noSymbol)
-					rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
-				huffNode[rankLast[nBitsToDecrease]].nbBits++;
-				if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
-					rankLast[nBitsToDecrease] = noSymbol;
-				else {
-					rankLast[nBitsToDecrease]--;
-					if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
-						rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
-				}
-			} /* while (totalCost > 0) */
-
-			while (totalCost < 0) { /* sometimes the cost correction overshoots */
-				if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
-								  (using maxNbBits) */
-					while (huffNode[n].nbBits == maxNbBits)
-						n--;
-					huffNode[n + 1].nbBits--;
-					rankLast[1] = n + 1;
-					totalCost++;
-					continue;
-				}
-				huffNode[rankLast[1] + 1].nbBits--;
-				rankLast[1]++;
-				totalCost++;
-			}
-		}
-	} /* several elements are too large (at least 2) */
-
-	return maxNbBits;
-}
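In Kraft-inequality terms: after the renormalization, totalCost counts excess code space in units of 2^-maxNbBits. Truncating a symbol from nbBits to maxNbBits inflates its share from 2^-nbBits to 2^-maxNbBits, while lengthening a symbol sitting at depth maxNbBits - n by one bit gives back exactly 2^(n-1) of those units — which is the `totalCost -= 1 << (nBitsToDecrease - 1)` step, applied greedily from the largest repayable rank downward.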
-
-typedef struct {
-	U32 base;
-	U32 curr;
-} rankPos;
-
-static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
-{
-	rankPos rank[32];
-	U32 n;
-
-	memset(rank, 0, sizeof(rank));
-	for (n = 0; n <= maxSymbolValue; n++) {
-		U32 r = BIT_highbit32(count[n] + 1);
-		rank[r].base++;
-	}
-	for (n = 30; n > 0; n--)
-		rank[n - 1].base += rank[n].base;
-	for (n = 0; n < 32; n++)
-		rank[n].curr = rank[n].base;
-	for (n = 0; n <= maxSymbolValue; n++) {
-		U32 const c = count[n];
-		U32 const r = BIT_highbit32(c + 1) + 1;
-		U32 pos = rank[r].curr++;
-		while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
-			huffNode[pos] = huffNode[pos - 1], pos--;
-		huffNode[pos].count = c;
-		huffNode[pos].byte = (BYTE)n;
-	}
-}
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-byte boundaries, and be at least as large as a table of 1024 unsigned.
- */
-#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
-typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
-{
-	nodeElt *const huffNode0 = (nodeElt *)workSpace;
-	nodeElt *const huffNode = huffNode0 + 1;
-	U32 n, nonNullRank;
-	int lowS, lowN;
-	U16 nodeNb = STARTNODE;
-	U32 nodeRoot;
-
-	/* safety checks */
-	if (wkspSize < sizeof(huffNodeTable))
-		return ERROR(GENERIC); /* workSpace is not large enough */
-	if (maxNbBits == 0)
-		maxNbBits = HUF_TABLELOG_DEFAULT;
-	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-		return ERROR(GENERIC);
-	memset(huffNode0, 0, sizeof(huffNodeTable));
-
-	/* sort, decreasing order */
-	HUF_sort(huffNode, count, maxSymbolValue);
-
-	/* init for parents */
-	nonNullRank = maxSymbolValue;
-	while (huffNode[nonNullRank].count == 0)
-		nonNullRank--;
-	lowS = nonNullRank;
-	nodeRoot = nodeNb + lowS - 1;
-	lowN = nodeNb;
-	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
-	huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
-	nodeNb++;
-	lowS -= 2;
-	for (n = nodeNb; n <= nodeRoot; n++)
-		huffNode[n].count = (U32)(1U << 30);
-	huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
-
-	/* create parents */
-	while (nodeNb <= nodeRoot) {
-		U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-		U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-		huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
-		huffNode[n1].parent = huffNode[n2].parent = nodeNb;
-		nodeNb++;
-	}
-
-	/* distribute weights (unlimited tree height) */
-	huffNode[nodeRoot].nbBits = 0;
-	for (n = nodeRoot - 1; n >= STARTNODE; n--)
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-	for (n = 0; n <= nonNullRank; n++)
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-
-	/* enforce maxTableLog */
-	maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
-
-	/* fill result into tree (val, nbBits) */
-	{
-		U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
-		U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
-		if (maxNbBits > HUF_TABLELOG_MAX)
-			return ERROR(GENERIC); /* check fit into table */
-		for (n = 0; n <= nonNullRank; n++)
-			nbPerRank[huffNode[n].nbBits]++;
-		/* determine starting value per rank */
-		{
-			U16 min = 0;
-			for (n = maxNbBits; n > 0; n--) {
-				valPerRank[n] = min; /* get starting value within each rank */
-				min += nbPerRank[n];
-				min >>= 1;
-			}
-		}
-		for (n = 0; n <= maxSymbolValue; n++)
-			tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
-		for (n = 0; n <= maxSymbolValue; n++)
-			tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
-	}
-
-	return maxNbBits;
-}
-
-static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-	size_t nbBits = 0;
-	int s;
-	for (s = 0; s <= (int)maxSymbolValue; ++s) {
-		nbBits += CTable[s].nbBits * count[s];
-	}
-	return nbBits >> 3;
-}
-
-static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-	int bad = 0;
-	int s;
-	for (s = 0; s <= (int)maxSymbolValue; ++s) {
-		bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
-	}
-	return !bad;
-}
-
-static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
-{
-	BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
-}
-
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
-#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
-
-#define HUF_FLUSHBITS_1(stream)                                            \
-	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
-	HUF_FLUSHBITS(stream)
-
-#define HUF_FLUSHBITS_2(stream)                                            \
-	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
-	HUF_FLUSHBITS(stream)
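These guards encode a compile-time bit budget: each symbol adds at most HUF_TABLELOG_MAX (12 in this tree) bits, and up to 7 bits may remain in the container after a flush. A 64-bit bitContainer therefore absorbs a full group of four encodes (4 × 12 + 7 = 55 ≤ 64) with only the final HUF_FLUSHBITS, while a 32-bit container holds just two (2 × 12 + 7 = 31 ≤ 32), so on 32-bit builds HUF_FLUSHBITS_2 compiles into a real mid-group flush.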
-
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-	const BYTE *ip = (const BYTE *)src;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-	size_t n;
-	BIT_CStream_t bitC;
-
-	/* init */
-	if (dstSize < 8)
-		return 0; /* not enough space to compress */
-	{
-		size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
-		if (HUF_isError(initErr))
-			return 0;
-	}
-
-	n = srcSize & ~3; /* round n down to a multiple of 4 */
-	switch (srcSize & 3) {
-	case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
-		fallthrough;
-	case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
-		fallthrough;
-	case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
-		fallthrough;
-	case 0:
-	default:;
-	}
-
-	for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
-		HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
-		HUF_FLUSHBITS_1(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
-		HUF_FLUSHBITS_2(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
-		HUF_FLUSHBITS_1(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
-		HUF_FLUSHBITS(&bitC);
-	}
-
-	return BIT_closeCStream(&bitC);
-}
-
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-	size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
-	const BYTE *ip = (const BYTE *)src;
-	const BYTE *const iend = ip + srcSize;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-
-	if (dstSize < 6 + 1 + 1 + 1 + 8)
-		return 0; /* minimum space to compress successfully */
-	if (srcSize < 12)
-		return 0; /* no saving possible : too small input */
-	op += 6;	  /* jumpTable */
-
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart + 2, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart + 4, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
-		if (cSize == 0)
-			return 0;
-		op += cSize;
-	}
-
-	return op - ostart;
-}
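The 6 bytes skipped for the jump table above hold three little-endian 16-bit stream sizes; the fourth size is implicit. The matching decoders recover the segment boundaries like so (mirroring HUF_decompress4X?_usingDTable_internal() below in huf_decompress.c):

	length1 = ZSTD_readLE16(istart);
	length2 = ZSTD_readLE16(istart + 2);
	length3 = ZSTD_readLE16(istart + 4);
	length4 = cSrcSize - (length1 + length2 + length3 + 6); /* whatever remains after the header */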
-
-static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
-					  const HUF_CElt *CTable)
-{
-	size_t const cSize =
-	    singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-	if (HUF_isError(cSize)) {
-		return cSize;
-	}
-	if (cSize == 0) {
-		return 0;
-	} /* uncompressible */
-	op += cSize;
-	/* check compressibility */
-	if ((size_t)(op - ostart) >= srcSize - 1) {
-		return 0;
-	}
-	return op - ostart;
-}
-
-/* `workSpace` must be a table of at least 1024 unsigned */
-static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
-				    unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-
-	U32 *count;
-	size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
-	HUF_CElt *CTable;
-	size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
-	/* checks & inits */
-	if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
-		return ERROR(GENERIC);
-	if (!srcSize)
-		return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
-	if (!dstSize)
-		return 0; /* cannot fit within dst budget */
-	if (srcSize > HUF_BLOCKSIZE_MAX)
-		return ERROR(srcSize_wrong); /* current block size limit */
-	if (huffLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	if (!maxSymbolValue)
-		maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-	if (!huffLog)
-		huffLog = HUF_TABLELOG_DEFAULT;
-
-	count = (U32 *)workSpace;
-	workSpace = (BYTE *)workSpace + countSize;
-	wkspSize -= countSize;
-	CTable = (HUF_CElt *)workSpace;
-	workSpace = (BYTE *)workSpace + CTableSize;
-	wkspSize -= CTableSize;
-
-	/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
-	if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-	}
-
-	/* Scan input and build symbol stats */
-	{
-		CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
-		if (largest == srcSize) {
-			*ostart = ((const BYTE *)src)[0];
-			return 1;
-		} /* single symbol, rle */
-		if (largest <= (srcSize >> 7) + 1)
-			return 0; /* Fast heuristic : not compressible enough */
-	}
-
-	/* Check validity of previous table */
-	if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
-		*repeat = HUF_repeat_none;
-	}
-	/* Heuristic : use existing table for small inputs */
-	if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-	}
-
-	/* Build Huffman Tree */
-	huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-	{
-		CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
-		huffLog = (U32)maxBits;
-		/* Zero the unused symbols so we can check it for validity */
-		memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
-	}
-
-	/* Write table description header */
-	{
-		CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
-		/* Check if using the previous table will be beneficial */
-		if (repeat && *repeat != HUF_repeat_none) {
-			size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
-			size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
-			if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-				return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-			}
-		}
-		/* Use the new table */
-		if (hSize + 12ul >= srcSize) {
-			return 0;
-		}
-		op += hSize;
-		if (repeat) {
-			*repeat = HUF_repeat_none;
-		}
-		if (oldHufTable) {
-			memcpy(oldHufTable, CTable, CTableSize);
-		} /* Save the new table */
-	}
-	return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
-}
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			   size_t wkspSize)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
-				     preferRepeat);
-}
-
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			   size_t wkspSize)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
-				     preferRepeat);
-}
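Taken together, HUF_compress4X_wksp() is the usual whole-block entry point. A minimal sketch of a kernel-side caller (hypothetical helper; assumes HUF_COMPRESS_WORKSPACE_SIZE_U32 and HUF_isError() from huf.h plus the usual <linux/slab.h> allocators):

	#include <linux/slab.h>

	static size_t try_huf_compress(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
	{
		size_t ret;
		u32 *wksp = kmalloc_array(HUF_COMPRESS_WORKSPACE_SIZE_U32, sizeof(*wksp), GFP_KERNEL);

		if (!wksp)
			return 0; /* treat allocation failure as "store raw" */
		ret = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
					  HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
					  wksp, HUF_COMPRESS_WORKSPACE_SIZE_U32 * sizeof(*wksp));
		if (HUF_isError(ret))
			ret = 0;
		kfree(wksp);
		return ret; /* 0: not compressible, 1: RLE, else compressed size */
	}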
diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c
deleted file mode 100644
index 6526482..0000000
--- a/lib/zstd/huf_decompress.c
+++ /dev/null
@@ -1,960 +0,0 @@
-/*
- * Huffman decoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Dependencies
-****************************************************************/
-#include "bitstream.h" /* BIT_* */
-#include "fse.h"       /* header compression */
-#include "huf.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/*-***************************/
-/*  generic DTableDesc       */
-/*-***************************/
-
-typedef struct {
-	BYTE maxTableLog;
-	BYTE tableType;
-	BYTE tableLog;
-	BYTE reserved;
-} DTableDesc;
-
-static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
-{
-	DTableDesc dtd;
-	memcpy(&dtd, table, sizeof(dtd));
-	return dtd;
-}
-
-/*-***************************/
-/*  single-symbol decoding   */
-/*-***************************/
-
-typedef struct {
-	BYTE byte;
-	BYTE nbBits;
-} HUF_DEltX2; /* single-symbol decoding */
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 tableLog = 0;
-	U32 nbSymbols = 0;
-	size_t iSize;
-	void *const dtPtr = DTable + 1;
-	HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
-
-	U32 *rankVal;
-	BYTE *huffWeight;
-	size_t spaceUsed32 = 0;
-
-	rankVal = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-	/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
-
-	iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (HUF_isError(iSize))
-		return iSize;
-
-	/* Table header */
-	{
-		DTableDesc dtd = HUF_getDTableDesc(DTable);
-		if (tableLog > (U32)(dtd.maxTableLog + 1))
-			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
-		dtd.tableType = 0;
-		dtd.tableLog = (BYTE)tableLog;
-		memcpy(DTable, &dtd, sizeof(dtd));
-	}
-
-	/* Calculate starting value for each rank */
-	{
-		U32 n, nextRankStart = 0;
-		for (n = 1; n < tableLog + 1; n++) {
-			U32 const curr = nextRankStart;
-			nextRankStart += (rankVal[n] << (n - 1));
-			rankVal[n] = curr;
-		}
-	}
-
-	/* fill DTable */
-	{
-		U32 n;
-		for (n = 0; n < nbSymbols; n++) {
-			U32 const w = huffWeight[n];
-			U32 const length = (1 << w) >> 1;
-			U32 u;
-			HUF_DEltX2 D;
-			D.byte = (BYTE)n;
-			D.nbBits = (BYTE)(tableLog + 1 - w);
-			for (u = rankVal[w]; u < rankVal[w] + length; u++)
-				dt[u] = D;
-			rankVal[w] += length;
-		}
-	}
-
-	return iSize;
-}
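To see why the flat lookup works: with tableLog = 3 the table has 8 entries indexed by the next 3 bits of the stream. A weight-3 symbol gets nbBits = 3 + 1 - 3 = 1 and length = (1 << 3) >> 1 = 4, so it is replicated into 4 consecutive cells — every 3-bit value sharing its 1-bit prefix resolves to the same entry, and BIT_skipBits() then consumes only the single bit that actually mattered.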
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
-	BYTE const c = dt[val].byte;
-	BIT_skipBits(Dstream, dt[val].nbBits);
-	return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
-	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-	if (ZSTD_64bits())                     \
-	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
-{
-	BYTE *const pStart = p;
-
-	/* up to 4 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
-		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-	}
-
-	/* closer to the end */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-	/* no more data to retrieve from bitstream, hence no need to reload */
-	while (p < pEnd)
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-	return pEnd - pStart;
-}
-
-static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	BYTE *op = (BYTE *)dst;
-	BYTE *const oend = op + dstSize;
-	const void *dtPtr = DTable + 1;
-	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-	BIT_DStream_t bitD;
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	U32 const dtLog = dtd.tableLog;
-
-	{
-		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-		if (HUF_isError(errorCode))
-			return errorCode;
-	}
-
-	HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
-
-	/* check */
-	if (!BIT_endOfDStream(&bitD))
-		return ERROR(corruption_detected);
-
-	return dstSize;
-}
-
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 0)
-		return ERROR(GENERIC);
-	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	/* Check */
-	if (cSrcSize < 10)
-		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-	{
-		const BYTE *const istart = (const BYTE *)cSrc;
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1;
-		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-
-		/* Init */
-		BIT_DStream_t bitD1;
-		BIT_DStream_t bitD2;
-		BIT_DStream_t bitD3;
-		BIT_DStream_t bitD4;
-		size_t const length1 = ZSTD_readLE16(istart);
-		size_t const length2 = ZSTD_readLE16(istart + 2);
-		size_t const length3 = ZSTD_readLE16(istart + 4);
-		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-		const BYTE *const istart1 = istart + 6; /* jumpTable */
-		const BYTE *const istart2 = istart1 + length1;
-		const BYTE *const istart3 = istart2 + length2;
-		const BYTE *const istart4 = istart3 + length3;
-		const size_t segmentSize = (dstSize + 3) / 4;
-		BYTE *const opStart2 = ostart + segmentSize;
-		BYTE *const opStart3 = opStart2 + segmentSize;
-		BYTE *const opStart4 = opStart3 + segmentSize;
-		BYTE *op1 = ostart;
-		BYTE *op2 = opStart2;
-		BYTE *op3 = opStart3;
-		BYTE *op4 = opStart4;
-		U32 endSignal;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		U32 const dtLog = dtd.tableLog;
-
-		if (length4 > cSrcSize)
-			return ERROR(corruption_detected); /* overflow */
-		{
-			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-
-		/* 16-32 symbols per loop (4-8 symbols per stream) */
-		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
-			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		}
-
-		/* check corruption */
-		if (op1 > opStart2)
-			return ERROR(corruption_detected);
-		if (op2 > opStart3)
-			return ERROR(corruption_detected);
-		if (op3 > opStart4)
-			return ERROR(corruption_detected);
-		/* note : op4 is already verified within the main loop */
-
-		/* finish bitStreams one by one */
-		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
-		/* check */
-		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-		if (!endSignal)
-			return ERROR(corruption_detected);
-
-		/* decoded size */
-		return dstSize;
-	}
-}
-
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 0)
-		return ERROR(GENERIC);
-	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* *************************/
-/* double-symbols decoding */
-/* *************************/
-typedef struct {
-	U16 sequence;
-	BYTE nbBits;
-	BYTE length;
-} HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct {
-	BYTE symbol;
-	BYTE weight;
-} sortedSymbol_t;
-
-/* HUF_fillDTableX4Level2() :
- * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
-				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
-{
-	HUF_DEltX4 DElt;
-	U32 rankVal[HUF_TABLELOG_MAX + 1];
-
-	/* get pre-calculated rankVal */
-	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-	/* fill skipped values */
-	if (minWeight > 1) {
-		U32 i, skipSize = rankVal[minWeight];
-		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
-		DElt.nbBits = (BYTE)(consumed);
-		DElt.length = 1;
-		for (i = 0; i < skipSize; i++)
-			DTable[i] = DElt;
-	}
-
-	/* fill DTable */
-	{
-		U32 s;
-		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
-			const U32 symbol = sortedSymbols[s].symbol;
-			const U32 weight = sortedSymbols[s].weight;
-			const U32 nbBits = nbBitsBaseline - weight;
-			const U32 length = 1 << (sizeLog - nbBits);
-			const U32 start = rankVal[weight];
-			U32 i = start;
-			const U32 end = start + length;
-
-			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
-			DElt.nbBits = (BYTE)(nbBits + consumed);
-			DElt.length = 2;
-			do {
-				DTable[i++] = DElt;
-			} while (i < end); /* since length >= 1 */
-
-			rankVal[weight] += length;
-		}
-	}
-}
-
-typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
-typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
-			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
-{
-	U32 rankVal[HUF_TABLELOG_MAX + 1];
-	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
-	const U32 minBits = nbBitsBaseline - maxWeight;
-	U32 s;
-
-	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-	/* fill DTable */
-	for (s = 0; s < sortedListSize; s++) {
-		const U16 symbol = sortedList[s].symbol;
-		const U32 weight = sortedList[s].weight;
-		const U32 nbBits = nbBitsBaseline - weight;
-		const U32 start = rankVal[weight];
-		const U32 length = 1 << (targetLog - nbBits);
-
-		if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
-			U32 sortedRank;
-			int minWeight = nbBits + scaleLog;
-			if (minWeight < 1)
-				minWeight = 1;
-			sortedRank = rankStart[minWeight];
-			HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
-					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
-		} else {
-			HUF_DEltX4 DElt;
-			ZSTD_writeLE16(&(DElt.sequence), symbol);
-			DElt.nbBits = (BYTE)(nbBits);
-			DElt.length = 1;
-			{
-				U32 const end = start + length;
-				U32 u;
-				for (u = start; u < end; u++)
-					DTable[u] = DElt;
-			}
-		}
-		rankVal[weight] += length;
-	}
-}
-
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 tableLog, maxW, sizeOfSort, nbSymbols;
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	U32 const maxTableLog = dtd.maxTableLog;
-	size_t iSize;
-	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
-	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
-	U32 *rankStart;
-
-	rankValCol_t *rankVal;
-	U32 *rankStats;
-	U32 *rankStart0;
-	sortedSymbol_t *sortedSymbol;
-	BYTE *weightList;
-	size_t spaceUsed32 = 0;
-
-	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
-
-	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-	rankStats = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 1;
-	rankStart0 = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 2;
-	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	rankStart = rankStart0 + 1;
-	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
-
-	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
-	if (maxTableLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
-
-	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (HUF_isError(iSize))
-		return iSize;
-
-	/* check result */
-	if (tableLog > maxTableLog)
-		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
-	/* find maxWeight */
-	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
-	} /* necessarily finds a solution before 0 */
-
-	/* Get start index of each weight */
-	{
-		U32 w, nextRankStart = 0;
-		for (w = 1; w < maxW + 1; w++) {
-			U32 curr = nextRankStart;
-			nextRankStart += rankStats[w];
-			rankStart[w] = curr;
-		}
-		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of the sorted list */
-		sizeOfSort = nextRankStart;
-	}
-
-	/* sort symbols by weight */
-	{
-		U32 s;
-		for (s = 0; s < nbSymbols; s++) {
-			U32 const w = weightList[s];
-			U32 const r = rankStart[w]++;
-			sortedSymbol[r].symbol = (BYTE)s;
-			sortedSymbol[r].weight = (BYTE)w;
-		}
-		rankStart[0] = 0; /* forget 0w symbols; this is the beginning of weight(1) */
-	}
-
-	/* Build rankVal */
-	{
-		U32 *const rankVal0 = rankVal[0];
-		{
-			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
-			U32 nextRankVal = 0;
-			U32 w;
-			for (w = 1; w < maxW + 1; w++) {
-				U32 curr = nextRankVal;
-				nextRankVal += rankStats[w] << (w + rescale);
-				rankVal0[w] = curr;
-			}
-		}
-		{
-			U32 const minBits = tableLog + 1 - maxW;
-			U32 consumed;
-			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-				U32 *const rankValPtr = rankVal[consumed];
-				U32 w;
-				for (w = 1; w < maxW + 1; w++) {
-					rankValPtr[w] = rankVal0[w] >> consumed;
-				}
-			}
-		}
-	}
-
-	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
-
-	dtd.tableLog = (BYTE)maxTableLog;
-	dtd.tableType = 1;
-	memcpy(DTable, &dtd, sizeof(dtd));
-	return iSize;
-}
-
-static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-	memcpy(op, dt + val, 2);
-	BIT_skipBits(DStream, dt[val].nbBits);
-	return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-	memcpy(op, dt + val, 1);
-	if (dt[val].length == 1)
-		BIT_skipBits(DStream, dt[val].nbBits);
-	else {
-		if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
-			BIT_skipBits(DStream, dt[val].nbBits);
-			if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
-				/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-				DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
-		}
-	}
-	return 1;
-}
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
-	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-	if (ZSTD_64bits())                     \
-	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
-{
-	BYTE *const pStart = p;
-
-	/* up to 8 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
-		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-	}
-
-	/* closer to end : up to 2 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-	while (p <= pEnd - 2)
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
-	if (p < pEnd)
-		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-	return p - pStart;
-}
-
-static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	BIT_DStream_t bitD;
-
-	/* Init */
-	{
-		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-		if (HUF_isError(errorCode))
-			return errorCode;
-	}
-
-	/* decode */
-	{
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
-		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-	}
-
-	/* check */
-	if (!BIT_endOfDStream(&bitD))
-		return ERROR(corruption_detected);
-
-	/* decoded size */
-	return dstSize;
-}
-
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 1)
-		return ERROR(GENERIC);
-	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	if (cSrcSize < 10)
-		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-	{
-		const BYTE *const istart = (const BYTE *)cSrc;
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1;
-		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-
-		/* Init */
-		BIT_DStream_t bitD1;
-		BIT_DStream_t bitD2;
-		BIT_DStream_t bitD3;
-		BIT_DStream_t bitD4;
-		size_t const length1 = ZSTD_readLE16(istart);
-		size_t const length2 = ZSTD_readLE16(istart + 2);
-		size_t const length3 = ZSTD_readLE16(istart + 4);
-		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-		const BYTE *const istart1 = istart + 6; /* jumpTable */
-		const BYTE *const istart2 = istart1 + length1;
-		const BYTE *const istart3 = istart2 + length2;
-		const BYTE *const istart4 = istart3 + length3;
-		size_t const segmentSize = (dstSize + 3) / 4;
-		BYTE *const opStart2 = ostart + segmentSize;
-		BYTE *const opStart3 = opStart2 + segmentSize;
-		BYTE *const opStart4 = opStart3 + segmentSize;
-		BYTE *op1 = ostart;
-		BYTE *op2 = opStart2;
-		BYTE *op3 = opStart3;
-		BYTE *op4 = opStart4;
-		U32 endSignal;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		U32 const dtLog = dtd.tableLog;
-
-		if (length4 > cSrcSize)
-			return ERROR(corruption_detected); /* overflow */
-		{
-			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-
-		/* 16-32 symbols per loop (4-8 symbols per stream) */
-		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
-			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		}
-
-		/* check corruption */
-		if (op1 > opStart2)
-			return ERROR(corruption_detected);
-		if (op2 > opStart3)
-			return ERROR(corruption_detected);
-		if (op3 > opStart4)
-			return ERROR(corruption_detected);
-		/* note : op4 already verified within main loop */
-
-		/* finish bitStreams one by one */
-		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
-		/* check */
-		{
-			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-			if (!endCheck)
-				return ERROR(corruption_detected);
-		}
-
-		/* decoded size */
-		return dstSize;
-	}
-}
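
The removed 4-stream decoder above depends on a small framing header: three little-endian 16-bit lengths form a 6-byte jump table, the fourth stream's length is implied by whatever input remains, and each stream fills one quarter of the output (segmentSize = (dstSize + 3) / 4). A minimal standalone sketch of that boundary computation, assuming the same framing (readLE16 is an illustrative portable helper, not the kernel's ZSTD_readLE16):

#include <stddef.h>
#include <stdint.h>

/* Illustrative portable helper; the kernel code uses ZSTD_readLE16(). */
static uint16_t readLE16(const uint8_t *p)
{
	return (uint16_t)(p[0] | ((uint16_t)p[1] << 8));
}

/* Compute the four stream extents from the 6-byte jump table.
 * Returns 0 on success, -1 if the declared lengths overflow the source. */
static int huf4_split(const uint8_t *src, size_t srcSize,
		      const uint8_t *starts[4], size_t lens[4])
{
	size_t l1, l2, l3;

	if (srcSize < 10)	/* jump table + at least 1 byte per stream */
		return -1;
	l1 = readLE16(src);
	l2 = readLE16(src + 2);
	l3 = readLE16(src + 4);
	if (6 + l1 + l2 + l3 > srcSize)	/* fourth length would underflow */
		return -1;
	starts[0] = src + 6;
	starts[1] = starts[0] + l1;
	starts[2] = starts[1] + l2;
	starts[3] = starts[2] + l3;
	lens[0] = l1;
	lens[1] = l2;
	lens[2] = l3;
	lens[3] = srcSize - (6 + l1 + l2 + l3);
	return 0;
}

The main loop then interleaves symbol decodes across the four streams, which hides load latency far better than draining each stream in turn.
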
-
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 1)
-		return ERROR(GENERIC);
-	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* ********************************/
-/* Generic decompression selector */
-/* ********************************/
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-typedef struct {
-	U32 tableTime;
-	U32 decode256Time;
-} algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
-    /* single, double, quad */
-    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==0 : impossible */
-    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==1 : impossible */
-    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
-    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
-    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
-    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
-    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
-    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
-    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
-    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
-    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
-    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
-    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
-    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
-    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
-    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
-};
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4.
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
-{
-	/* decoder timing evaluation */
-	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
-	U32 const D256 = (U32)(dstSize >> 8);
-	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
-	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
-	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
-	return DTime1 < DTime0;
-}
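
To make the heuristic concrete, here is a standalone sketch that hard-codes the Q == 7 row of the algoTime table above ({897, 128} for the single-symbol decoder, {1515, 75} for the double-symbol one) and walks one example through it; the sizes are assumptions chosen purely for illustration:

#include <stddef.h>
#include <stdint.h>

/* Example: 64 KB of output compressed to 30000 bytes lands in Q == 7. */
static unsigned select_decoder_example(void)
{
	size_t const dstSize = 65536;
	size_t const cSrcSize = 30000;
	uint32_t const Q = (uint32_t)(cSrcSize * 16 / dstSize);	/* = 7 */
	uint32_t const D256 = (uint32_t)(dstSize >> 8);		/* = 256 */
	uint32_t const time0 = 897 + 128 * D256;		/* = 33665 */
	uint32_t time1 = 1515 + 75 * D256;			/* = 20715 */

	(void)Q;
	time1 += time1 >> 3;	/* cache-eviction penalty: 23304 */
	return time1 < time0;	/* 23304 < 33665, so pick the X4 decoder */
}
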
-
-typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
-
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if (cSrcSize > dstSize)
-		return ERROR(corruption_detected); /* invalid */
-	if (cSrcSize == dstSize) {
-		memcpy(dst, cSrc, dstSize);
-		return dstSize;
-	} /* not compressed */
-	if (cSrcSize == 1) {
-		memset(dst, *(const BYTE *)cSrc, dstSize);
-		return dstSize;
-	} /* RLE */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
-
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
-		return ERROR(corruption_detected); /* invalid */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if (cSrcSize > dstSize)
-		return ERROR(corruption_detected); /* invalid */
-	if (cSrcSize == dstSize) {
-		memcpy(dst, cSrc, dstSize);
-		return dstSize;
-	} /* not compressed */
-	if (cSrcSize == 1) {
-		memset(dst, *(const BYTE *)cSrc, dstSize);
-		return dstSize;
-	} /* RLE */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h
deleted file mode 100644
index 93d7a2c..0000000
--- a/lib/zstd/mem.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include <asm/unaligned.h>
-#include <linux/string.h> /* memcpy */
-#include <linux/types.h>  /* size_t, ptrdiff_t */
-
-/*-****************************************
-*  Compiler specifics
-******************************************/
-#define ZSTD_STATIC static inline
-
-/*-**************************************************************
-*  Basic Types
-*****************************************************************/
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef int16_t S16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-typedef int64_t S64;
-typedef ptrdiff_t iPtrDiff;
-typedef uintptr_t uPtrDiff;
-
-/*-**************************************************************
-*  Memory I/O
-*****************************************************************/
-ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; }
-ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; }
-
-#if defined(__LITTLE_ENDIAN)
-#define ZSTD_LITTLE_ENDIAN 1
-#else
-#define ZSTD_LITTLE_ENDIAN 0
-#endif
-
-ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; }
-
-ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); }
-
-ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); }
-
-ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); }
-
-/*=== Little endian r/w ===*/
-
-ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); }
-
-ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); }
-
-ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val)
-{
-	ZSTD_writeLE16(memPtr, (U16)val);
-	((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
-}
-
-ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr)
-{
-	if (ZSTD_32bits())
-		return (size_t)ZSTD_readLE32(memPtr);
-	else
-		return (size_t)ZSTD_readLE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val)
-{
-	if (ZSTD_32bits())
-		ZSTD_writeLE32(memPtr, (U32)val);
-	else
-		ZSTD_writeLE64(memPtr, (U64)val);
-}
-
-/*=== Big endian r/w ===*/
-
-ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr)
-{
-	if (ZSTD_32bits())
-		return (size_t)ZSTD_readBE32(memPtr);
-	else
-		return (size_t)ZSTD_readBE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val)
-{
-	if (ZSTD_32bits())
-		ZSTD_writeBE32(memPtr, (U32)val);
-	else
-		ZSTD_writeBE64(memPtr, (U64)val);
-}
-
-/* function safe only for comparisons */
-ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length)
-{
-	switch (length) {
-	default:
-	case 4: return ZSTD_read32(memPtr);
-	case 3:
-		if (ZSTD_isLittleEndian())
-			return ZSTD_read32(memPtr) << 8;
-		else
-			return ZSTD_read32(memPtr) >> 8;
-	}
-}
-
-#endif /* MEM_H_MODULE */
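
mem.h exists mostly to funnel every unaligned access through the kernel's get_unaligned()/put_unaligned() helpers. In portable userspace code the conventional equivalent is a fixed-size memcpy, which compilers lower to a single (possibly unaligned) load or store wherever the target permits; a minimal sketch:

#include <stdint.h>
#include <string.h>

/* Portable userspace stand-ins for ZSTD_read32()/ZSTD_write32() above. */
static uint32_t read32(const void *p)
{
	uint32_t v;

	memcpy(&v, p, sizeof(v));	/* compiles to one load where legal */
	return v;
}

static void write32(void *p, uint32_t v)
{
	memcpy(p, &v, sizeof(v));
}
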
diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c
deleted file mode 100644
index a282624..0000000
--- a/lib/zstd/zstd_common.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
-#include <linux/kernel.h>
-
-/*=**************************************************************
-*  Custom allocator
-****************************************************************/
-
-#define stack_push(stack, size)                                 \
-	({                                                      \
-		void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \
-		(stack)->ptr = (char *)ptr + (size);            \
-		(stack)->ptr <= (stack)->end ? ptr : NULL;      \
-	})
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace};
-	ZSTD_stack *stack = (ZSTD_stack *)workspace;
-	/* Verify preconditions */
-	if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) {
-		ZSTD_customMem error = {NULL, NULL, NULL};
-		return error;
-	}
-	/* Initialize the stack */
-	stack->ptr = workspace;
-	stack->end = (char *)workspace + workspaceSize;
-	stack_push(stack, sizeof(ZSTD_stack));
-	return stackMem;
-}
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	*size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr);
-	return stack_push(stack, *size);
-}
-
-void *ZSTD_stackAlloc(void *opaque, size_t size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	return stack_push(stack, size);
-}
-void ZSTD_stackFree(void *opaque, void *address)
-{
-	(void)opaque;
-	(void)address;
-}
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); }
-
-void ZSTD_free(void *ptr, ZSTD_customMem customMem)
-{
-	if (ptr != NULL)
-		customMem.customFree(customMem.opaque, ptr);
-}
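
The deleted stack allocator gives zstd a malloc/free-shaped interface over a single caller-supplied workspace: stack_push() aligns the cursor, bumps it past the allocation, and yields NULL once the workspace is exhausted, while individual frees are no-ops. A standalone bump-allocator sketch of the same idea (names are illustrative, not the kernel API):

#include <stddef.h>
#include <stdint.h>

struct bump {
	char *ptr;	/* current cursor */
	char *end;	/* one past the end of the workspace */
};

static void *bump_align(void *p, size_t a)
{
	return (void *)(((uintptr_t)p + (a - 1)) & ~(uintptr_t)(a - 1));
}

static void *bump_alloc(struct bump *b, size_t size)
{
	char *p = bump_align(b->ptr, sizeof(size_t));

	if (p > b->end || (size_t)(b->end - p) < size)
		return NULL;	/* workspace exhausted */
	b->ptr = p + size;
	return p;
}

static void bump_free(struct bump *b, void *p)
{
	(void)b;
	(void)p;	/* no-op, exactly like ZSTD_stackFree() above */
}
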
diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
new file mode 100644
index 0000000..65548a4
--- /dev/null
+++ b/lib/zstd/zstd_compress_module.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+#include "common/zstd_internal.h"
+
+#define ZSTD_FORWARD_IF_ERR(ret)            \
+	do {                                \
+		size_t const __ret = (ret); \
+		if (ZSTD_isError(__ret))    \
+			return __ret;       \
+	} while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+	unsigned long long pledged_src_size)
+{
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+		cctx, ZSTD_reset_session_and_parameters));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+		cctx, pledged_src_size));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+	return 0;
+}
+
+int zstd_min_clevel(void)
+{
+	return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+	return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+	return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+	unsigned long long estimated_src_size)
+{
+	return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+	ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+	return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+	zstd_cstream *cstream;
+
+	if (workspace == NULL)
+		return NULL;
+
+	cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+	if (cstream == NULL)
+		return NULL;
+
+	/* A pledged size of 0 means "unknown" in the linux zstd API, but means an actual size of 0 in the upstream zstd API. */
+	if (pledged_src_size == 0)
+		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+	if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+		return NULL;
+
+	return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+	unsigned long long pledged_src_size)
+{
+	return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
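
For context, a minimal sketch of how a kernel caller might drive this new one-shot API; the wrapper function is illustrative, it assumes a sleepable GFP_KERNEL context, and error handling is abbreviated:

#include <linux/errno.h>
#include <linux/mm.h>		/* kvmalloc()/kvfree() */
#include <linux/types.h>
#include <linux/zstd.h>

static ssize_t example_compress(void *dst, size_t dst_cap,
				const void *src, size_t src_len)
{
	zstd_parameters params = zstd_get_params(3, src_len); /* level 3 */
	size_t ws_size = zstd_cctx_workspace_bound(&params.cParams);
	void *ws = kvmalloc(ws_size, GFP_KERNEL);
	zstd_cctx *cctx;
	size_t ret;

	if (!ws)
		return -ENOMEM;
	cctx = zstd_init_cctx(ws, ws_size);
	if (!cctx) {
		kvfree(ws);
		return -EINVAL;
	}
	ret = zstd_compress_cctx(cctx, dst, dst_cap, src, src_len, &params);
	kvfree(ws);
	return zstd_is_error(ret) ? -EIO : (ssize_t)ret;
}

(zstd_is_error() is exported by the decompress module below, which is why zstd_compress must depend on zstd_decompress.)
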
diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
new file mode 100644
index 0000000..f4ed952
--- /dev/null
+++ b/lib/zstd/zstd_decompress_module.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+
+/* Common symbols live in this module, so zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+	return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+	return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+	return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+	return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size)
+{
+	return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+	return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+	size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	(void)max_window_size;
+	return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+	return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+	return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+	size_t src_size)
+{
+	return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");
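
The decompression side pairs with it symmetrically; again a purely illustrative sketch, assuming the caller already knows an upper bound on the decompressed size:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/zstd.h>

static ssize_t example_decompress(void *dst, size_t dst_cap,
				  const void *src, size_t src_len)
{
	size_t ws_size = zstd_dctx_workspace_bound();
	void *ws = kvmalloc(ws_size, GFP_KERNEL);
	zstd_dctx *dctx;
	size_t ret;

	if (!ws)
		return -ENOMEM;
	dctx = zstd_init_dctx(ws, ws_size);
	if (!dctx) {
		kvfree(ws);
		return -EINVAL;
	}
	ret = zstd_decompress_dctx(dctx, dst, dst_cap, src, src_len);
	kvfree(ws);
	return zstd_is_error(ret) ? -EIO : (ssize_t)ret;
}
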
diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
deleted file mode 100644
index dac7533..0000000
--- a/lib/zstd/zstd_internal.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-/*-*******************************************************
-*  Compiler specifics
-*********************************************************/
-#define FORCE_INLINE static __always_inline
-#define FORCE_NOINLINE static noinline
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "mem.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/xxhash.h>
-#include <linux/zstd.h>
-					/* repcode detected, we should take it */
-/*-*************************************
-*  shared macros
-***************************************/
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define CHECK_F(f)                       \
-	{                                \
-		size_t const errcod = f; \
-		if (ERR_isError(errcod)) \
-			return errcod;   \
-	} /* check and Forward error code */
-#define CHECK_E(f, e)                    \
-	{                                \
-		size_t const errcod = f; \
-		if (ERR_isError(errcod)) \
-			return ERROR(e); \
-	} /* check and send Error code */
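
This CHECK_F()/CHECK_E() error-forwarding idiom survives in the new modules as ZSTD_FORWARD_IF_ERR() (see zstd_compress_module.c above). An expanded sketch of the pattern, with illustrative stand-ins for the real predicates:

#include <stddef.h>

/* Illustrative stand-ins: real code uses ERR_isError()/ZSTD_isError(),
 * which test whether a size_t result encodes an error code. */
static unsigned is_error(size_t code) { return code >= (size_t)-120; }
static size_t fallible_step(void) { return 42; }

/* Evaluate each fallible step exactly once; early-return its error code
 * unchanged, as the macros above do. */
static size_t run_setup(void)
{
	size_t const r = fallible_step();

	if (is_error(r))
		return r;
	return 0;
}
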
-#define ZSTD_STATIC_ASSERT(c)                                   \
-	{                                                       \
-		enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \
-	}
-
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM (1 << 12)
-#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
-
-#define ZSTD_REP_NUM 3		      /* number of repcodes */
-#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
-#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)
-#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
-static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8};
-
-#define KB *(1 << 10)
-#define MB *(1 << 20)
-#define GB *(1U << 30)
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8};
-static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4};
-
-#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow a `static const` variable to be initialized using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
-
-#define MIN_SEQUENCES_SIZE 1									  /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
-
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
-
-#define LONGNBSEQ 0x7F00
-
-#define MINMATCH 3
-#define EQUAL_READ32 4
-
-#define Litbits 8
-#define MaxLit ((1 << Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
-#define MaxOff 28
-#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
-#define MLFSELog 9
-#define LLFSELog 9
-#define OffFSELog 8
-
-static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
-#define LL_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
-
-static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
-				       0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
-					      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
-#define ML_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-
-static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
-#define OF_DEFAULTNORMLOG 5 /* for static allocation */
-static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
-
-/*-*******************************************
-*  Shared functions to include for inlining
-*********************************************/
-ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
-	/*
-	 * zstd relies heavily on gcc being able to analyze and inline this
-	 * memcpy() call, since it is called in a tight loop. Preboot mode
-	 * is compiled in freestanding mode, which stops gcc from analyzing
-	 * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a
-	 * regular memcpy().
-	 */
-	__builtin_memcpy(dst, src, 8);
-}
-/*! ZSTD_wildcopy() :
-*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
-{
-	const BYTE* ip = (const BYTE*)src;
-	BYTE* op = (BYTE*)dst;
-	BYTE* const oend = op + length;
-#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200
-	/*
-	 * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
-	 * Avoid the bad case where the loop only runs once by handling the
-	 * special case separately. This doesn't trigger the bug because it
-	 * doesn't involve pointer/integer overflow.
-	 */
-	if (length <= 8)
-		return ZSTD_copy8(dst, src);
-#endif
-	do {
-		ZSTD_copy8(op, ip);
-		op += 8;
-		ip += 8;
-	} while (op < oend);
-}
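
Because the loop above always copies whole 8-byte words, ZSTD_wildcopy() may write up to WILDCOPY_OVERLENGTH bytes past dst + length, so callers must reserve that much slack at the end of the destination buffer. A standalone restatement of the contract:

#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define WILDCOPY_OVERLENGTH 8

/* Same rounding behaviour as ZSTD_wildcopy(): length is rounded up to a
 * multiple of 8, so up to 7 extra bytes land past dst + length. */
static void wildcopy(void *dst, const void *src, ptrdiff_t length)
{
	const unsigned char *ip = src;
	unsigned char *op = dst;
	unsigned char *const oend = op + length;

	do {
		memcpy(op, ip, 8);
		op += 8;
		ip += 8;
	} while (op < oend);
}

int main(void)
{
	unsigned char src[32] = "0123456789abcdefghij";
	/* reserve slack so the rounded-up copy stays in bounds */
	unsigned char *dst = malloc(20 + WILDCOPY_OVERLENGTH);

	if (dst) {
		wildcopy(dst, src, 20);	/* actually writes 24 bytes */
		free(dst);
	}
	return 0;
}
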
-
-/*-*******************************************
-*  Private interfaces
-*********************************************/
-typedef struct ZSTD_stats_s ZSTD_stats_t;
-
-typedef struct {
-	U32 off;
-	U32 len;
-} ZSTD_match_t;
-
-typedef struct {
-	U32 price;
-	U32 off;
-	U32 mlen;
-	U32 litlen;
-	U32 rep[ZSTD_REP_NUM];
-} ZSTD_optimal_t;
-
-typedef struct seqDef_s {
-	U32 offset;
-	U16 litLength;
-	U16 matchLength;
-} seqDef;
-
-typedef struct {
-	seqDef *sequencesStart;
-	seqDef *sequences;
-	BYTE *litStart;
-	BYTE *lit;
-	BYTE *llCode;
-	BYTE *mlCode;
-	BYTE *ofCode;
-	U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
-	U32 longLengthPos;
-	/* opt */
-	ZSTD_optimal_t *priceTable;
-	ZSTD_match_t *matchTable;
-	U32 *matchLengthFreq;
-	U32 *litLengthFreq;
-	U32 *litFreq;
-	U32 *offCodeFreq;
-	U32 matchLengthSum;
-	U32 matchSum;
-	U32 litLengthSum;
-	U32 litSum;
-	U32 offCodeSum;
-	U32 log2matchLengthSum;
-	U32 log2matchSum;
-	U32 log2litLengthSum;
-	U32 log2litSum;
-	U32 log2offCodeSum;
-	U32 factor;
-	U32 staticPrices;
-	U32 cachedPrice;
-	U32 cachedLitLength;
-	const BYTE *cachedLiterals;
-} seqStore_t;
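
The seqDef records above are the entire hand-off between the match finders and the entropy stage: each sequence means "emit litLength literal bytes, then copy matchLength bytes starting offset bytes back" (the stored fields are actually biased codes; full values are used here for clarity). A standalone replay sketch, purely illustrative:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct seq {
	uint32_t offset;	/* distance back into already-written output */
	uint16_t litLength;
	uint16_t matchLength;
};

/* Replay sequences into dst; lit points at the concatenated literals.
 * No bounds checking: this only illustrates the data flow. */
static size_t replay(uint8_t *dst, const uint8_t *lit,
		     const struct seq *s, size_t nbSeq)
{
	uint8_t *op = dst;
	size_t i;
	uint32_t m;

	for (i = 0; i < nbSeq; i++) {
		memcpy(op, lit, s[i].litLength);
		lit += s[i].litLength;
		op += s[i].litLength;
		/* byte-wise so the match may overlap its own output */
		for (m = 0; m < s[i].matchLength; m++, op++)
			*op = op[-(ptrdiff_t)s[i].offset];
	}
	return (size_t)(op - dst);
}
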
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx);
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr);
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx);
-
-/*= Custom memory allocation functions */
-typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size);
-typedef void (*ZSTD_freeFunction)(void *opaque, void *address);
-typedef struct {
-	ZSTD_allocFunction customAlloc;
-	ZSTD_freeFunction customFree;
-	void *opaque;
-} ZSTD_customMem;
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_free(void *ptr, ZSTD_customMem customMem);
-
-/*====== stack allocation  ======*/
-
-typedef struct {
-	void *ptr;
-	const void *end;
-} ZSTD_stack;
-
-#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t))
-#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t))
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize);
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size);
-void *ZSTD_stackAlloc(void *opaque, size_t size);
-void ZSTD_stackFree(void *opaque, void *address);
-
-/*======  common function  ======*/
-
-ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); }
-
-/* hidden functions */
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx);
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx);
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx);
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict);
-size_t ZSTD_freeDDict(ZSTD_DDict *cdict);
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs);
-size_t ZSTD_freeDStream(ZSTD_DStream *zds);
-
-#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h
deleted file mode 100644
index 55e1b4c..0000000
--- a/lib/zstd/zstd_opt.h
+++ /dev/null
@@ -1,1014 +0,0 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this file is intended to be included within zstd_compress.c */
-
-#ifndef ZSTD_OPT_H_91842398743
-#define ZSTD_OPT_H_91842398743
-
-#define ZSTD_LITFREQ_ADD 2
-#define ZSTD_FREQ_DIV 4
-#define ZSTD_MAX_PRICE (1 << 30)
-
-/*-*************************************
-*  Price functions for optimal parser
-***************************************/
-FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr)
-{
-	ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1);
-	ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1);
-	ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1);
-	ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1);
-	ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum));
-}
-
-ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize)
-{
-	unsigned u;
-
-	ssPtr->cachedLiterals = NULL;
-	ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-	ssPtr->staticPrices = 0;
-
-	if (ssPtr->litLengthSum == 0) {
-		if (srcSize <= 1024)
-			ssPtr->staticPrices = 1;
-
-		for (u = 0; u <= MaxLit; u++)
-			ssPtr->litFreq[u] = 0;
-		for (u = 0; u < srcSize; u++)
-			ssPtr->litFreq[src[u]]++;
-
-		ssPtr->litSum = 0;
-		ssPtr->litLengthSum = MaxLL + 1;
-		ssPtr->matchLengthSum = MaxML + 1;
-		ssPtr->offCodeSum = (MaxOff + 1);
-		ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits);
-
-		for (u = 0; u <= MaxLit; u++) {
-			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->litSum += ssPtr->litFreq[u];
-		}
-		for (u = 0; u <= MaxLL; u++)
-			ssPtr->litLengthFreq[u] = 1;
-		for (u = 0; u <= MaxML; u++)
-			ssPtr->matchLengthFreq[u] = 1;
-		for (u = 0; u <= MaxOff; u++)
-			ssPtr->offCodeFreq[u] = 1;
-	} else {
-		ssPtr->matchLengthSum = 0;
-		ssPtr->litLengthSum = 0;
-		ssPtr->offCodeSum = 0;
-		ssPtr->matchSum = 0;
-		ssPtr->litSum = 0;
-
-		for (u = 0; u <= MaxLit; u++) {
-			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1));
-			ssPtr->litSum += ssPtr->litFreq[u];
-		}
-		for (u = 0; u <= MaxLL; u++) {
-			ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1));
-			ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
-		}
-		for (u = 0; u <= MaxML; u++) {
-			ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
-			ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
-		}
-		ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
-		for (u = 0; u <= MaxOff; u++) {
-			ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
-		}
-	}
-
-	ZSTD_setLog2Prices(ssPtr);
-}
-
-FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals)
-{
-	U32 price, u;
-
-	if (ssPtr->staticPrices)
-		return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6);
-
-	if (litLength == 0)
-		return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1);
-
-	/* literals */
-	if (ssPtr->cachedLiterals == literals) {
-		U32 const additional = litLength - ssPtr->cachedLitLength;
-		const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
-		price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
-		for (u = 0; u < additional; u++)
-			price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1);
-		ssPtr->cachedPrice = price;
-		ssPtr->cachedLitLength = litLength;
-	} else {
-		price = litLength * ssPtr->log2litSum;
-		for (u = 0; u < litLength; u++)
-			price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1);
-
-		if (litLength >= 12) {
-			ssPtr->cachedLiterals = literals;
-			ssPtr->cachedPrice = price;
-			ssPtr->cachedLitLength = litLength;
-		}
-	}
-
-	/* literal Length */
-	{
-		const BYTE LL_deltaCode = 19;
-		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-		price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1);
-	}
-
-	return price;
-}
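
The price computed here is essentially a truncated Shannon cost: log2 of the running total minus log2 of the symbol's count, both via ZSTD_highbit32(). A standalone sketch with one worked case (the litSum/freq values are assumptions for illustration):

#include <stdint.h>

static uint32_t highbit32(uint32_t v)	/* as ZSTD_highbit32() above */
{
	return 31 - __builtin_clz(v);
}

/* Approximate cost of one literal in bits. With litSum = 4096 and
 * freq = 256 this returns 12 - 8 = 4 bits, matching -log2(256/4096). */
static uint32_t literal_cost_bits(uint32_t litSum, uint32_t freq)
{
	return highbit32(litSum + 1) - highbit32(freq + 1);
}
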
-
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra)
-{
-	/* offset */
-	U32 price;
-	BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-
-	if (seqStorePtr->staticPrices)
-		return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode;
-
-	price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1);
-	if (!ultra && offCode >= 20)
-		price += (offCode - 19) * 2;
-
-	/* match Length */
-	{
-		const BYTE ML_deltaCode = 36;
-		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-		price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1);
-	}
-
-	return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
-}
-
-ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength)
-{
-	U32 u;
-
-	/* literals */
-	seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD;
-	for (u = 0; u < litLength; u++)
-		seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
-
-	/* literal Length */
-	{
-		const BYTE LL_deltaCode = 19;
-		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-		seqStorePtr->litLengthFreq[llCode]++;
-		seqStorePtr->litLengthSum++;
-	}
-
-	/* match offset */
-	{
-		BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-		seqStorePtr->offCodeSum++;
-		seqStorePtr->offCodeFreq[offCode]++;
-	}
-
-	/* match Length */
-	{
-		const BYTE ML_deltaCode = 36;
-		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-		seqStorePtr->matchLengthFreq[mlCode]++;
-		seqStorePtr->matchLengthSum++;
-	}
-
-	ZSTD_setLog2Prices(seqStorePtr);
-}
-
-#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)           \
-	{                                                         \
-		while (last_pos < pos) {                          \
-			opt[last_pos + 1].price = ZSTD_MAX_PRICE; \
-			last_pos++;                               \
-		}                                                 \
-		opt[pos].mlen = mlen_;                            \
-		opt[pos].off = offset_;                           \
-		opt[pos].litlen = litlen_;                        \
-		opt[pos].price = price_;                          \
-	}
-
-/* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip)
-{
-	U32 *const hashTable3 = zc->hashTable3;
-	U32 const hashLog3 = zc->hashLog3;
-	const BYTE *const base = zc->base;
-	U32 idx = zc->nextToUpdate3;
-	const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
-	const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
-
-	while (idx < target) {
-		hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx;
-		idx++;
-	}
-
-	return hashTable3[hash3];
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict,
-					 ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	const BYTE *const base = zc->base;
-	const U32 curr = (U32)(ip - base);
-	const U32 hashLog = zc->params.cParams.hashLog;
-	const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const hashTable = zc->hashTable;
-	U32 matchIndex = hashTable[h];
-	U32 *const bt = zc->chainTable;
-	const U32 btLog = zc->params.cParams.chainLog - 1;
-	const U32 btMask = (1U << btLog) - 1;
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	const U32 windowLow = zc->lowLimit;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-	U32 matchEndIdx = curr + 8;
-	U32 dummy32; /* to be nullified at the end */
-	U32 mnum = 0;
-
-	const U32 minMatch = (mls == 3) ? 3 : 4;
-	size_t bestLength = minMatchLen - 1;
-
-	if (minMatch == 3) { /* HC3 match finder */
-		U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip);
-		if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) {
-			const BYTE *match;
-			size_t currMl = 0;
-			if ((!extDict) || matchIndex3 >= dictLimit) {
-				match = base + matchIndex3;
-				if (match[bestLength] == ip[bestLength])
-					currMl = ZSTD_count(ip, match, iLimit);
-			} else {
-				match = dictBase + matchIndex3;
-				if (ZSTD_readMINMATCH(match, MINMATCH) ==
-				    ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
-					currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
-			}
-
-			/* save best solution */
-			if (currMl > bestLength) {
-				bestLength = currMl;
-				matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3;
-				matches[mnum].len = (U32)currMl;
-				mnum++;
-				if (currMl > ZSTD_OPT_NUM)
-					goto update;
-				if (ip + currMl == iLimit)
-					goto update; /* best possible, and avoid read overflow */
-			}
-		}
-	}
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-		const BYTE *match;
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength]) {
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1;
-			}
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-			bestLength = matchLength;
-			matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex;
-			matches[mnum].len = (U32)matchLength;
-			mnum++;
-			if (matchLength > ZSTD_OPT_NUM)
-				break;
-			if (ip + matchLength == iLimit) /* equal : cannot tell whether the true match is shorter or longer */
-				break;			/* drop, to guarantee consistency (miss a little bit of compression) */
-		}
-
-		if (match[matchLength] < ip[matchLength]) {
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-
-update:
-	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-	return mnum;
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches,
-				const U32 minMatchLen)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-					  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-					  ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	switch (matchLengthSearch) {
-	case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-	default:
-	case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-	case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-	case 7:
-	case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-	}
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls,
-					ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-						  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-						  ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	switch (matchLengthSearch) {
-	case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-	default:
-	case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-	case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-	case 7:
-	case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-	}
-}
-
-/*-*******************************
-*  Optimal parser
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const BYTE *const prefixStart = base + ctx->dictLimit;
-
-	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-	const U32 sufficient_len = ctx->params.cParams.targetLength;
-	const U32 mls = ctx->params.cParams.searchLength;
-	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-	ZSTD_match_t *matches = seqStorePtr->matchTable;
-	const BYTE *inr;
-	U32 offset, rep[ZSTD_REP_NUM];
-
-	/* init */
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-	ip += (ip == prefixStart);
-	{
-		U32 i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			rep[i] = ctx->rep[i];
-	}
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		U32 cur, match_num, last_pos, litlen, price;
-		U32 u, mlen, best_mlen, best_off, litLength;
-		memset(opt, 0, sizeof(ZSTD_optimal_t));
-		last_pos = 0;
-		litlen = (U32)(ip - anchor);
-
-		/* check repCode */
-		{
-			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-			for (i = (ip == anchor); i < last_i; i++) {
-				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-				if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) &&
-				    (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
-					mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch;
-					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-						best_mlen = mlen;
-						best_off = i;
-						cur = 0;
-						last_pos = 1;
-						goto _storeSequence;
-					}
-					best_off = i - (ip == anchor);
-					do {
-						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-						if (mlen > last_pos || price < opt[mlen].price)
-							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-						mlen--;
-					} while (mlen >= minMatch);
-				}
-			}
-		}
-
-		match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
-
-		if (!last_pos && !match_num) {
-			ip++;
-			continue;
-		}
-
-		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-			best_mlen = matches[match_num - 1].len;
-			best_off = matches[match_num - 1].off;
-			cur = 0;
-			last_pos = 1;
-			goto _storeSequence;
-		}
-
-		/* set prices using matches at position = 0 */
-		best_mlen = (last_pos) ? last_pos : minMatch;
-		for (u = 0; u < match_num; u++) {
-			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-			best_mlen = matches[u].len;
-			while (mlen <= best_mlen) {
-				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-				if (mlen > last_pos || price < opt[mlen].price)
-					SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
-				mlen++;
-			}
-		}
-
-		if (last_pos < minMatch) {
-			ip++;
-			continue;
-		}
-
-		/* initialize opt[0] */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				opt[0].rep[i] = rep[i];
-		}
-		opt[0].mlen = 1;
-		opt[0].litlen = litlen;
-
-		/* check further positions */
-		for (cur = 1; cur <= last_pos; cur++) {
-			inr = ip + cur;
-
-			if (opt[cur - 1].mlen == 1) {
-				litlen = opt[cur - 1].litlen + 1;
-				if (cur > litlen) {
-					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-				} else
-					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-			} else {
-				litlen = 1;
-				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-			}
-
-			if (cur > last_pos || price <= opt[cur].price)
-				SET_PRICE(cur, 1, 0, litlen, price);
-
-			if (cur == last_pos)
-				break;
-
-			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-				continue;
-
-			mlen = opt[cur].mlen;
-			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-				opt[cur].rep[2] = opt[cur - mlen].rep[1];
-				opt[cur].rep[1] = opt[cur - mlen].rep[0];
-				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-			} else {
-				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-				opt[cur].rep[0] =
-				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-			}
-
-			best_mlen = minMatch;
-			{
-				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-				for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */
-					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-					if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) &&
-					    (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
-						mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch;
-
-						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-							best_mlen = mlen;
-							best_off = i;
-							last_pos = cur + 1;
-							goto _storeSequence;
-						}
-
-						best_off = i - (opt[cur].mlen != 1);
-						if (mlen > best_mlen)
-							best_mlen = mlen;
-
-						do {
-							if (opt[cur].mlen == 1) {
-								litlen = opt[cur].litlen;
-								if (cur > litlen) {
-									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-															best_off, mlen - MINMATCH, ultra);
-								} else
-									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-							} else {
-								litlen = 0;
-								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-							}
-
-							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-								SET_PRICE(cur + mlen, mlen, i, litlen, price);
-							mlen--;
-						} while (mlen >= minMatch);
-					}
-				}
-			}
-
-			match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
-
-			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-				best_mlen = matches[match_num - 1].len;
-				best_off = matches[match_num - 1].off;
-				last_pos = cur + 1;
-				goto _storeSequence;
-			}
-
-			/* set prices using matches at position = cur */
-			for (u = 0; u < match_num; u++) {
-				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-				best_mlen = matches[u].len;
-
-				while (mlen <= best_mlen) {
-					if (opt[cur].mlen == 1) {
-						litlen = opt[cur].litlen;
-						if (cur > litlen)
-							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-													matches[u].off - 1, mlen - MINMATCH, ultra);
-						else
-							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-					} else {
-						litlen = 0;
-						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-					}
-
-					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-					mlen++;
-				}
-			}
-		}
-
-		best_mlen = opt[last_pos].mlen;
-		best_off = opt[last_pos].off;
-		cur = last_pos - best_mlen;
-
-	/* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-		opt[0].mlen = 1;
-
-		while (1) {
-			mlen = opt[cur].mlen;
-			offset = opt[cur].off;
-			opt[cur].mlen = best_mlen;
-			opt[cur].off = best_off;
-			best_mlen = mlen;
-			best_off = offset;
-			if (mlen > cur)
-				break;
-			cur -= mlen;
-		}
-
-		for (u = 0; u <= last_pos;) {
-			u += opt[u].mlen;
-		}
-
-		for (cur = 0; cur < last_pos;) {
-			mlen = opt[cur].mlen;
-			if (mlen == 1) {
-				ip++;
-				cur++;
-				continue;
-			}
-			offset = opt[cur].off;
-			cur += mlen;
-			litLength = (U32)(ip - anchor);
-
-			if (offset > ZSTD_REP_MOVE_OPT) {
-				rep[2] = rep[1];
-				rep[1] = rep[0];
-				rep[0] = offset - ZSTD_REP_MOVE_OPT;
-				offset--;
-			} else {
-				if (offset != 0) {
-					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-					if (offset != 1)
-						rep[2] = rep[1];
-					rep[1] = rep[0];
-					rep[0] = best_off;
-				}
-				if (litLength == 0)
-					offset--;
-			}
-
-			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			anchor = ip = ip + mlen;
-		}
-	} /* for (cur=0; cur < last_pos; ) */
-
-	/* Save reps for next block */
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			ctx->repToConfirm[i] = rep[i];
-	}
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const U32 lowestIndex = ctx->lowLimit;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-
-	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-	const U32 sufficient_len = ctx->params.cParams.targetLength;
-	const U32 mls = ctx->params.cParams.searchLength;
-	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-	ZSTD_match_t *matches = seqStorePtr->matchTable;
-	const BYTE *inr;
-
-	/* init */
-	U32 offset, rep[ZSTD_REP_NUM];
-	{
-		U32 i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			rep[i] = ctx->rep[i];
-	}
-
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-	ip += (ip == prefixStart);
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		U32 cur, match_num, last_pos, litlen, price;
-		U32 u, mlen, best_mlen, best_off, litLength;
-		U32 curr = (U32)(ip - base);
-		memset(opt, 0, sizeof(ZSTD_optimal_t));
-		last_pos = 0;
-		opt[0].litlen = (U32)(ip - anchor);
-
-		/* check repCode */
-		{
-			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-			for (i = (ip == anchor); i < last_i; i++) {
-				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-				const U32 repIndex = (U32)(curr - repCur);
-				const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-				const BYTE *const repMatch = repBase + repIndex;
-				if ((repCur > 0 && repCur <= (S32)curr) &&
-				    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-					/* repcode detected we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-						best_mlen = mlen;
-						best_off = i;
-						cur = 0;
-						last_pos = 1;
-						goto _storeSequence;
-					}
-
-					best_off = i - (ip == anchor);
-					litlen = opt[0].litlen;
-					do {
-						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-						if (mlen > last_pos || price < opt[mlen].price)
-							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-						mlen--;
-					} while (mlen >= minMatch);
-				}
-			}
-		}
-
-		match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
-
-		if (!last_pos && !match_num) {
-			ip++;
-			continue;
-		}
-
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				opt[0].rep[i] = rep[i];
-		}
-		opt[0].mlen = 1;
-
-		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-			best_mlen = matches[match_num - 1].len;
-			best_off = matches[match_num - 1].off;
-			cur = 0;
-			last_pos = 1;
-			goto _storeSequence;
-		}
-
-		best_mlen = (last_pos) ? last_pos : minMatch;
-
-		/* set prices using matches at position = 0 */
-		for (u = 0; u < match_num; u++) {
-			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-			best_mlen = matches[u].len;
-			litlen = opt[0].litlen;
-			while (mlen <= best_mlen) {
-				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-				if (mlen > last_pos || price < opt[mlen].price)
-					SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
-				mlen++;
-			}
-		}
-
-		if (last_pos < minMatch) {
-			ip++;
-			continue;
-		}
-
-		/* check further positions */
-		for (cur = 1; cur <= last_pos; cur++) {
-			inr = ip + cur;
-
-			if (opt[cur - 1].mlen == 1) {
-				litlen = opt[cur - 1].litlen + 1;
-				if (cur > litlen) {
-					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-				} else
-					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-			} else {
-				litlen = 1;
-				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-			}
-
-			if (cur > last_pos || price <= opt[cur].price)
-				SET_PRICE(cur, 1, 0, litlen, price);
-
-			if (cur == last_pos)
-				break;
-
-			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-				continue;
-
-			mlen = opt[cur].mlen;
-			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-				opt[cur].rep[2] = opt[cur - mlen].rep[1];
-				opt[cur].rep[1] = opt[cur - mlen].rep[0];
-				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-			} else {
-				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-				opt[cur].rep[0] =
-				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-			}
-
-			best_mlen = minMatch;
-			{
-				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-				for (i = (mlen != 1); i < last_i; i++) {
-					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-					const U32 repIndex = (U32)(curr + cur - repCur);
-					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-					const BYTE *const repMatch = repBase + repIndex;
-					if ((repCur > 0 && repCur <= (S32)(curr + cur)) &&
-					    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-					    && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-						/* repcode detected */
-						const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-						mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-							best_mlen = mlen;
-							best_off = i;
-							last_pos = cur + 1;
-							goto _storeSequence;
-						}
-
-						best_off = i - (opt[cur].mlen != 1);
-						if (mlen > best_mlen)
-							best_mlen = mlen;
-
-						do {
-							if (opt[cur].mlen == 1) {
-								litlen = opt[cur].litlen;
-								if (cur > litlen) {
-									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-															best_off, mlen - MINMATCH, ultra);
-								} else
-									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-							} else {
-								litlen = 0;
-								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-							}
-
-							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-								SET_PRICE(cur + mlen, mlen, i, litlen, price);
-							mlen--;
-						} while (mlen >= minMatch);
-					}
-				}
-			}
-
-			match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
-
-			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-				best_mlen = matches[match_num - 1].len;
-				best_off = matches[match_num - 1].off;
-				last_pos = cur + 1;
-				goto _storeSequence;
-			}
-
-			/* set prices using matches at position = cur */
-			for (u = 0; u < match_num; u++) {
-				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-				best_mlen = matches[u].len;
-
-				while (mlen <= best_mlen) {
-					if (opt[cur].mlen == 1) {
-						litlen = opt[cur].litlen;
-						if (cur > litlen)
-							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-													matches[u].off - 1, mlen - MINMATCH, ultra);
-						else
-							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-					} else {
-						litlen = 0;
-						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-					}
-
-					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-					mlen++;
-				}
-			}
-		} /* for (cur = 1; cur <= last_pos; cur++) */
-
-		best_mlen = opt[last_pos].mlen;
-		best_off = opt[last_pos].off;
-		cur = last_pos - best_mlen;
-
-	/* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-		opt[0].mlen = 1;
-
-		while (1) {
-			mlen = opt[cur].mlen;
-			offset = opt[cur].off;
-			opt[cur].mlen = best_mlen;
-			opt[cur].off = best_off;
-			best_mlen = mlen;
-			best_off = offset;
-			if (mlen > cur)
-				break;
-			cur -= mlen;
-		}
-
-		for (u = 0; u <= last_pos;) {
-			u += opt[u].mlen;
-		}
-
-		for (cur = 0; cur < last_pos;) {
-			mlen = opt[cur].mlen;
-			if (mlen == 1) {
-				ip++;
-				cur++;
-				continue;
-			}
-			offset = opt[cur].off;
-			cur += mlen;
-			litLength = (U32)(ip - anchor);
-
-			if (offset > ZSTD_REP_MOVE_OPT) {
-				rep[2] = rep[1];
-				rep[1] = rep[0];
-				rep[0] = offset - ZSTD_REP_MOVE_OPT;
-				offset--;
-			} else {
-				if (offset != 0) {
-					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-					if (offset != 1)
-						rep[2] = rep[1];
-					rep[1] = rep[0];
-					rep[0] = best_off;
-				}
-
-				if (litLength == 0)
-					offset--;
-			}
-
-			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			anchor = ip = ip + mlen;
-		}
-	} /* for (cur=0; cur < last_pos; ) */
-
-	/* Save reps for next block */
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			ctx->repToConfirm[i] = rep[i];
-	}
-
-	/* Last Literals */
-	{
-		size_t lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-#endif /* ZSTD_OPT_H_91842398743 */
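
The optimal parser removed above threads a three-slot recent-offset history through both the price table and the final sequence emission. As a hedged distillation of that bookkeeping, the hypothetical helper below (not part of the zstd API; it omits the final offset adjustment used for sequence encoding) captures the rule: offsets above ZSTD_REP_MOVE_OPT carry a brand-new match offset, smaller nonzero values select and rotate an entry of the repcode history.

/* Distilled from the removed store loop; U32 and ZSTD_REP_MOVE_OPT
 * are the types/constants used by the deleted code above.
 */
static void demo_update_reps(U32 rep[3], U32 offset)
{
	if (offset > ZSTD_REP_MOVE_OPT) {		/* brand-new offset */
		rep[2] = rep[1];
		rep[1] = rep[0];
		rep[0] = offset - ZSTD_REP_MOVE_OPT;
	} else if (offset != 0) {			/* repcode reuse */
		U32 best = (offset == ZSTD_REP_MOVE_OPT) ? rep[0] - 1
							 : rep[offset];
		if (offset != 1)
			rep[2] = rep[1];
		rep[1] = rep[0];
		rep[0] = best;
	}
}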
diff --git a/mm/Kconfig b/mm/Kconfig
index 068ce59..28edafc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -890,6 +890,9 @@
 config KMAP_LOCAL
 	bool
 
+config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+	bool
+
 # struct io_mapping based helper.  Selected by drivers that need them
 config IO_MAPPING
 	bool
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index eccc14b..9b520bb 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -32,7 +32,7 @@
 	if (*ppos)
 		return ERR_PTR(-EINVAL);
 
-	kbuf = kmalloc(count + 1, GFP_KERNEL);
+	kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
 	if (!kbuf)
 		return ERR_PTR(-ENOMEM);
 
@@ -133,7 +133,7 @@
 	char *kbuf;
 	ssize_t len;
 
-	kbuf = kmalloc(count, GFP_KERNEL);
+	kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
 	if (!kbuf)
 		return -ENOMEM;
 
@@ -452,7 +452,7 @@
 	char *kbuf;
 	ssize_t len;
 
-	kbuf = kmalloc(count, GFP_KERNEL);
+	kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
 	if (!kbuf)
 		return -ENOMEM;
 
@@ -578,7 +578,7 @@
 	char *kbuf;
 	ssize_t len;
 
-	kbuf = kmalloc(count, GFP_KERNEL);
+	kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
 	if (!kbuf)
 		return -ENOMEM;
 
@@ -877,12 +877,14 @@
 		return -EINVAL;
 	}
 
+	mutex_lock(&damon_dbgfs_lock);
 	if (!strncmp(kbuf, "on", count)) {
 		int i;
 
 		for (i = 0; i < dbgfs_nr_ctxs; i++) {
 			if (damon_targets_empty(dbgfs_ctxs[i])) {
 				kfree(kbuf);
+				mutex_unlock(&damon_dbgfs_lock);
 				return -EINVAL;
 			}
 		}
@@ -892,6 +894,7 @@
 	} else {
 		ret = -EINVAL;
 	}
+	mutex_unlock(&damon_dbgfs_lock);
 
 	if (!ret)
 		ret = count;
@@ -944,15 +947,16 @@
 
 static int __init damon_dbgfs_init(void)
 {
-	int rc;
+	int rc = -ENOMEM;
 
+	mutex_lock(&damon_dbgfs_lock);
 	dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
 	if (!dbgfs_ctxs)
-		return -ENOMEM;
+		goto out;
 	dbgfs_ctxs[0] = dbgfs_new_ctx();
 	if (!dbgfs_ctxs[0]) {
 		kfree(dbgfs_ctxs);
-		return -ENOMEM;
+		goto out;
 	}
 	dbgfs_nr_ctxs = 1;
 
@@ -963,6 +967,8 @@
 		pr_err("%s: dbgfs init failed\n", __func__);
 	}
 
+out:
+	mutex_unlock(&damon_dbgfs_lock);
 	return rc;
 }
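
Beyond serializing init and the "on"/"off" write path behind damon_dbgfs_lock, every dbgfs write handler above gains __GFP_NOWARN because the allocation size comes straight from user space; an oversized write would otherwise trigger allocation-failure splats. A minimal hedged sketch of that idiom, with a hypothetical demo_write() handler:

static ssize_t demo_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	char *kbuf;

	/* count is user-controlled: suppress the failure warning */
	kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
	if (!kbuf)
		return -ENOMEM;
	if (copy_from_user(kbuf, buf, count)) {
		kfree(kbuf);
		return -EFAULT;
	}
	kbuf[count] = '\0';
	/* ... parse kbuf ... */
	kfree(kbuf);
	return count;
}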
 
diff --git a/mm/highmem.c b/mm/highmem.c
index 88f65f1..7626790 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -359,7 +359,6 @@
 }
 EXPORT_SYMBOL(kunmap_high);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
 {
@@ -416,7 +415,6 @@
 	BUG_ON((start1 | start2 | end1 | end2) != 0);
 }
 EXPORT_SYMBOL(zero_user_segments);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_HIGHMEM */
 
 #ifdef CONFIG_KMAP_LOCAL
@@ -503,16 +501,22 @@
 
 static pte_t *__kmap_pte;
 
-static pte_t *kmap_get_pte(void)
+static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
 {
+	if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
+		/*
+		 * Set by the arch if __kmap_pte[-idx] does not produce
+		 * the correct entry.
+		 */
+		return virt_to_kpte(vaddr);
 	if (!__kmap_pte)
 		__kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
-	return __kmap_pte;
+	return &__kmap_pte[-idx];
 }
 
 void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
 {
-	pte_t pteval, *kmap_pte = kmap_get_pte();
+	pte_t pteval, *kmap_pte;
 	unsigned long vaddr;
 	int idx;
 
@@ -524,9 +528,10 @@
 	preempt_disable();
 	idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-	BUG_ON(!pte_none(*(kmap_pte - idx)));
+	kmap_pte = kmap_get_pte(vaddr, idx);
+	BUG_ON(!pte_none(*kmap_pte));
 	pteval = pfn_pte(pfn, prot);
-	arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
+	arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
 	arch_kmap_local_post_map(vaddr, pteval);
 	current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
 	preempt_enable();
@@ -559,7 +564,7 @@
 void kunmap_local_indexed(void *vaddr)
 {
 	unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
-	pte_t *kmap_pte = kmap_get_pte();
+	pte_t *kmap_pte;
 	int idx;
 
 	if (addr < __fix_to_virt(FIX_KMAP_END) ||
@@ -584,8 +589,9 @@
 	idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
 	WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
 
+	kmap_pte = kmap_get_pte(addr, idx);
 	arch_kmap_local_pre_unmap(addr);
-	pte_clear(&init_mm, addr, kmap_pte - idx);
+	pte_clear(&init_mm, addr, kmap_pte);
 	arch_kmap_local_post_unmap(addr);
 	current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
 	kmap_local_idx_pop();
@@ -607,7 +613,7 @@
 void __kmap_local_sched_out(void)
 {
 	struct task_struct *tsk = current;
-	pte_t *kmap_pte = kmap_get_pte();
+	pte_t *kmap_pte;
 	int i;
 
 	/* Clear kmaps */
@@ -634,8 +640,9 @@
 		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
 
 		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+		kmap_pte = kmap_get_pte(addr, idx);
 		arch_kmap_local_pre_unmap(addr);
-		pte_clear(&init_mm, addr, kmap_pte - idx);
+		pte_clear(&init_mm, addr, kmap_pte);
 		arch_kmap_local_post_unmap(addr);
 	}
 }
@@ -643,7 +650,7 @@
 void __kmap_local_sched_in(void)
 {
 	struct task_struct *tsk = current;
-	pte_t *kmap_pte = kmap_get_pte();
+	pte_t *kmap_pte;
 	int i;
 
 	/* Restore kmaps */
@@ -663,7 +670,8 @@
 		/* See comment in __kmap_local_sched_out() */
 		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
 		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-		set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+		kmap_pte = kmap_get_pte(addr, idx);
+		set_pte_at(&init_mm, addr, kmap_pte, pteval);
 		arch_kmap_local_post_map(addr, pteval);
 	}
 }
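
Taken together, the highmem hunks funnel every fixmap PTE lookup through the new two-argument helper: architectures whose fixmap PTEs are not one linear array select CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY and get a per-address page-table walk, while everyone else keeps the cached base pointer indexed downwards. An annotated restatement of the helper from the hunk above, for reference:

static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
{
	/* Arch opted out of the linear array: __kmap_pte[-idx] would
	 * be wrong, so do a full walk for this address.
	 */
	if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
		return virt_to_kpte(vaddr);
	/* Common case: resolve the base PTE once, then index from it. */
	if (!__kmap_pte)
		__kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
	return &__kmap_pte[-idx];
}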
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e09159c..abcd178 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1037,8 +1037,10 @@
 	 */
 	struct resv_map *reservations = vma_resv_map(vma);
 
-	if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+	if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		resv_map_put_hugetlb_cgroup_uncharge_info(reservations);
 		kref_put(&reservations->refs, resv_map_release);
+	}
 
 	reset_vma_resv_huge_pages(vma);
 }
@@ -4917,9 +4919,9 @@
 
 		move_huge_pte(vma, old_addr, new_addr, src_pte);
 	}
-	i_mmap_unlock_write(mapping);
 	flush_tlb_range(vma, old_end - len, old_end);
 	mmu_notifier_invalidate_range_end(&range);
+	i_mmap_unlock_write(mapping);
 
 	return len + old_addr - old_end;
 }
@@ -4937,6 +4939,7 @@
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 	struct mmu_notifier_range range;
+	bool force_flush = false;
 
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
@@ -4965,10 +4968,8 @@
 		ptl = huge_pte_lock(h, mm, ptep);
 		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
 			spin_unlock(ptl);
-			/*
-			 * We just unmapped a page of PMDs by clearing a PUD.
-			 * The caller's TLB flush range should cover this area.
-			 */
+			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+			force_flush = true;
 			continue;
 		}
 
@@ -5025,6 +5026,22 @@
 	}
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_end_vma(tlb, vma);
+
+	/*
+	 * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
+	 * could defer the flush until now, since by holding i_mmap_rwsem we
+	 * guaranteed that the last reference would not be dropped. But we must
+	 * do the flushing before we return, as otherwise i_mmap_rwsem will be
+	 * dropped and the last reference to the shared PMDs page might be
+	 * dropped as well.
+	 *
+	 * In theory we could defer the freeing of the PMD pages as well, but
+	 * huge_pmd_unshare() relies on the exact page_count for the PMD page to
+	 * detect sharing, so we cannot defer the release of the page either.
+	 * Instead, do flush now.
+	 */
+	if (force_flush)
+		tlb_flush_mmu_tlbonly(tlb);
 }
 
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
@@ -5734,13 +5751,14 @@
 	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
-	bool new_pagecache_page = false;
+	bool page_in_pagecache = false;
 
 	if (is_continue) {
 		ret = -EFAULT;
 		page = find_lock_page(mapping, idx);
 		if (!page)
 			goto out;
+		page_in_pagecache = true;
 	} else if (!*pagep) {
 		/* If a page already exists, then it's UFFDIO_COPY for
 		 * a non-missing case. Return -EEXIST.
@@ -5828,7 +5846,7 @@
 		ret = huge_add_to_page_cache(page, mapping, idx);
 		if (ret)
 			goto out_release_nounlock;
-		new_pagecache_page = true;
+		page_in_pagecache = true;
 	}
 
 	ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5892,7 +5910,7 @@
 	if (vm_shared || is_continue)
 		unlock_page(page);
 out_release_nounlock:
-	if (!new_pagecache_page)
+	if (!page_in_pagecache)
 		restore_reserve_on_error(h, dst_vma, dst_addr, page);
 	put_page(page);
 	goto out;
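
The PMD-unshare hunks earlier in this file drop the assumption that the caller's TLB flush covers unshared ranges: each unshare now records a PUD-sized hole in the mmu_gather and forces a flush before i_mmap_rwsem is released. A hedged, condensed walker showing that shape — a hypothetical simplification of the real loop (which also handles the normal unmap work per huge page):

static void demo_unshare_range(struct mmu_gather *tlb,
			       struct vm_area_struct *vma,
			       unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address = start;
	bool force_flush = false;
	spinlock_t *ptl;
	pte_t *ptep;

	for (; address < end; address += PUD_SIZE) {
		ptep = huge_pte_offset(mm, address, PUD_SIZE);
		if (!ptep)
			continue;
		ptl = huge_pte_lock(hstate_vma(vma), mm, ptep);
		if (huge_pmd_unshare(mm, vma, &address, ptep)) {
			/* a PUD worth of mappings vanished: record it */
			tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
			force_flush = true;
		}
		spin_unlock(ptl);
	}

	/* Flush while the caller still holds i_mmap_rwsem, which pins
	 * the last reference to the shared PMD page.
	 */
	if (force_flush)
		tlb_flush_mmu_tlbonly(tlb);
}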
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 781605e..6863a83 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5558,7 +5558,7 @@
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
-	VM_BUG_ON(compound && !folio_test_multi(folio));
+	VM_BUG_ON(compound && !folio_test_large(folio));
 
 	/*
 	 * Prevent mem_cgroup_migrate() from looking at
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f64ebb6..07c875f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -58,7 +58,6 @@
 #include <linux/ratelimit.h>
 #include <linux/page-isolation.h>
 #include <linux/pagewalk.h>
-#include <linux/shmem_fs.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -868,7 +867,6 @@
 {
 	int ret;
 	struct address_space *mapping;
-	bool extra_pins;
 
 	delete_from_lru_cache(p);
 
@@ -898,23 +896,17 @@
 	}
 
 	/*
-	 * The shmem page is kept in page cache instead of truncating
-	 * so is expected to have an extra refcount after error-handling.
-	 */
-	extra_pins = shmem_mapping(mapping);
-
-	/*
 	 * Truncation is a bit tricky. Enable it per file system for now.
 	 *
 	 * Open: to take i_rwsem or not for this? Right now we don't.
 	 */
 	ret = truncate_error_page(p, page_to_pfn(p), mapping);
-	if (has_extra_refcount(ps, p, extra_pins))
-		ret = MF_FAILED;
-
 out:
 	unlock_page(p);
 
+	if (has_extra_refcount(ps, p, false))
+		ret = MF_FAILED;
+
 	return ret;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d498bb..a613f8e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2967,7 +2967,7 @@
  */
 void folio_wait_stable(struct folio *folio)
 {
-	if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
+	if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
 		folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);
diff --git a/mm/shmem.c b/mm/shmem.c
index f0eee4e..18f93c2 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2303,6 +2303,7 @@
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
+		mapping_set_large_folios(inode->i_mapping);
 
 		switch (mode & S_IFMT) {
 		default:
@@ -2456,7 +2457,6 @@
 	struct inode *inode = mapping->host;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	pgoff_t index = pos >> PAGE_SHIFT;
-	int ret = 0;
 
 	/* i_rwsem is held by caller */
 	if (unlikely(info->seals & (F_SEAL_GROW |
@@ -2467,15 +2467,7 @@
 			return -EPERM;
 	}
 
-	ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
-
-	if (*pagep && PageHWPoison(*pagep)) {
-		unlock_page(*pagep);
-		put_page(*pagep);
-		ret = -EIO;
-	}
-
-	return ret;
+	return shmem_getpage(inode, index, pagep, SGP_WRITE);
 }
 
 static int
@@ -2562,12 +2554,6 @@
 			if (sgp == SGP_CACHE)
 				set_page_dirty(page);
 			unlock_page(page);
-
-			if (PageHWPoison(page)) {
-				put_page(page);
-				error = -EIO;
-				break;
-			}
 		}
 
 		/*
@@ -3107,8 +3093,7 @@
 		page = find_get_page(inode->i_mapping, 0);
 		if (!page)
 			return ERR_PTR(-ECHILD);
-		if (PageHWPoison(page) ||
-		    !PageUptodate(page)) {
+		if (!PageUptodate(page)) {
 			put_page(page);
 			return ERR_PTR(-ECHILD);
 		}
@@ -3116,11 +3101,6 @@
 		error = shmem_getpage(inode, 0, &page, SGP_READ);
 		if (error)
 			return ERR_PTR(error);
-		if (page && PageHWPoison(page)) {
-			unlock_page(page);
-			put_page(page);
-			return ERR_PTR(-ECHILD);
-		}
 		unlock_page(page);
 	}
 	set_delayed_call(done, shmem_put_link, page);
@@ -3771,13 +3751,6 @@
 	kmem_cache_destroy(shmem_inode_cachep);
 }
 
-/* Keep the page in page cache instead of truncating it */
-static int shmem_error_remove_page(struct address_space *mapping,
-				   struct page *page)
-{
-	return 0;
-}
-
 const struct address_space_operations shmem_aops = {
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_no_writeback,
@@ -3788,7 +3761,7 @@
 #ifdef CONFIG_MIGRATION
 	.migratepage	= migrate_page,
 #endif
-	.error_remove_page = shmem_error_remove_page,
+	.error_remove_page = generic_error_remove_page,
 };
 EXPORT_SYMBOL(shmem_aops);
 
@@ -3898,7 +3871,7 @@
 	.parameters	= shmem_fs_parameters,
 #endif
 	.kill_sb	= kill_litter_super,
-	.fs_flags	= FS_USERNS_MOUNT | FS_THP_SUPPORT,
+	.fs_flags	= FS_USERNS_MOUNT,
 };
 
 int __init shmem_init(void)
@@ -4199,10 +4172,6 @@
 		page = ERR_PTR(error);
 	else
 		unlock_page(page);
-
-	if (PageHWPoison(page))
-		page = ERR_PTR(-EIO);
-
 	return page;
 #else
 	/*
diff --git a/mm/slab.c b/mm/slab.c
index da132a9..ca4822f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3733,14 +3733,13 @@
 	if (!cachep)
 		return;
 
+	trace_kmem_cache_free(_RET_IP_, objp, cachep->name);
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, cachep->object_size);
 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(objp, cachep->object_size);
 	__cache_free(cachep, objp, _RET_IP_);
 	local_irq_restore(flags);
-
-	trace_kmem_cache_free(_RET_IP_, objp, cachep->name);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
diff --git a/mm/slab.h b/mm/slab.h
index 58c01a3..56ad7ee 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -147,7 +147,7 @@
 #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
 			  SLAB_TEMPORARY | SLAB_ACCOUNT)
 #else
-#define SLAB_CACHE_FLAGS (0)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
 #endif
 
 /* Common flags available with current configuration */
diff --git a/mm/slob.c b/mm/slob.c
index 74d3f6e..03deee1 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -666,6 +666,7 @@
 void kmem_cache_free(struct kmem_cache *c, void *b)
 {
 	kmemleak_free_recursive(b, c->flags);
+	trace_kmem_cache_free(_RET_IP_, b, c->name);
 	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct slob_rcu *slob_rcu;
 		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
@@ -674,8 +675,6 @@
 	} else {
 		__kmem_cache_free(b, c->size);
 	}
-
-	trace_kmem_cache_free(_RET_IP_, b, c->name);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
diff --git a/mm/slub.c b/mm/slub.c
index f7368bf..a862682 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3526,8 +3526,8 @@
 	s = cache_from_obj(s, x);
 	if (!s)
 		return;
-	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x, s->name);
+	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
diff --git a/mm/swap.c b/mm/swap.c
index 1841c24..e8c9dc6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -156,6 +156,7 @@
 	}
 
 	free_unref_page_list(pages);
+	INIT_LIST_HEAD(pages);
 }
 EXPORT_SYMBOL(put_pages_list);
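
The one-line mm/swap.c change makes put_pages_list() leave the list head in a reusable state; without the INIT_LIST_HEAD() the head keeps pointing at freed page structures. A hedged usage sketch, where demo_collect() is a hypothetical producer:

/* Hypothetical producer that adds page references to the list. */
extern void demo_collect(struct list_head *pages);

static void demo_drop_pages(void)
{
	LIST_HEAD(pages);

	demo_collect(&pages);
	put_pages_list(&pages);

	/* With the fix the head is re-initialised, so this holds and
	 * the list may be refilled; before, it dangled at freed pages.
	 */
	WARN_ON(!list_empty(&pages));
}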
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0780c2a..ac6f036 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -232,11 +232,6 @@
 		goto out;
 	}
 
-	if (PageHWPoison(page)) {
-		ret = -EIO;
-		goto out_release;
-	}
-
 	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
 				       page, false, wp_copy);
 	if (ret)
diff --git a/mm/util.c b/mm/util.c
index e58151a..741ba32 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -670,7 +670,7 @@
 {
 	long i, nr;
 
-	if (folio_test_single(folio))
+	if (!folio_test_large(folio))
 		return atomic_read(&folio->_mapcount) >= 0;
 	if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
 		return true;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a3a0a5e..abaa5d9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -184,9 +184,6 @@
 	if (err)
 		goto out_unregister_netdev;
 
-	/* Account for reference in struct vlan_dev_priv */
-	dev_hold(real_dev);
-
 	vlan_stacked_transfer_operstate(real_dev, dev, vlan);
 	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ab6dee2..a54535c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -615,6 +615,9 @@
 	if (!vlan->vlan_pcpu_stats)
 		return -ENOMEM;
 
+	/* Get vlan's reference to real_dev */
+	dev_hold(real_dev);
+
 	return 0;
 }
 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 013cbdb..6a6898e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1153,12 +1153,11 @@
 
 int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 {
-	int err = 0;
+	int err;
 
 	dout("init\n");
 	memset(monc, 0, sizeof(*monc));
 	monc->client = cl;
-	monc->monmap = NULL;
 	mutex_init(&monc->mutex);
 
 	err = build_initial_monmap(monc);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ff8624a..1c58155 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5310,14 +5310,14 @@
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
 
-static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
-				     u64 src_snapid, u64 src_version,
-				     struct ceph_object_id *src_oid,
-				     struct ceph_object_locator *src_oloc,
-				     u32 src_fadvise_flags,
-				     u32 dst_fadvise_flags,
-				     u32 truncate_seq, u64 truncate_size,
-				     u8 copy_from_flags)
+int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+			      u64 src_snapid, u64 src_version,
+			      struct ceph_object_id *src_oid,
+			      struct ceph_object_locator *src_oloc,
+			      u32 src_fadvise_flags,
+			      u32 dst_fadvise_flags,
+			      u32 truncate_seq, u64 truncate_size,
+			      u8 copy_from_flags)
 {
 	struct ceph_osd_req_op *op;
 	struct page **pages;
@@ -5346,49 +5346,7 @@
 				 op->indata_len, 0, false, true);
 	return 0;
 }
-
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-			u64 src_snapid, u64 src_version,
-			struct ceph_object_id *src_oid,
-			struct ceph_object_locator *src_oloc,
-			u32 src_fadvise_flags,
-			struct ceph_object_id *dst_oid,
-			struct ceph_object_locator *dst_oloc,
-			u32 dst_fadvise_flags,
-			u32 truncate_seq, u64 truncate_size,
-			u8 copy_from_flags)
-{
-	struct ceph_osd_request *req;
-	int ret;
-
-	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
-	if (!req)
-		return -ENOMEM;
-
-	req->r_flags = CEPH_OSD_FLAG_WRITE;
-
-	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
-	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
-
-	ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
-					src_oloc, src_fadvise_flags,
-					dst_fadvise_flags, truncate_seq,
-					truncate_size, copy_from_flags);
-	if (ret)
-		goto out;
-
-	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
-	if (ret)
-		goto out;
-
-	ceph_osdc_start_request(osdc, req, false);
-	ret = ceph_osdc_wait_request(osdc, req);
-
-out:
-	ceph_osdc_put_request(req);
-	return ret;
-}
-EXPORT_SYMBOL(ceph_osdc_copy_from);
+EXPORT_SYMBOL(osd_req_op_copy_from_init);
 
 int __init ceph_osdc_setup(void)
 {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5ba4f94..5ad72db 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4229,7 +4229,9 @@
 	WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
 		cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
 		cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-	WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+	if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+		return;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
diff --git a/net/core/filter.c b/net/core/filter.c
index e471c9b..6102f09 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7162,6 +7162,8 @@
 #endif
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_cg_sock_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10327,6 +10329,8 @@
 		return &sk_reuseport_load_bytes_relative_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_ptr_cookie_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -10833,6 +10837,8 @@
 	case BPF_FUNC_skc_to_unix_sock:
 		func = &bpf_skc_to_unix_sock_proto;
 		break;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 47931c8..72ba027 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1779,6 +1779,7 @@
 {
 	neigh_tables[index] = NULL;
 	/* It is not clean... Fix it to unload IPv6 module safely */
+	cancel_delayed_work_sync(&tbl->managed_work);
 	cancel_delayed_work_sync(&tbl->gc_work);
 	del_timer_sync(&tbl->proxy_timer);
 	pneigh_queue_purge(&tbl->proxy_queue);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b60e43..1a69784 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -49,12 +49,6 @@
 	 * which is the XDP_TX use-case.
 	 */
 	if (pool->p.flags & PP_FLAG_DMA_MAP) {
-		/* DMA-mapping is not supported on 32-bit systems with
-		 * 64-bit DMA mapping.
-		 */
-		if (sizeof(dma_addr_t) > sizeof(unsigned long))
-			return -EOPNOTSUPP;
-
 		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
 		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 			return -EINVAL;
@@ -75,6 +69,10 @@
 		 */
 	}
 
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+	    pool->p.flags & PP_FLAG_PAGE_FRAG)
+		return -EINVAL;
+
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f2b2f2..41e91d0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2124,8 +2124,10 @@
 	newsk->sk_prot_creator = prot;
 
 	/* SANITY */
-	if (likely(newsk->sk_net_refcnt))
+	if (likely(newsk->sk_net_refcnt)) {
 		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
+	}
 	sk_node_init(&newsk->sk_node);
 	sock_lock_init(newsk);
 	bh_lock_sock(newsk);
@@ -2197,8 +2199,6 @@
 	newsk->sk_err_soft = 0;
 	newsk->sk_priority = 0;
 	newsk->sk_incoming_cpu = raw_smp_processor_id();
-	if (likely(newsk->sk_net_refcnt))
-		sock_inuse_add(sock_net(newsk), 1);
 
 	/* Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.rst for details)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 65e9bc1..20bcf86 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1719,7 +1719,7 @@
 	struct ethtool_coalesce coalesce;
 	int ret;
 
-	if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
+	if (!dev->ethtool_ops->set_coalesce || !dev->ethtool_ops->get_coalesce)
 		return -EOPNOTSUPP;
 
 	ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 2cf02b4..4bb9401 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -205,6 +205,8 @@
 		    offsetof(struct tcp_congestion_ops, release))
 			return &bpf_sk_getsockopt_proto;
 		return NULL;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ec73a0d..323e622 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2591,7 +2591,7 @@
 free:
 	kfree(t);
 out:
-	return -ENOBUFS;
+	return -ENOMEM;
 }
 
 static void __devinet_sysctl_unregister(struct net *net,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 9e81007..5dbd4b5 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1899,15 +1899,36 @@
 /* if any FIB entries reference this nexthop, any dst entries
  * need to be regenerated
  */
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+			      struct nexthop *replaced_nh)
 {
 	struct fib6_info *f6i;
+	struct nh_group *nhg;
+	int i;
 
 	if (!list_empty(&nh->fi_list))
 		rt_cache_flush(net);
 
 	list_for_each_entry(f6i, &nh->f6i_list, nh_list)
 		ipv6_stub->fib6_update_sernum(net, f6i);
+
+	/* if an IPv6 group was replaced, we have to release all old
+	 * dsts to make sure all refcounts are released
+	 */
+	if (!replaced_nh->is_group)
+		return;
+
+	/* new dsts must use only the new nexthop group */
+	synchronize_net();
+
+	nhg = rtnl_dereference(replaced_nh->nh_grp);
+	for (i = 0; i < nhg->num_nh; i++) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+		if (nhi->family == AF_INET6)
+			ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+	}
 }
 
 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -2247,7 +2268,7 @@
 		err = replace_nexthop_single(net, old, new, extack);
 
 	if (!err) {
-		nh_rt_cache_flush(net, old);
+		nh_rt_cache_flush(net, old, new);
 
 		__remove_nexthop(net, new, NULL);
 		nexthop_put(new);
@@ -2544,11 +2565,15 @@
 	/* sets nh_dev if successful */
 	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
 				      extack);
-	if (err)
+	if (err) {
+		/* IPv6 is not enabled, don't call fib6_nh_release */
+		if (err == -EAFNOSUPPORT)
+			goto out;
 		ipv6_stub->fib6_nh_release(fib6_nh);
-	else
+	} else {
 		nh->nh_flags = fib6_nh->fib_nh_flags;
-
+	}
+out:
 	return err;
 }
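
When an IPv6-capable group is replaced, stale per-cpu dsts could otherwise keep pinning the old nexthops. The nexthop.c hunk waits out a grace period so new lookups only see the new group, then releases each old member's cached dsts. A hedged condensation of that release step, mirroring the hunk inside a hypothetical wrapper:

/* demo_release_group_dsts() is a hypothetical wrapper around the
 * logic added to nh_rt_cache_flush() above.
 */
static void demo_release_group_dsts(struct nexthop *replaced_nh)
{
	struct nh_group *nhg;
	int i;

	if (!replaced_nh->is_group)
		return;

	synchronize_net();	/* new dsts now use only the new group */

	nhg = rtnl_dereference(replaced_nh->nh_grp);
	for (i = 0; i < nhg->num_nh; i++) {
		struct nh_info *nhi =
			rtnl_dereference(nhg->nh_entries[i].nh->nh_info);

		if (nhi->family == AF_INET6)
			ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
	}
}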
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b7796b4..bbb3d39 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1758,6 +1758,9 @@
 {
 	skb_frag_t *frag;
 
+	if (unlikely(offset_skb >= skb->len))
+		return NULL;
+
 	offset_skb -= skb_headlen(skb);
 	if ((int)offset_skb < 0 || skb_has_frag_list(skb))
 		return NULL;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 5e9d9c5..e07837e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -330,8 +330,6 @@
 		return;
 
 	if (tcp_in_slow_start(tp)) {
-		if (hystart && after(ack, ca->end_seq))
-			bictcp_hystart_reset(sk);
 		acked = tcp_slow_start(tp, acked);
 		if (!acked)
 			return;
@@ -391,6 +389,9 @@
 	struct bictcp *ca = inet_csk_ca(sk);
 	u32 threshold;
 
+	if (after(tp->snd_una, ca->end_seq))
+		bictcp_hystart_reset(sk);
+
 	if (hystart_detect & HYSTART_ACK_TRAIN) {
 		u32 now = bictcp_clock_us(sk);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 319dd7b..8bcecdd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1807,6 +1807,17 @@
 		skb = skb_recv_udp(sk, 0, 1, &err);
 		if (!skb)
 			return err;
+
+		if (udp_lib_checksum_complete(skb)) {
+			__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+					IS_UDPLITE(sk));
+			__UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+					IS_UDPLITE(sk));
+			atomic_inc(&sk->sk_drops);
+			kfree_skb(skb);
+			continue;
+		}
+
 		used = recv_actor(desc, skb, 0, skb->len);
 		if (used <= 0) {
 			if (!copied)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0c4da16..dab4a04 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1026,6 +1026,7 @@
 	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
 	.fib6_nh_init	   = fib6_nh_init,
 	.fib6_nh_release   = fib6_nh_release,
+	.fib6_nh_release_dsts = fib6_nh_release_dsts,
 	.fib6_update_sernum = fib6_update_sernum_stub,
 	.fib6_rt_update	   = fib6_rt_update,
 	.ip6_del_rt	   = ip6_del_rt,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ed2f061..f0bac6f 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -808,6 +808,12 @@
 		struct tcphdr *th;
 
 		offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+		if (offset < 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
 		uh = (void *)(skb->data + offset);
 		th = (void *)(skb->data + offset);
 		hdr_len += offset;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2f044a4..ff4e83e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,7 +174,7 @@
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
 	if (skb_dst(skb)->xfrm) {
-		IPCB(skb)->flags |= IPSKB_REROUTED;
+		IP6CB(skb)->flags |= IP6SKB_REROUTED;
 		return dst_output(net, sk, skb);
 	}
 #endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3ae25b8..42d60c7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3680,6 +3680,25 @@
 	fib_nh_common_release(&fib6_nh->nh_common);
 }
 
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+	int cpu;
+
+	if (!fib6_nh->rt6i_pcpu)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct rt6_info *pcpu_rt, **ppcpu_rt;
+
+		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+		pcpu_rt = xchg(ppcpu_rt, NULL);
+		if (pcpu_rt) {
+			dst_dev_put(&pcpu_rt->dst);
+			dst_release(&pcpu_rt->dst);
+		}
+	}
+}
+
 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      gfp_t gfp_flags,
 					      struct netlink_ext_ack *extack)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e2b791c..bd3d319 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -80,7 +80,8 @@
 	}
 
 	/* also validate MU-MIMO change */
-	monitor_sdata = rtnl_dereference(local->monitor_sdata);
+	monitor_sdata = wiphy_dereference(local->hw.wiphy,
+					  local->monitor_sdata);
 
 	if (!monitor_sdata &&
 	    (params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
@@ -840,7 +841,8 @@
 
 	mutex_lock(&local->mtx);
 	if (local->use_chanctx) {
-		sdata = rtnl_dereference(local->monitor_sdata);
+		sdata = wiphy_dereference(local->hw.wiphy,
+					  local->monitor_sdata);
 		if (sdata) {
 			ieee80211_vif_release_channel(sdata);
 			ret = ieee80211_vif_use_channel(sdata, chandef,
@@ -2707,7 +2709,8 @@
 		sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
 
 		if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
-			sdata = rtnl_dereference(local->monitor_sdata);
+			sdata = wiphy_dereference(local->hw.wiphy,
+						  local->monitor_sdata);
 			if (!sdata)
 				return -EOPNOTSUPP;
 		}
@@ -2767,7 +2770,8 @@
 	mutex_unlock(&local->iflist_mtx);
 
 	if (has_monitor) {
-		sdata = rtnl_dereference(local->monitor_sdata);
+		sdata = wiphy_dereference(local->hw.wiphy,
+					  local->monitor_sdata);
 		if (sdata) {
 			sdata->user_power_level = local->user_power_level;
 			if (txp_type != sdata->vif.bss_conf.txpower_type)
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 9a2145c..20aa5cc 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -588,7 +588,7 @@
 	 */
 	if (local->suspended) {
 		WARN_ON(local->wowlan);
-		WARN_ON(rtnl_dereference(local->monitor_sdata));
+		WARN_ON(rcu_access_pointer(local->monitor_sdata));
 		return;
 	}
 
@@ -961,6 +961,7 @@
 		return 0;
 
 	ASSERT_RTNL();
+	lockdep_assert_wiphy(local->hw.wiphy);
 
 	if (local->monitor_sdata)
 		return 0;
@@ -1028,6 +1029,7 @@
 		return;
 
 	ASSERT_RTNL();
+	lockdep_assert_wiphy(local->hw.wiphy);
 
 	mutex_lock(&local->iflist_mtx);
 
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index fb3aaa3..b71a142 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -72,19 +72,19 @@
 #endif
 
 static inline void
-ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
+ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-	if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
+	if (atomic_read(&local->tpt_led_active))
 		local->tpt_led_trigger->tx_bytes += bytes;
 #endif
 }
 
 static inline void
-ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
+ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-	if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
+	if (atomic_read(&local->tpt_led_active))
 		local->tpt_led_trigger->rx_bytes += bytes;
 #endif
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fc5c608..9541a4c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -364,7 +364,7 @@
 	 * the compiler to think we have walked past the end of the
 	 * struct member.
 	 */
-	pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
+	pos = (void *)&rthdr->it_optional[it_present + 1 - rthdr->it_optional];
 
 	/* the order of the following fields is important */
 
@@ -1952,7 +1952,8 @@
 		int keyid = rx->sta->ptk_idx;
 		sta_ptk = rcu_dereference(rx->sta->ptk[keyid]);
 
-		if (ieee80211_has_protected(fc)) {
+		if (ieee80211_has_protected(fc) &&
+		    !(status->flag & RX_FLAG_IV_STRIPPED)) {
 			cs = rx->sta->cipher_scheme;
 			keyid = ieee80211_get_keyid(rx->skb, cs);
 
@@ -4863,6 +4864,7 @@
 	struct ieee80211_rate *rate = NULL;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
 	WARN_ON_ONCE(softirq_count() == 0);
 
@@ -4959,9 +4961,9 @@
 	if (!(status->flag & RX_FLAG_8023))
 		skb = ieee80211_rx_monitor(local, skb, rate);
 	if (skb) {
-		ieee80211_tpt_led_trig_rx(local,
-					  ((struct ieee80211_hdr *)skb->data)->frame_control,
-					  skb->len);
+		if ((status->flag & RX_FLAG_8023) ||
+			ieee80211_is_data_present(hdr->frame_control))
+			ieee80211_tpt_led_trig_rx(local, skb->len);
 
 		if (status->flag & RX_FLAG_8023)
 			__ieee80211_rx_handle_8023(hw, pubsta, skb, list);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a756a19..278945e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1721,21 +1721,19 @@
  * Returns false if the frame couldn't be transmitted but was queued instead.
  */
 static bool __ieee80211_tx(struct ieee80211_local *local,
-			   struct sk_buff_head *skbs, int led_len,
-			   struct sta_info *sta, bool txpending)
+			   struct sk_buff_head *skbs, struct sta_info *sta,
+			   bool txpending)
 {
 	struct ieee80211_tx_info *info;
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_vif *vif;
 	struct sk_buff *skb;
 	bool result;
-	__le16 fc;
 
 	if (WARN_ON(skb_queue_empty(skbs)))
 		return true;
 
 	skb = skb_peek(skbs);
-	fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
 	info = IEEE80211_SKB_CB(skb);
 	sdata = vif_to_sdata(info->control.vif);
 	if (sta && !sta->uploaded)
@@ -1769,8 +1767,6 @@
 
 	result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
 
-	ieee80211_tpt_led_trig_tx(local, fc, led_len);
-
 	WARN_ON_ONCE(!skb_queue_empty(skbs));
 
 	return result;
@@ -1920,7 +1916,6 @@
 	ieee80211_tx_result res_prepare;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	bool result = true;
-	int led_len;
 
 	if (unlikely(skb->len < 10)) {
 		dev_kfree_skb(skb);
@@ -1928,7 +1923,6 @@
 	}
 
 	/* initialises tx */
-	led_len = skb->len;
 	res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
 
 	if (unlikely(res_prepare == TX_DROP)) {
@@ -1951,8 +1945,7 @@
 		return true;
 
 	if (!invoke_tx_handlers_late(&tx))
-		result = __ieee80211_tx(local, &tx.skbs, led_len,
-					tx.sta, txpending);
+		result = __ieee80211_tx(local, &tx.skbs, tx.sta, txpending);
 
 	return result;
 }
@@ -4175,6 +4168,7 @@
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct sk_buff *next;
+	int len = skb->len;
 
 	if (unlikely(skb->len < ETH_HLEN)) {
 		kfree_skb(skb);
@@ -4221,10 +4215,8 @@
 		}
 	} else {
 		/* we cannot process non-linear frames on this path */
-		if (skb_linearize(skb)) {
-			kfree_skb(skb);
-			goto out;
-		}
+		if (skb_linearize(skb))
+			goto out_free;
 
 		/* the frame could be fragmented, software-encrypted, and other
 		 * things so we cannot really handle checksum offload with it -
@@ -4258,7 +4250,10 @@
 	goto out;
  out_free:
 	kfree_skb(skb);
+	len = 0;
  out:
+	if (len)
+		ieee80211_tpt_led_trig_tx(local, len);
 	rcu_read_unlock();
 }
 
@@ -4396,8 +4391,7 @@
 }
 
 static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
-			      struct sk_buff *skb, int led_len,
-			      struct sta_info *sta,
+			      struct sk_buff *skb, struct sta_info *sta,
 			      bool txpending)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -4410,6 +4404,8 @@
 	if (sta)
 		sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift);
 
+	ieee80211_tpt_led_trig_tx(local, skb->len);
+
 	if (ieee80211_queue_skb(local, sdata, sta, skb))
 		return true;
 
@@ -4498,7 +4494,7 @@
 	if (key)
 		info->control.hw_key = &key->conf;
 
-	ieee80211_tx_8023(sdata, skb, skb->len, sta, false);
+	ieee80211_tx_8023(sdata, skb, sta, false);
 
 	return;
 
@@ -4637,7 +4633,7 @@
 		if (IS_ERR(sta) || (sta && !sta->uploaded))
 			sta = NULL;
 
-		result = ieee80211_tx_8023(sdata, skb, skb->len, sta, true);
+		result = ieee80211_tx_8023(sdata, skb, sta, true);
 	} else {
 		struct sk_buff_head skbs;
 
@@ -4647,7 +4643,7 @@
 		hdr = (struct ieee80211_hdr *)skb->data;
 		sta = sta_info_get(sdata, hdr->addr1);
 
-		result = __ieee80211_tx(local, &skbs, skb->len, sta, true);
+		result = __ieee80211_tx(local, &skbs, sta, true);
 	}
 
 	return result;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 39fa2a5..43df2f0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -796,7 +796,7 @@
 
 	sdata = rcu_dereference_check(local->monitor_sdata,
 				      lockdep_is_held(&local->iflist_mtx) ||
-				      lockdep_rtnl_is_held());
+				      lockdep_is_held(&local->hw.wiphy->mtx));
 	if (sdata &&
 	    (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
 	     sdata->flags & IEEE80211_SDATA_IN_DRIVER))
@@ -2381,7 +2381,7 @@
 				   IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
 
 	/* add interfaces */
-	sdata = rtnl_dereference(local->monitor_sdata);
+	sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
 	if (sdata) {
 		/* in HW restart it exists already */
 		WARN_ON(local->resuming);
@@ -2426,7 +2426,8 @@
 				WARN_ON(drv_add_chanctx(local, ctx));
 		mutex_unlock(&local->chanctx_mtx);
 
-		sdata = rtnl_dereference(local->monitor_sdata);
+		sdata = wiphy_dereference(local->hw.wiphy,
+					  local->monitor_sdata);
 		if (sdata && ieee80211_sdata_running(sdata))
 			ieee80211_assign_chanctx(local, sdata);
 	}
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 9ea6004..62c6733 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -143,7 +143,6 @@
 u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
 			     struct sta_info *sta, struct sk_buff *skb)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct mac80211_qos_map *qos_map;
 	bool qos;
 
@@ -156,7 +155,7 @@
 	else
 		qos = false;
 
-	if (!qos || (info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) {
+	if (!qos) {
 		skb->priority = 0; /* required for correct WPA/11i MIC */
 		return IEEE80211_AC_BE;
 	}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 7c3420a..fe98e4f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -422,28 +422,6 @@
 	return false;
 }
 
-/* MP_JOIN client subflow must wait for 4th ack before sending any data:
- * TCP can't schedule delack timer before the subflow is fully established.
- * MPTCP uses the delack timer to do 3rd ack retransmissions
- */
-static void schedule_3rdack_retransmission(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned long timeout;
-
-	/* reschedule with a timeout above RTT, as we must look only for drop */
-	if (tp->srtt_us)
-		timeout = tp->srtt_us << 1;
-	else
-		timeout = TCP_TIMEOUT_INIT;
-
-	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
-	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
-	icsk->icsk_ack.timeout = timeout;
-	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
-}
-
 static void clear_3rdack_retransmission(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -526,7 +504,15 @@
 		*size = TCPOLEN_MPTCP_MPJ_ACK;
 		pr_debug("subflow=%p", subflow);
 
-		schedule_3rdack_retransmission(sk);
+		/* We can use the full delegate action helper only from BH context.
+		 * If we are in process context - sk is flushing the backlog at
+		 * socket lock release time - just set the appropriate flag; it will
+		 * be handled by the release callback.
+		 */
+		if (sock_owned_by_user(sk))
+			set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
+		else
+			mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
 		return true;
 	}
 	return false;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b7e32e3..c82a76d2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1596,7 +1596,8 @@
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
-				mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+				mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+						       MPTCP_DELEGATE_SEND);
 				goto out;
 			}
 
@@ -2943,7 +2944,7 @@
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
 		else if (xmit_ssk)
-			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+			mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
 	} else {
 		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
 	}
@@ -2993,18 +2994,50 @@
 	__mptcp_update_rmem(sk);
 }
 
+/* MP_JOIN client subflow must wait for 4th ack before sending any data:
+ * TCP can't schedule delack timer before the subflow is fully established.
+ * MPTCP uses the delack timer to do 3rd ack retransmissions
+ */
+static void schedule_3rdack_retransmission(struct sock *ssk)
+{
+	struct inet_connection_sock *icsk = inet_csk(ssk);
+	struct tcp_sock *tp = tcp_sk(ssk);
+	unsigned long timeout;
+
+	if (mptcp_subflow_ctx(ssk)->fully_established)
+		return;
+
+	/* reschedule with a timeout above RTT, as we must look only for drop */
+	if (tp->srtt_us)
+		timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
+	else
+		timeout = TCP_TIMEOUT_INIT;
+	timeout += jiffies;
+
+	WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
+	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+	icsk->icsk_ack.timeout = timeout;
+	sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
+}
+
 void mptcp_subflow_process_delegated(struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct sock *sk = subflow->conn;
 
-	mptcp_data_lock(sk);
-	if (!sock_owned_by_user(sk))
-		__mptcp_subflow_push_pending(sk, ssk);
-	else
-		set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
-	mptcp_data_unlock(sk);
-	mptcp_subflow_delegated_done(subflow);
+	if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+		mptcp_data_lock(sk);
+		if (!sock_owned_by_user(sk))
+			__mptcp_subflow_push_pending(sk, ssk);
+		else
+			set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+		mptcp_data_unlock(sk);
+		mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
+	}
+	if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+		schedule_3rdack_retransmission(ssk);
+		mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
+	}
 }
 
 static int mptcp_hash(struct sock *sk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 67a61ac..d87cc04 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -387,6 +387,7 @@
 DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 
 #define MPTCP_DELEGATE_SEND		0
+#define MPTCP_DELEGATE_ACK		1
 
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
@@ -492,23 +493,23 @@
 
 void mptcp_subflow_process_delegated(struct sock *ssk);
 
-static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
 {
 	struct mptcp_delegated_action *delegated;
 	bool schedule;
 
+	/* the caller held the subflow bh socket lock */
+	lockdep_assert_in_softirq();
+
 	/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
 	 * ensures the below list check sees list updates done prior to status
 	 * bit changes
 	 */
-	if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+	if (!test_and_set_bit(action, &subflow->delegated_status)) {
 		/* still on delegated list from previous scheduling */
 		if (!list_empty(&subflow->delegated_node))
 			return;
 
-		/* the caller held the subflow bh socket lock */
-		lockdep_assert_in_softirq();
-
 		delegated = this_cpu_ptr(&mptcp_delegated_actions);
 		schedule = list_empty(&delegated->head);
 		list_add_tail(&subflow->delegated_node, &delegated->head);
@@ -533,16 +534,16 @@
 
 static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
 {
-	return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+	return !!READ_ONCE(subflow->delegated_status);
 }
 
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
 {
 	/* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
 	 * touching the status bit
 	 */
 	smp_wmb();
-	clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+	clear_bit(action, &subflow->delegated_status);
 }
 
 int mptcp_is_enabled(const struct net *net);
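
The protocol.h hunks generalise the delegation machinery from a single SEND bit to per-action bits in one status word: only the 0->1 transition of a bit queues the subflow, and the processing side handles each pending action independently. A hedged sketch of the resulting pattern — demo_* names are hypothetical stand-ins, and the list/NAPI handling of the real helpers is omitted:

#define DEMO_DELEGATE_SEND	0
#define DEMO_DELEGATE_ACK	1

static void demo_delegate(unsigned long *status, int action)
{
	/* Only the first setter schedules the work; an action set
	 * before processing piggybacks on the same run.
	 */
	if (!test_and_set_bit(action, status))
		demo_schedule_napi();		/* hypothetical */
}

static void demo_process(unsigned long *status)
{
	if (test_bit(DEMO_DELEGATE_SEND, status)) {
		demo_push_pending();		/* hypothetical */
		clear_bit(DEMO_DELEGATE_SEND, status);
	}
	if (test_bit(DEMO_DELEGATE_ACK, status)) {
		demo_schedule_3rdack();		/* hypothetical */
		clear_bit(DEMO_DELEGATE_ACK, status);
	}
}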
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index ba9ae48..dda8b76 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -18,6 +18,8 @@
 #include "internal.h"
 #include "ncsi-pkt.h"
 
+static const int padding_bytes = 26;
+
 u32 ncsi_calculate_checksum(unsigned char *data, int len)
 {
 	u32 checksum = 0;
@@ -213,12 +215,17 @@
 {
 	struct ncsi_cmd_oem_pkt *cmd;
 	unsigned int len;
+	int payload;
+	/* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2
+	 * requires the payload to be padded with zeros to a
+	 * 32-bit boundary before the checksum field.
+	 * Ensure the padding bytes are accounted for in
+	 * the skb allocation.
+	 */
 
+	payload = ALIGN(nca->payload, 4);
 	len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-	if (nca->payload < 26)
-		len += 26;
-	else
-		len += nca->payload;
+	len += max(payload, padding_bytes);
 
 	cmd = skb_put_zero(skb, len);
 	memcpy(&cmd->mfr_id, nca->data, nca->payload);
@@ -272,6 +279,7 @@
 	struct net_device *dev = nd->dev;
 	int hlen = LL_RESERVED_SPACE(dev);
 	int tlen = dev->needed_tailroom;
+	int payload;
 	int len = hlen + tlen;
 	struct sk_buff *skb;
 	struct ncsi_request *nr;
@@ -281,14 +289,14 @@
 		return NULL;
 
 	/* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
+	 * The payload needs padding so that the checksum field following
+	 * the payload is aligned to a 32-bit boundary.
 	 * The packet needs padding if its payload is less than 26 bytes to
 	 * meet 64 bytes minimal ethernet frame length.
 	 */
 	len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-	if (nca->payload < 26)
-		len += 26;
-	else
-		len += nca->payload;
+	payload = ALIGN(nca->payload, 4);
+	len += max(payload, padding_bytes);
 
 	/* Allocate skb */
 	skb = alloc_skb(len, GFP_ATOMIC);
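
Both call sites above now share the same length computation. A standalone sketch of the arithmetic, with local stand-ins for the kernel's ALIGN() and max() macros, shows why the old `payload < 26` test alone was not enough once the checksum must start on a 4-byte boundary:

#include <stdio.h>

/* local stand-ins for the kernel's ALIGN() and max() helpers */
#define ALIGN4(x)  (((x) + 3) & ~3u)
#define MAX(a, b)  ((a) > (b) ? (a) : (b))

int main(void)
{
	const unsigned int padding_bytes = 26; /* minimum for 64-byte frames */
	unsigned int payloads[] = { 3, 25, 26, 27, 100 };

	for (unsigned int i = 0; i < sizeof(payloads) / sizeof(payloads[0]); i++) {
		unsigned int payload = ALIGN4(payloads[i]);
		/* 16-byte header + 4-byte checksum + padded payload */
		unsigned int len = 16 + 4 + MAX(payload, padding_bytes);

		printf("payload %3u -> padded %3u, total len %3u\n",
		       payloads[i], payload, len);
	}
	return 0; /* e.g. payload 27 pads to 28; the old code used 27 */
}
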
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e93c937a..51ad557a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1919,7 +1919,6 @@
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
 	int ret, pkts;
-	int conn_reuse_mode;
 	struct sock *sk;
 	int af = state->pf;
 
@@ -1997,15 +1996,16 @@
 	cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
 			     ipvs, af, skb, &iph);
 
-	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
-	if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+	if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+		int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
 		bool old_ct = false, resched = false;
 
 		if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
 		    unlikely(!atomic_read(&cp->dest->weight))) {
 			resched = true;
 			old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
-		} else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+		} else if (conn_reuse_mode &&
+			   is_new_conn_expected(cp, conn_reuse_mode)) {
 			old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
 			if (!atomic_read(&cp->n_control)) {
 				resched = true;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f1e5443..c7708bd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1011,11 +1011,9 @@
 						   CTA_TUPLE_REPLY,
 						   filter->family,
 						   &filter->zone,
-						   filter->orig_flags);
-		if (err < 0) {
-			err = -EINVAL;
+						   filter->reply_flags);
+		if (err < 0)
 			goto err_filter;
-		}
 	}
 
 	return filter;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index d6bf1b2..b561e0a 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -65,11 +65,11 @@
 		       sizeof(struct in6_addr));
 		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
 			   sizeof(struct in6_addr)))
-			memset(&key->enc_ipv6.src, 0xff,
+			memset(&mask->enc_ipv6.src, 0xff,
 			       sizeof(struct in6_addr));
 		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
 			   sizeof(struct in6_addr)))
-			memset(&key->enc_ipv6.dst, 0xff,
+			memset(&mask->enc_ipv6.dst, 0xff,
 			       sizeof(struct in6_addr));
 		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
 		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
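
The fix writes the all-ones pattern into the mask rather than clobbering the key. As a simplified, single-field illustration of key/mask matching (the real flow-dissector structures carry many fields, but the per-field semantics are the same): a packet matches when its masked bits equal the key's masked bits, so an accidentally empty mask matches everything:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* pkt matches iff (pkt & mask) == (key & mask) */
static int matches(uint32_t pkt, uint32_t key, uint32_t mask)
{
	return (pkt & mask) == (key & mask);
}

int main(void)
{
	uint32_t key = 0xc0a80001; /* e.g. 192.168.0.1 */
	uint32_t mask = 0;         /* bug: mask left empty matches anything */

	printf("mask 0  : stray pkt matches = %d\n",
	       matches(0xdeadbeef, key, mask));

	memset(&mask, 0xff, sizeof(mask)); /* the fix: widen the mask, not the key */
	printf("mask ~0 : stray pkt matches = %d\n",
	       matches(0xdeadbeef, key, mask));
	return 0;
}
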
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index cbfe4e4..bd68993 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,7 +22,6 @@
 #include <linux/icmpv6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include <linux/ip.h>
 #include <net/sctp/checksum.h>
 
 static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 2f7cf5e..0f8bb0b 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -85,9 +85,9 @@
 	mutex_unlock(&list_mutex);
 
 	if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
-		return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
+		return sysfs_emit(buf, "%ld\n", time_diff);
 
-	return snprintf(buf, PAGE_SIZE, "0\n");
+	return sysfs_emit(buf, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
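
sysfs_emit() is a bounded wrapper around scnprintf() that knows sysfs show() buffers are exactly one page, so the size bound cannot be gotten wrong at each call site. A minimal sketch of a show() callback using it; the attribute name and the value here are hypothetical, not taken from this driver:

/* sketch only: a sysfs show() method in kernel context, not a
 * standalone program; "expires" and its value are hypothetical
 */
static ssize_t expires_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	long remaining = 42; /* placeholder for the computed time left */

	/* sysfs_emit() knows buf is a full page and clamps accordingly,
	 * so there is no PAGE_SIZE argument to get wrong, unlike
	 * snprintf(buf, PAGE_SIZE, ...)
	 */
	return sysfs_emit(buf, "%ld\n", remaining);
}
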
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 3c645c1..dc7a240 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -94,13 +94,13 @@
 
 	device_lock(&dev->dev);
 
-	if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
-		rc = -ERFKILL;
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
 		goto error;
 	}
 
-	if (!device_is_registered(&dev->dev)) {
-		rc = -ENODEV;
+	if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+		rc = -ERFKILL;
 		goto error;
 	}
 
@@ -1125,11 +1125,7 @@
 	if (rc)
 		pr_err("Could not register llcp device\n");
 
-	rc = nfc_genl_device_added(dev);
-	if (rc)
-		pr_debug("The userspace won't be notified that the device %s was added\n",
-			 dev_name(&dev->dev));
-
+	device_lock(&dev->dev);
 	dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
 				   RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
 	if (dev->rfkill) {
@@ -1138,6 +1134,12 @@
 			dev->rfkill = NULL;
 		}
 	}
+	device_unlock(&dev->dev);
+
+	rc = nfc_genl_device_added(dev);
+	if (rc)
+		pr_debug("The userspace won't be notified that the device %s was added\n",
+			 dev_name(&dev->dev));
 
 	return 0;
 }
@@ -1154,10 +1156,17 @@
 
 	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
 
+	rc = nfc_genl_device_removed(dev);
+	if (rc)
+		pr_debug("The userspace won't be notified that the device %s "
+			 "was removed\n", dev_name(&dev->dev));
+
+	device_lock(&dev->dev);
 	if (dev->rfkill) {
 		rfkill_unregister(dev->rfkill);
 		rfkill_destroy(dev->rfkill);
 	}
+	device_unlock(&dev->dev);
 
 	if (dev->ops->check_presence) {
 		device_lock(&dev->dev);
@@ -1167,11 +1176,6 @@
 		cancel_work_sync(&dev->check_pres_work);
 	}
 
-	rc = nfc_genl_device_removed(dev);
-	if (rc)
-		pr_debug("The userspace won't be notified that the device %s "
-			 "was removed\n", dev_name(&dev->dev));
-
 	nfc_llcp_unregister_device(dev);
 
 	mutex_lock(&nfc_devlist_mutex);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6fd873a..d253738 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -144,12 +144,15 @@
 {
 	int rc;
 
-	if (!test_bit(NCI_UP, &ndev->flags))
-		return -ENETDOWN;
-
 	/* Serialize all requests */
 	mutex_lock(&ndev->req_lock);
-	rc = __nci_request(ndev, req, opt, timeout);
+	/* check the state after obtaining the lock to guard against
+	 * races with nci_close_device() when the device gets removed.
+	 */
+	if (test_bit(NCI_UP, &ndev->flags))
+		rc = __nci_request(ndev, req, opt, timeout);
+	else
+		rc = -ENETDOWN;
 	mutex_unlock(&ndev->req_lock);
 
 	return rc;
@@ -473,6 +476,11 @@
 
 	mutex_lock(&ndev->req_lock);
 
+	if (test_bit(NCI_UNREG, &ndev->flags)) {
+		rc = -ENODEV;
+		goto done;
+	}
+
 	if (test_bit(NCI_UP, &ndev->flags)) {
 		rc = -EALREADY;
 		goto done;
@@ -545,6 +553,10 @@
 static int nci_close_device(struct nci_dev *ndev)
 {
 	nci_req_cancel(ndev, ENODEV);
+
+	/* This mutex needs to be held as a barrier for
+	 * the caller, nci_unregister_device().
+	 */
 	mutex_lock(&ndev->req_lock);
 
 	if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
@@ -582,8 +594,8 @@
 
 	del_timer_sync(&ndev->cmd_timer);
 
-	/* Clear flags */
-	ndev->flags = 0;
+	/* Clear flags except NCI_UNREG */
+	ndev->flags &= BIT(NCI_UNREG);
 
 	mutex_unlock(&ndev->req_lock);
 
@@ -1266,6 +1278,12 @@
 {
 	struct nci_conn_info *conn_info, *n;
 
+	/* This set_bit is not protected by a dedicated barrier;
+	 * that is fine because the mutex_lock(&ndev->req_lock)
+	 * in nci_close_device() will provide one.
+	 */
+	set_bit(NCI_UNREG, &ndev->flags);
+
 	nci_close_device(ndev);
 
 	destroy_workqueue(ndev->cmd_wq);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index d64b0ee..efc963a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -19,6 +19,7 @@
 #include <linux/if_arp.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
+#include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <linux/tc_act/tc_mirred.h>
@@ -228,6 +229,7 @@
 	bool want_ingress;
 	bool is_redirect;
 	bool expects_nh;
+	bool at_ingress;
 	int m_eaction;
 	int mac_len;
 	bool at_nh;
@@ -263,7 +265,8 @@
 	 * ingress - that covers the TC S/W datapath.
 	 */
 	is_redirect = tcf_mirred_is_act_redirect(m_eaction);
-	use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
+	at_ingress = skb_at_tc_ingress(skb);
+	use_reinsert = at_ingress && is_redirect &&
 		       tcf_mirred_can_reinsert(retval);
 	if (!use_reinsert) {
 		skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -271,10 +274,12 @@
 			goto out;
 	}
 
+	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+
 	/* All mirred/redirected skbs should clear previous ct info */
 	nf_reset_ct(skb2);
-
-	want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+	if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
+		skb_dst_drop(skb2);
 
 	expects_nh = want_ingress || !m_mac_header_xmit;
 	at_nh = skb->data == skb_network_header(skb);
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 0eae9ff..e007fc7 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -665,12 +665,14 @@
 			q->classes[i].deficit = quanta[i];
 		}
 	}
+	for (i = q->nbands; i < oldbands; i++) {
+		qdisc_tree_flush_backlog(q->classes[i].qdisc);
+		if (i >= q->nstrict)
+			list_del(&q->classes[i].alist);
+	}
 	q->nstrict = nstrict;
 	memcpy(q->prio2band, priomap, sizeof(priomap));
 
-	for (i = q->nbands; i < oldbands; i++)
-		qdisc_tree_flush_backlog(q->classes[i].qdisc);
-
 	for (i = 0; i < q->nbands; i++)
 		q->classes[i].quantum = quanta[i];
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 39ba82e..354c1c4d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -946,7 +946,7 @@
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
 
 	/* Set peer label for connection. */
-	security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb);
+	security_inet_conn_established(ep->base.sk, chunk->skb);
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 59284da..230072f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -566,6 +566,10 @@
 
 static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
 {
+	wait_queue_head_t *smc_wait = sk_sleep(&smc->sk);
+	wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk);
+	unsigned long flags;
+
 	smc->use_fallback = true;
 	smc->fallback_rsn = reason_code;
 	smc_stat_fallback(smc);
@@ -575,6 +579,16 @@
 		smc->clcsock->file->private_data = smc->clcsock;
 		smc->clcsock->wq.fasync_list =
 			smc->sk.sk_socket->wq.fasync_list;
+
+		/* There may be some entries remaining in
+		 * the smc socket->wq, which should be moved
+		 * to the clcsock->wq during the fallback.
+		 */
+		spin_lock_irqsave(&smc_wait->lock, flags);
+		spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
+		list_splice_init(&smc_wait->head, &clc_wait->head);
+		spin_unlock(&clc_wait->lock);
+		spin_unlock_irqrestore(&smc_wait->lock, flags);
 	}
 }
 
@@ -2120,8 +2134,10 @@
 	smc->clcsock->sk->sk_user_data =
 		(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
 	rc = kernel_listen(smc->clcsock, backlog);
-	if (rc)
+	if (rc) {
+		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
 		goto out;
+	}
 	sk->sk_max_ack_backlog = backlog;
 	sk->sk_ack_backlog = 0;
 	sk->sk_state = SMC_LISTEN;
@@ -2354,8 +2370,10 @@
 static int smc_shutdown(struct socket *sock, int how)
 {
 	struct sock *sk = sock->sk;
+	bool do_shutdown = true;
 	struct smc_sock *smc;
 	int rc = -EINVAL;
+	int old_state;
 	int rc1 = 0;
 
 	smc = smc_sk(sk);
@@ -2382,7 +2400,11 @@
 	}
 	switch (how) {
 	case SHUT_RDWR:		/* shutdown in both directions */
+		old_state = sk->sk_state;
 		rc = smc_close_active(smc);
+		if (old_state == SMC_ACTIVE &&
+		    sk->sk_state == SMC_PEERCLOSEWAIT1)
+			do_shutdown = false;
 		break;
 	case SHUT_WR:
 		rc = smc_close_shutdown_write(smc);
@@ -2392,7 +2414,7 @@
 		/* nothing more to do because peer is not involved */
 		break;
 	}
-	if (smc->clcsock)
+	if (do_shutdown && smc->clcsock)
 		rc1 = kernel_sock_shutdown(smc->clcsock, how);
 	/* map sock_shutdown_cmd constants to sk_shutdown value range */
 	sk->sk_shutdown |= how + 1;
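
The splice added above has to take two wait-queue locks of the same lock class, which is why the inner acquisition is annotated with SINGLE_DEPTH_NESTING for lockdep. A distilled sketch of the pattern (move_waiters() is an illustrative name, not a function in the patch):

/* sketch of the double-lock splice used during fallback */
static void move_waiters(wait_queue_head_t *from, wait_queue_head_t *to)
{
	unsigned long flags;

	spin_lock_irqsave(&from->lock, flags);
	/* both locks belong to the same lockdep class, so the inner
	 * acquisition must be marked as an intentional nesting
	 */
	spin_lock_nested(&to->lock, SINGLE_DEPTH_NESTING);
	list_splice_init(&from->head, &to->head);
	spin_unlock(&to->lock);
	spin_unlock_irqrestore(&from->lock, flags);
}
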
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 0f9ffba..3715d2f 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -228,6 +228,12 @@
 			/* send close request */
 			rc = smc_close_final(conn);
 			sk->sk_state = SMC_PEERCLOSEWAIT1;
+
+			/* actively shut down the clcsock before the peer closes it,
+			 * to prevent the peer from entering the TIME_WAIT state.
+			 */
+			if (smc->clcsock && smc->clcsock->sk)
+				rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
 		} else {
 			/* peer event has changed the state */
 			goto again;
@@ -354,9 +360,9 @@
 	if (rxflags->peer_conn_abort) {
 		/* peer has not received all data */
 		smc_close_passive_abort_received(smc);
-		release_sock(&smc->sk);
+		release_sock(sk);
 		cancel_delayed_work_sync(&conn->tx_work);
-		lock_sock(&smc->sk);
+		lock_sock(sk);
 		goto wakeup;
 	}
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 49b8ba3..bb52c8b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -708,13 +708,14 @@
 	int i;
 
 	while (1) {
+again:
 		link_id = ++lgr->next_link_id;
 		if (!link_id)	/* skip zero as link_id */
 			link_id = ++lgr->next_link_id;
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			if (smc_link_usable(&lgr->lnk[i]) &&
 			    lgr->lnk[i].link_id == link_id)
-				continue;
+				goto again;
 		}
 		break;
 	}
@@ -1671,14 +1672,26 @@
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
-/* Determine vlan of internal TCP socket.
- * @vlan_id: address to store the determined vlan id into
- */
+static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
+				  struct netdev_nested_priv *priv)
+{
+	unsigned short *vlan_id = (unsigned short *)priv->data;
+
+	if (is_vlan_dev(lower_dev)) {
+		*vlan_id = vlan_dev_vlan_id(lower_dev);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Determine vlan of internal TCP socket. */
 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct netdev_nested_priv priv;
 	struct net_device *ndev;
-	int i, nest_lvl, rc = 0;
+	int rc = 0;
 
 	ini->vlan_id = 0;
 	if (!dst) {
@@ -1696,20 +1709,9 @@
 		goto out_rel;
 	}
 
+	priv.data = (void *)&ini->vlan_id;
 	rtnl_lock();
-	nest_lvl = ndev->lower_level;
-	for (i = 0; i < nest_lvl; i++) {
-		struct list_head *lower = &ndev->adj_list.lower;
-
-		if (list_empty(lower))
-			break;
-		lower = lower->next;
-		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
-		if (is_vlan_dev(ndev)) {
-			ini->vlan_id = vlan_dev_vlan_id(ndev);
-			break;
-		}
-	}
+	netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
 	rtnl_unlock();
 
 out_rel:
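
The one-line change from `continue` to `goto again` matters: `continue` only advanced the inner scan to the next link slot, so a colliding link_id could still be accepted; restarting the outer loop retries with a fresh id. A toy userspace reproduction of the two control flows (plain arrays standing in for the SMC link group):

#include <stdio.h>

#define NLINKS 3

/* ids already in use; toy stand-in for lgr->lnk[i].link_id */
static const int in_use[NLINKS] = { 1, 2, 3 };

static int next_unique_buggy(int next)
{
	while (1) {
		next++;
		for (int i = 0; i < NLINKS; i++)
			if (in_use[i] == next)
				continue; /* bug: only skips slot i */
		break; /* reached even when a collision was found */
	}
	return next;
}

static int next_unique_fixed(int next)
{
	while (1) {
again:
		next++;
		for (int i = 0; i < NLINKS; i++)
			if (in_use[i] == next)
				goto again; /* restart with a fresh candidate */
		break;
	}
	return next;
}

int main(void)
{
	printf("buggy: %d (collides with in_use[])\n", next_unique_buggy(0));
	printf("fixed: %d\n", next_unique_fixed(0));
	return 0;
}
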
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae48c9c..d8ee06a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1720,15 +1720,15 @@
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-static struct lock_class_key xs_key[2];
-static struct lock_class_key xs_slock_key[2];
+static struct lock_class_key xs_key[3];
+static struct lock_class_key xs_slock_key[3];
 
 static inline void xs_reclassify_socketu(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
 	sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
-		&xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
+		&xs_slock_key[0], "sk_lock-AF_LOCAL-RPC", &xs_key[0]);
 }
 
 static inline void xs_reclassify_socket4(struct socket *sock)
@@ -1736,7 +1736,7 @@
 	struct sock *sk = sock->sk;
 
 	sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
-		&xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+		&xs_slock_key[1], "sk_lock-AF_INET-RPC", &xs_key[1]);
 }
 
 static inline void xs_reclassify_socket6(struct socket *sock)
@@ -1744,7 +1744,7 @@
 	struct sock *sk = sock->sk;
 
 	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
-		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
+		&xs_slock_key[2], "sk_lock-AF_INET6-RPC", &xs_key[2]);
 }
 
 static inline void xs_reclassify_socket(int family, struct socket *sock)
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index dc60c32..b4d9419 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -524,7 +524,7 @@
 		return -EEXIST;
 
 	/* Allocate a new AEAD */
-	tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 	if (unlikely(!tmp))
 		return -ENOMEM;
 
@@ -597,6 +597,10 @@
 	tmp->cloned = NULL;
 	tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
 	tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
+	if (!tmp->key) {
+		tipc_aead_free(&tmp->rcu);
+		return -ENOMEM;
+	}
 	memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
 	atomic_set(&tmp->users, 0);
 	atomic64_set(&tmp->seqno, 0);
@@ -1470,7 +1474,7 @@
 		return -EEXIST;
 
 	/* Allocate crypto */
-	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
 	if (!c)
 		return -ENOMEM;
 
@@ -1484,7 +1488,7 @@
 	}
 
 	/* Allocate statistic structure */
-	c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
+	c->stats = alloc_percpu(struct tipc_crypto_stats);
 	if (!c->stats) {
 		if (c->wq)
 			destroy_workqueue(c->wq);
@@ -2457,7 +2461,7 @@
 	}
 
 	/* Lets duplicate it first */
-	skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC);
+	skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL);
 	rcu_read_unlock();
 
 	/* Now, generate new key, initiate & distribute it */
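
These allocations run in process context where sleeping is allowed, so GFP_KERNEL is the right strength: it may block while the allocator reclaims memory instead of failing outright, whereas GFP_ATOMIC is reserved for contexts that cannot sleep. A one-function illustration of the rule of thumb (not a real kernel helper):

/* illustrative rule of thumb, not a real kernel helper */
static void *alloc_for_context(size_t size, bool can_sleep)
{
	/* GFP_KERNEL may block to reclaim memory, so it is only safe
	 * in process context with no spinlocks held; GFP_ATOMIC never
	 * sleeps but fails more readily under memory pressure
	 */
	return kzalloc(size, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
}
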
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1b7a487..09ae844 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1298,8 +1298,11 @@
 		return false;
 #ifdef CONFIG_TIPC_CRYPTO
 	case MSG_CRYPTO:
-		tipc_crypto_msg_rcv(l->net, skb);
-		return true;
+		if (TIPC_SKB_CB(skb)->decrypted) {
+			tipc_crypto_msg_rcv(l->net, skb);
+			return true;
+		}
+		fallthrough;
 #endif
 	default:
 		pr_warn("Dropping received illegal msg type\n");
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index acfba9f..6bc2879 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -61,7 +61,7 @@
 static const struct proto *saved_tcpv4_prot;
 static DEFINE_MUTEX(tcpv4_prot_mutex);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
-static struct proto_ops tls_sw_proto_ops;
+static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 			 const struct proto *base);
 
@@ -71,6 +71,8 @@
 
 	WRITE_ONCE(sk->sk_prot,
 		   &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+	WRITE_ONCE(sk->sk_socket->ops,
+		   &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -669,8 +671,6 @@
 	if (tx) {
 		ctx->sk_write_space = sk->sk_write_space;
 		sk->sk_write_space = tls_write_space;
-	} else {
-		sk->sk_socket->ops = &tls_sw_proto_ops;
 	}
 	goto out;
 
@@ -728,6 +728,39 @@
 	return ctx;
 }
 
+static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+			    const struct proto_ops *base)
+{
+	ops[TLS_BASE][TLS_BASE] = *base;
+
+	ops[TLS_SW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+	ops[TLS_SW  ][TLS_BASE].sendpage_locked	= tls_sw_sendpage_locked;
+
+	ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
+	ops[TLS_BASE][TLS_SW  ].splice_read	= tls_sw_splice_read;
+
+	ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
+	ops[TLS_SW  ][TLS_SW  ].splice_read	= tls_sw_splice_read;
+
+#ifdef CONFIG_TLS_DEVICE
+	ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+	ops[TLS_HW  ][TLS_BASE].sendpage_locked	= NULL;
+
+	ops[TLS_HW  ][TLS_SW  ] = ops[TLS_BASE][TLS_SW  ];
+	ops[TLS_HW  ][TLS_SW  ].sendpage_locked	= NULL;
+
+	ops[TLS_BASE][TLS_HW  ] = ops[TLS_BASE][TLS_SW  ];
+
+	ops[TLS_SW  ][TLS_HW  ] = ops[TLS_SW  ][TLS_SW  ];
+
+	ops[TLS_HW  ][TLS_HW  ] = ops[TLS_HW  ][TLS_SW  ];
+	ops[TLS_HW  ][TLS_HW  ].sendpage_locked	= NULL;
+#endif
+#ifdef CONFIG_TLS_TOE
+	ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
+#endif
+}
+
 static void tls_build_proto(struct sock *sk)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
@@ -739,6 +772,8 @@
 		mutex_lock(&tcpv6_prot_mutex);
 		if (likely(prot != saved_tcpv6_prot)) {
 			build_protos(tls_prots[TLSV6], prot);
+			build_proto_ops(tls_proto_ops[TLSV6],
+					sk->sk_socket->ops);
 			smp_store_release(&saved_tcpv6_prot, prot);
 		}
 		mutex_unlock(&tcpv6_prot_mutex);
@@ -749,6 +784,8 @@
 		mutex_lock(&tcpv4_prot_mutex);
 		if (likely(prot != saved_tcpv4_prot)) {
 			build_protos(tls_prots[TLSV4], prot);
+			build_proto_ops(tls_proto_ops[TLSV4],
+					sk->sk_socket->ops);
 			smp_store_release(&saved_tcpv4_prot, prot);
 		}
 		mutex_unlock(&tcpv4_prot_mutex);
@@ -959,10 +996,6 @@
 	if (err)
 		return err;
 
-	tls_sw_proto_ops = inet_stream_ops;
-	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
-	tls_sw_proto_ops.sendpage_locked   = tls_sw_sendpage_locked;
-
 	tls_device_init();
 	tcp_register_ulp(&tcp_tls_ulp_ops);
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d815640..d3e7ff9 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2005,6 +2005,7 @@
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	ssize_t copied = 0;
+	bool from_queue;
 	int err = 0;
 	long timeo;
 	int chunk;
@@ -2014,25 +2015,28 @@
 
 	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
 
-	skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
-	if (!skb)
-		goto splice_read_end;
-
-	if (!ctx->decrypted) {
-		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
-
-		/* splice does not support reading control messages */
-		if (ctx->control != TLS_RECORD_TYPE_DATA) {
-			err = -EINVAL;
+	from_queue = !skb_queue_empty(&ctx->rx_list);
+	if (from_queue) {
+		skb = __skb_dequeue(&ctx->rx_list);
+	} else {
+		skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
+				    &err);
+		if (!skb)
 			goto splice_read_end;
-		}
 
+		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
 		if (err < 0) {
 			tls_err_abort(sk, -EBADMSG);
 			goto splice_read_end;
 		}
-		ctx->decrypted = 1;
 	}
+
+	/* splice does not support reading control messages */
+	if (ctx->control != TLS_RECORD_TYPE_DATA) {
+		err = -EINVAL;
+		goto splice_read_end;
+	}
+
 	rxm = strp_msg(skb);
 
 	chunk = min_t(unsigned int, rxm->full_len, len);
@@ -2040,7 +2044,17 @@
 	if (copied < 0)
 		goto splice_read_end;
 
-	tls_sw_advance_skb(sk, skb, copied);
+	if (!from_queue) {
+		ctx->recv_pkt = NULL;
+		__strp_unpause(&ctx->strp);
+	}
+	if (chunk < rxm->full_len) {
+		__skb_queue_head(&ctx->rx_list, skb);
+		rxm->offset += len;
+		rxm->full_len -= len;
+	} else {
+		consume_skb(skb);
+	}
 
 splice_read_end:
 	release_sock(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 78e08e8..b0bfc78 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2882,9 +2882,6 @@
 
 	unix_state_lock(sk);
 	sk->sk_shutdown |= mode;
-	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
-	    mode == SHUTDOWN_MASK)
-		sk->sk_state = TCP_CLOSE;
 	other = unix_peer(sk);
 	if (other)
 		sock_hold(other);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 81232b7..a27b3b5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -936,33 +936,37 @@
 	[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
 };
 
-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
-			      struct cfg80211_registered_device **rdev,
-			      struct wireless_dev **wdev)
+static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+				     struct cfg80211_registered_device **rdev,
+				     struct wireless_dev **wdev,
+				     struct nlattr **attrbuf)
 {
 	int err;
 
 	if (!cb->args[0]) {
-		struct nlattr **attrbuf;
+		struct nlattr **attrbuf_free = NULL;
 
-		attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
-				  GFP_KERNEL);
-		if (!attrbuf)
-			return -ENOMEM;
+		if (!attrbuf) {
+			attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+					  GFP_KERNEL);
+			if (!attrbuf)
+				return -ENOMEM;
+			attrbuf_free = attrbuf;
+		}
 
 		err = nlmsg_parse_deprecated(cb->nlh,
 					     GENL_HDRLEN + nl80211_fam.hdrsize,
 					     attrbuf, nl80211_fam.maxattr,
 					     nl80211_policy, NULL);
 		if (err) {
-			kfree(attrbuf);
+			kfree(attrbuf_free);
 			return err;
 		}
 
 		rtnl_lock();
 		*wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk),
 						   attrbuf);
-		kfree(attrbuf);
+		kfree(attrbuf_free);
 		if (IS_ERR(*wdev)) {
 			rtnl_unlock();
 			return PTR_ERR(*wdev);
@@ -6197,7 +6201,7 @@
 	int sta_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
 	if (err)
 		return err;
 	/* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -7092,7 +7096,7 @@
 	int path_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
 	if (err)
 		return err;
 	/* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -7292,7 +7296,7 @@
 	int path_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
 	if (err)
 		return err;
 	/* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -9718,7 +9722,7 @@
 	int start = cb->args[2], idx = 0;
 	int err;
 
-	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
 	if (err)
 		return err;
 	/* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -9851,7 +9855,7 @@
 	if (!attrbuf)
 		return -ENOMEM;
 
-	res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+	res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
 	if (res) {
 		kfree(attrbuf);
 		return res;
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a3f3877..d642e3b 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Portions of this file
- * Copyright (C) 2018, 2020 Intel Corporation
+ * Copyright (C) 2018, 2020-2021 Intel Corporation
  */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
@@ -22,10 +22,6 @@
 	       ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
 }
 
-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
-			      struct cfg80211_registered_device **rdev,
-			      struct wireless_dev **wdev);
-
 int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 			  struct genl_info *info,
 			  struct cfg80211_chan_def *chandef);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 5ff1f872..41ea65d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1046,6 +1046,7 @@
 
 		switch (otype) {
 		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_P2P_GO:
 			cfg80211_stop_ap(rdev, dev, true);
 			break;
 		case NL80211_IFTYPE_ADHOC:
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 90c4e1e..bc4ad48 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -500,7 +500,7 @@
 		pool->free_list_cnt--;
 		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
 					free_list_node);
-		list_del(&xskb->free_list_node);
+		list_del_init(&xskb->free_list_node);
 	}
 
 	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -568,7 +568,7 @@
 	i = nb_entries;
 	while (i--) {
 		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
-		list_del(&xskb->free_list_node);
+		list_del_init(&xskb->free_list_node);
 
 		*xdp = &xskb->xdp;
 		xdp++;
@@ -615,6 +615,9 @@
 
 void xp_free(struct xdp_buff_xsk *xskb)
 {
+	if (!list_empty(&xskb->free_list_node))
+		return;
+
 	xskb->pool->free_list_cnt++;
 	list_add(&xskb->free_list_node, &xskb->pool->free_list);
 }
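
list_del_init() leaves the removed entry self-linked, so list_empty() on the entry itself can later serve as an "already freed" test, which is exactly what the new guard in xp_free() relies on. A stripped-down userspace rendition of the list API, just enough to show the idempotency:

#include <stdio.h>

/* minimal circular doubly-linked list, after include/linux/list.h */
struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	INIT_LIST_HEAD(e); /* unlike list_del(), leaves e self-linked */
}

int main(void)
{
	struct list_head free_list, node;
	int free_cnt = 0;

	INIT_LIST_HEAD(&free_list);
	INIT_LIST_HEAD(&node);

	/* xp_free()-style idempotent free: a node detached with
	 * list_del_init() reports empty, so a double free is a no-op
	 */
	for (int i = 0; i < 2; i++) {
		if (!list_empty(&node))
			continue; /* already on the free list, skip */
		free_cnt++;
		list_add(&node, &free_list);
	}
	printf("freed %d time(s)\n", free_cnt); /* 1, not 2 */
	return 0;
}
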
diff --git a/samples/Kconfig b/samples/Kconfig
index bec3528..43d2e9a 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -31,6 +31,15 @@
 	  This builds an ftrace direct function example
 	  that hooks to wake_up_process and prints the parameters.
 
+config SAMPLE_FTRACE_DIRECT_MULTI
+	tristate "Build register_ftrace_direct_multi() example"
+	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
+	depends on HAVE_SAMPLE_FTRACE_DIRECT_MULTI
+	help
+	  This builds an ftrace direct function example
+	  that hooks to wake_up_process and schedule, and prints
+	  the function addresses.
+
 config SAMPLE_TRACE_ARRAY
         tristate "Build sample module for kernel access to Ftrace instancess"
 	depends on EVENT_TRACING && m
@@ -237,5 +246,5 @@
 config HAVE_SAMPLE_FTRACE_DIRECT
 	bool
 
-config HAVE_SAMPLE_FTRACE_MULTI_DIRECT
+config HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	bool
diff --git a/samples/Makefile b/samples/Makefile
index b7b9830..4bcd6b9 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -22,7 +22,7 @@
 obj-$(CONFIG_SAMPLE_TRACE_EVENTS)	+= trace_events/
 obj-$(CONFIG_SAMPLE_TRACE_PRINTK)	+= trace_printk/
 obj-$(CONFIG_SAMPLE_FTRACE_DIRECT)	+= ftrace/
-obj-$(CONFIG_SAMPLE_FTRACE_MULTI_DIRECT) += ftrace/
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/
 obj-$(CONFIG_SAMPLE_TRACE_ARRAY)	+= ftrace/
 subdir-$(CONFIG_SAMPLE_UHID)		+= uhid
 obj-$(CONFIG_VIDEO_PCI_SKELETON)	+= v4l/
diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h
index 722b3fa..1752a46 100644
--- a/samples/bpf/hbm_kern.h
+++ b/samples/bpf/hbm_kern.h
@@ -9,8 +9,6 @@
  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
  */
 #define KBUILD_MODNAME "foo"
-#include <stddef.h>
-#include <stdbool.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index d84e694..a81704d 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -309,7 +309,6 @@
 	const char *mprog_filename = NULL, *mprog_name = NULL;
 	struct xdp_redirect_cpu *skel;
 	struct bpf_map_info info = {};
-	char ifname_buf[IF_NAMESIZE];
 	struct bpf_cpumap_val value;
 	__u32 infosz = sizeof(info);
 	int ret = EXIT_FAIL_OPTION;
@@ -390,10 +389,10 @@
 		case 'd':
 			if (strlen(optarg) >= IF_NAMESIZE) {
 				fprintf(stderr, "-d/--dev name too long\n");
+				usage(argv, long_options, __doc__, mask, true, skel->obj);
 				goto end_cpu;
 			}
-			safe_strncpy(ifname_buf, optarg, strlen(ifname_buf));
-			ifindex = if_nametoindex(ifname_buf);
+			ifindex = if_nametoindex(optarg);
 			if (!ifindex)
 				ifindex = strtoul(optarg, NULL, 0);
 			if (!ifindex) {
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
index b32d821..8740838 100644
--- a/samples/bpf/xdp_sample_user.c
+++ b/samples/bpf/xdp_sample_user.c
@@ -120,7 +120,10 @@
 		__u64 xmit;
 	} totals;
 	struct {
-		__u64 pps;
+		union {
+			__u64 pps;
+			__u64 num;
+		};
 		__u64 drop;
 		__u64 err;
 	} rx_cnt;
@@ -1322,7 +1325,7 @@
 
 static void sample_summary_print(void)
 {
-	double period = sample_out.rx_cnt.pps;
+	double num = sample_out.rx_cnt.num;
 
 	if (sample_out.totals.rx) {
 		double pkts = sample_out.totals.rx;
@@ -1330,7 +1333,7 @@
 		print_always("  Packets received    : %'-10llu\n",
 			     sample_out.totals.rx);
 		print_always("  Average packets/s   : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 	if (sample_out.totals.redir) {
 		double pkts = sample_out.totals.redir;
@@ -1338,7 +1341,7 @@
 		print_always("  Packets redirected  : %'-10llu\n",
 			     sample_out.totals.redir);
 		print_always("  Average redir/s     : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 	if (sample_out.totals.drop)
 		print_always("  Rx dropped          : %'-10llu\n",
@@ -1355,7 +1358,7 @@
 		print_always("  Packets transmitted : %'-10llu\n",
 			     sample_out.totals.xmit);
 		print_always("  Average transmit/s  : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 }
 
@@ -1422,7 +1425,7 @@
 	return 0;
 }
 
-static void sample_summary_update(struct sample_output *out, int interval)
+static void sample_summary_update(struct sample_output *out)
 {
 	sample_out.totals.rx += out->totals.rx;
 	sample_out.totals.redir += out->totals.redir;
@@ -1430,12 +1433,11 @@
 	sample_out.totals.drop_xmit += out->totals.drop_xmit;
 	sample_out.totals.err += out->totals.err;
 	sample_out.totals.xmit += out->totals.xmit;
-	sample_out.rx_cnt.pps += interval;
+	sample_out.rx_cnt.num++;
 }
 
 static void sample_stats_print(int mask, struct stats_record *cur,
-			       struct stats_record *prev, char *prog_name,
-			       int interval)
+			       struct stats_record *prev, char *prog_name)
 {
 	struct sample_output out = {};
 
@@ -1452,7 +1454,7 @@
 	else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
 		stats_get_devmap_xmit_multi(cur, prev, 0, &out,
 					    mask & SAMPLE_DEVMAP_XMIT_CNT);
-	sample_summary_update(&out, interval);
+	sample_summary_update(&out);
 
 	stats_print(prog_name, mask, cur, prev, &out);
 }
@@ -1495,7 +1497,7 @@
 }
 
 static int sample_timer_cb(int timerfd, struct stats_record **rec,
-			   struct stats_record **prev, int interval)
+			   struct stats_record **prev)
 {
 	char line[64] = "Summary";
 	int ret;
@@ -1524,7 +1526,7 @@
 		snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
 	}
 
-	sample_stats_print(sample_mask, *rec, *prev, line, interval);
+	sample_stats_print(sample_mask, *rec, *prev, line);
 	return 0;
 }
 
@@ -1579,7 +1581,7 @@
 		if (pfd[0].revents & POLLIN)
 			ret = sample_signal_cb();
 		else if (pfd[1].revents & POLLIN)
-			ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+			ret = sample_timer_cb(timerfd, &rec, &prev);
 
 		if (ret)
 			break;
diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile
index e8a3f85..b9198e2 100644
--- a/samples/ftrace/Makefile
+++ b/samples/ftrace/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o
 obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o
 obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o
-obj-$(CONFIG_SAMPLE_FTRACE_MULTI_DIRECT) += ftrace-direct-multi.o
+obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o
 
 CFLAGS_sample-trace-array.o := -I$(src)
 obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index b6d7806..2fafc9a 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -4,6 +4,7 @@
 #include <linux/mm.h> /* for handle_mm_fault() */
 #include <linux/ftrace.h>
 #include <linux/sched/stat.h>
+#include <asm/asm-offsets.h>
 
 extern void my_direct_func(unsigned long ip);
 
@@ -14,6 +15,8 @@
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_X86_64
+
 asm (
 "	.pushsection    .text, \"ax\", @progbits\n"
 "	.type		my_tramp, @function\n"
@@ -31,6 +34,33 @@
 "	.popsection\n"
 );
 
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_S390
+
+asm (
+"	.pushsection	.text, \"ax\", @progbits\n"
+"	.type		my_tramp, @function\n"
+"	.globl		my_tramp\n"
+"   my_tramp:"
+"	lgr		%r1,%r15\n"
+"	stmg		%r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+"	stg		%r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+"	aghi		%r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n"
+"	stg		%r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n"
+"	lgr		%r2,%r0\n"
+"	brasl		%r14,my_direct_func\n"
+"	aghi		%r15,"__stringify(STACK_FRAME_OVERHEAD)"\n"
+"	lmg		%r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n"
+"	lg		%r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n"
+"	lgr		%r1,%r0\n"
+"	br		%r1\n"
+"	.size		my_tramp, .-my_tramp\n"
+"	.popsection\n"
+);
+
+#endif /* CONFIG_S390 */
+
 static struct ftrace_ops direct;
 
 static int __init ftrace_direct_multi_init(void)
diff --git a/scripts/coccinelle/misc/do_div.cocci b/scripts/coccinelle/misc/do_div.cocci
new file mode 100644
index 0000000..79db083
--- /dev/null
+++ b/scripts/coccinelle/misc/do_div.cocci
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/// do_div() does a 64-by-32 division.
+/// When the divisor is long, unsigned long, u64, or s64,
+/// do_div() truncates it to 32 bits; this means a divisor that
+/// tests non-zero can be truncated to 0 for the division on 64-bit platforms.
+///
+//# This makes an effort to find those inappropriate do_div() calls.
+//
+// Confidence: Moderate
+// Copyright: (C) 2020 Wen Yang, Alibaba.
+// Comments:
+// Options: --no-includes --include-headers
+
+virtual context
+virtual org
+virtual report
+
+@initialize:python@
+@@
+
+def get_digit_type_and_value(str):
+    is_digit = False
+    value = 0
+
+    try:
+        if (str.isdigit()):
+           is_digit = True
+           value = int(str, 0)
+        elif (str.upper().endswith('ULL')):
+           is_digit = True
+           value = int(str[:-3], 0)
+        elif (str.upper().endswith('LL')):
+           is_digit = True
+           value = int(str[:-2], 0)
+        elif (str.upper().endswith('UL')):
+           is_digit = True
+           value = int(str[:-2], 0)
+        elif (str.upper().endswith('L')):
+           is_digit = True
+           value = int(str[:-1], 0)
+        elif (str.upper().endswith('U')):
+           is_digit = True
+           value = int(str[:-1], 0)
+    except Exception as e:
+          print('Error:',e)
+          is_digit = False
+          value = 0
+    finally:
+        return is_digit, value
+
+def filter_out_safe_constants(str):
+    is_digit, value = get_digit_type_and_value(str)
+    if (is_digit):
+        if (value >= 0x100000000):
+            return True
+        else:
+            return False
+    else:
+        return True
+
+def construct_warnings(suggested_fun):
+    msg="WARNING: do_div() does a 64-by-32 division, please consider using %s instead."
+    return  msg % suggested_fun
+
+@depends on context@
+expression f;
+long l: script:python() { filter_out_safe_constants(l) };
+unsigned long ul : script:python() { filter_out_safe_constants(ul) };
+u64 ul64 : script:python() { filter_out_safe_constants(ul64) };
+s64 sl64 : script:python() { filter_out_safe_constants(sl64) };
+
+@@
+(
+* do_div(f, l);
+|
+* do_div(f, ul);
+|
+* do_div(f, ul64);
+|
+* do_div(f, sl64);
+)
+
+@r depends on (org || report)@
+expression f;
+position p;
+long l: script:python() { filter_out_safe_constants(l) };
+unsigned long ul : script:python() { filter_out_safe_constants(ul) };
+u64 ul64 : script:python() { filter_out_safe_constants(ul64) };
+s64 sl64 : script:python() { filter_out_safe_constants(sl64) };
+@@
+(
+do_div@p(f, l);
+|
+do_div@p(f, ul);
+|
+do_div@p(f, ul64);
+|
+do_div@p(f, sl64);
+)
+
+@script:python depends on org@
+p << r.p;
+ul << r.ul;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_ul"))
+
+@script:python depends on org@
+p << r.p;
+l << r.l;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_long"))
+
+@script:python depends on org@
+p << r.p;
+ul64 << r.ul64;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_u64"))
+
+@script:python depends on org@
+p << r.p;
+sl64 << r.sl64;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_s64"))
+
+@script:python depends on report@
+p << r.p;
+ul << r.ul;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_ul"))
+
+@script:python depends on report@
+p << r.p;
+l << r.l;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_long"))
+
+@script:python depends on report@
+p << r.p;
+sl64 << r.sl64;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_s64"))
+
+@script:python depends on report@
+p << r.p;
+ul64 << r.ul64;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_u64"))
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index cc36256..c0d3bcb 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -259,5 +259,8 @@
 	DEVID_FIELD(dfl_device_id, type);
 	DEVID_FIELD(dfl_device_id, feature_id);
 
+	DEVID(ishtp_device_id);
+	DEVID_FIELD(ishtp_device_id, guid);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 49aba86..5258247 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -115,6 +115,17 @@
 		uuid.b[12], uuid.b[13], uuid.b[14], uuid.b[15]);
 }
 
+static inline void add_guid(char *str, guid_t guid)
+{
+	int len = strlen(str);
+
+	sprintf(str + len, "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
+		guid.b[3], guid.b[2], guid.b[1], guid.b[0],
+		guid.b[5], guid.b[4], guid.b[7], guid.b[6],
+		guid.b[8], guid.b[9], guid.b[10], guid.b[11],
+		guid.b[12], guid.b[13], guid.b[14], guid.b[15]);
+}
+
 /**
  * Check that sizeof(device_id type) is consistent with the size of the section
  * in the .o file. If inconsistent, then userspace and kernel do not agree
@@ -1380,6 +1391,18 @@
 	return 1;
 }
 
+/* Looks like: ishtp:{guid} */
+static int do_ishtp_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, ishtp_device_id, guid);
+
+	strcpy(alias, ISHTP_MODULE_PREFIX "{");
+	add_guid(alias, guid);
+	strcat(alias, "}");
+
+	return 1;
+}
+
 static int do_auxiliary_entry(const char *filename, void *symval, char *alias)
 {
 	DEF_FIELD_ADDR(symval, auxiliary_device_id, name);
@@ -1499,6 +1522,7 @@
 	{"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry},
 	{"ssam", SIZE_ssam_device_id, do_ssam_entry},
 	{"dfl", SIZE_dfl_device_id, do_dfl_entry},
+	{"ishtp", SIZE_ishtp_device_id, do_ishtp_entry},
 };
 
 /* Create MODULE_ALIAS() statements.
diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files
index c3eb81c..0114c41 100755
--- a/scripts/remove-stale-files
+++ b/scripts/remove-stale-files
@@ -28,4 +28,9 @@
 	do
 		rm -f arch/arm/boot/compressed/${f}
 	done
+
+	for f in uart-ath79.c ashldi3.c bswapdi.c bswapsi.c
+	do
+		rm -f arch/mips/boot/compressed/${f}
+	done
 fi
diff --git a/security/security.c b/security/security.c
index 779a9ed..c88167a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2388,13 +2388,6 @@
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb)
-{
-	call_void_hook(sctp_assoc_established, asoc, skb);
-}
-EXPORT_SYMBOL(security_sctp_assoc_established);
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5e5215f..62d30c0 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5502,8 +5502,7 @@
 	if (!selinux_policycap_extsockclass())
 		return selinux_sk_clone_security(sk, newsk);
 
-	if (asoc->secid != SECSID_WILD)
-		newsksec->sid = asoc->secid;
+	newsksec->sid = asoc->secid;
 	newsksec->peer_sid = asoc->peer_secid;
 	newsksec->sclass = sksec->sclass;
 	selinux_netlbl_sctp_sk_clone(sk, newsk);
@@ -5559,16 +5558,6 @@
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
 }
 
-static void selinux_sctp_assoc_established(struct sctp_association *asoc,
-					   struct sk_buff *skb)
-{
-	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
-
-	selinux_inet_conn_established(asoc->base.sk, skb);
-	asoc->peer_secid = sksec->peer_sid;
-	asoc->secid = SECSID_WILD;
-}
-
 static int selinux_secmark_relabel_packet(u32 sid)
 {
 	const struct task_security_struct *__tsec;
@@ -7239,7 +7228,6 @@
 	LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request),
 	LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
 	LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
-	LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
 	LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
 	LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
 	LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index 727c3b4..0ae4e4e 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -31,13 +31,20 @@
 
 int hashtab_init(struct hashtab *h, u32 nel_hint)
 {
-	h->size = hashtab_compute_size(nel_hint);
-	h->nel = 0;
-	if (!h->size)
-		return 0;
+	u32 size = hashtab_compute_size(nel_hint);
 
-	h->htable = kcalloc(h->size, sizeof(*h->htable), GFP_KERNEL);
-	return h->htable ? 0 : -ENOMEM;
+	/* should already be zeroed, but better be safe */
+	h->nel = 0;
+	h->size = 0;
+	h->htable = NULL;
+
+	if (size) {
+		h->htable = kcalloc(size, sizeof(*h->htable), GFP_KERNEL);
+		if (!h->htable)
+			return -ENOMEM;
+		h->size = size;
+	}
+	return 0;
 }
 
 int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
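
The rewrite keeps the hashtab in a valid empty state at every step, instead of publishing a nonzero size before the allocation has succeeded. A userspace mirror of the pattern (libc calloc standing in for kcalloc):

#include <stdio.h>
#include <stdlib.h>

struct hashtab {
	void **htable;
	unsigned int size;
	unsigned int nel;
};

/* mirror of the fixed hashtab_init(): establish a valid empty state
 * first, so a failed or zero-sized allocation leaves a table that
 * other code can still safely walk (size == 0, htable == NULL)
 */
static int hashtab_init(struct hashtab *h, unsigned int size)
{
	h->nel = 0;
	h->size = 0;
	h->htable = NULL;

	if (size) {
		h->htable = calloc(size, sizeof(*h->htable));
		if (!h->htable)
			return -1;
		h->size = size;
	}
	return 0;
}

int main(void)
{
	struct hashtab h;

	if (hashtab_init(&h, 0) == 0)
		printf("empty table: size=%u htable=%p\n",
		       h.size, (void *)h.htable);
	return 0;
}
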
diff --git a/sound/core/Makefile b/sound/core/Makefile
index 350d704c..79e1407 100644
--- a/sound/core/Makefile
+++ b/sound/core/Makefile
@@ -19,6 +19,7 @@
 snd-pcm-y := pcm.o pcm_native.o pcm_lib.o pcm_misc.o \
 		pcm_memory.o memalloc.o
 snd-pcm-$(CONFIG_SND_PCM_TIMER) += pcm_timer.o
+snd-pcm-$(CONFIG_SND_DMA_SGBUF) += sgbuf.o
 snd-pcm-$(CONFIG_SND_PCM_ELD) += pcm_drm_eld.o
 snd-pcm-$(CONFIG_SND_PCM_IEC958) += pcm_iec958.o
 
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index 99cd0f6..9fc971a 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -183,8 +183,11 @@
 int snd_dma_buffer_mmap(struct snd_dma_buffer *dmab,
 			struct vm_area_struct *area)
 {
-	const struct snd_malloc_ops *ops = snd_dma_get_ops(dmab);
+	const struct snd_malloc_ops *ops;
 
+	if (!dmab)
+		return -ENOENT;
+	ops = snd_dma_get_ops(dmab);
 	if (ops && ops->mmap)
 		return ops->mmap(dmab, area);
 	else
@@ -549,61 +552,74 @@
 	}
 }
 
+static inline void snd_dma_noncontig_iter_set(struct snd_dma_buffer *dmab,
+					      struct sg_page_iter *piter,
+					      size_t offset)
+{
+	struct sg_table *sgt = dmab->private_data;
+
+	__sg_page_iter_start(piter, sgt->sgl, sgt->orig_nents,
+			     offset >> PAGE_SHIFT);
+}
+
+static dma_addr_t snd_dma_noncontig_get_addr(struct snd_dma_buffer *dmab,
+					     size_t offset)
+{
+	struct sg_dma_page_iter iter;
+
+	snd_dma_noncontig_iter_set(dmab, &iter.base, offset);
+	__sg_page_iter_dma_next(&iter);
+	return sg_page_iter_dma_address(&iter) + offset % PAGE_SIZE;
+}
+
+static struct page *snd_dma_noncontig_get_page(struct snd_dma_buffer *dmab,
+					       size_t offset)
+{
+	struct sg_page_iter iter;
+
+	snd_dma_noncontig_iter_set(dmab, &iter, offset);
+	__sg_page_iter_next(&iter);
+	return sg_page_iter_page(&iter);
+}
+
+static unsigned int
+snd_dma_noncontig_get_chunk_size(struct snd_dma_buffer *dmab,
+				 unsigned int ofs, unsigned int size)
+{
+	struct sg_dma_page_iter iter;
+	unsigned int start, end;
+	unsigned long addr;
+
+	start = ALIGN_DOWN(ofs, PAGE_SIZE);
+	end = ofs + size - 1; /* the last byte address */
+	snd_dma_noncontig_iter_set(dmab, &iter.base, start);
+	if (!__sg_page_iter_dma_next(&iter))
+		return 0;
+	/* check page continuity */
+	addr = sg_page_iter_dma_address(&iter);
+	for (;;) {
+		start += PAGE_SIZE;
+		if (start > end)
+			break;
+		addr += PAGE_SIZE;
+		if (!__sg_page_iter_dma_next(&iter) ||
+		    sg_page_iter_dma_address(&iter) != addr)
+			return start - ofs;
+	}
+	/* ok, all on contiguous pages */
+	return size;
+}
+
 static const struct snd_malloc_ops snd_dma_noncontig_ops = {
 	.alloc = snd_dma_noncontig_alloc,
 	.free = snd_dma_noncontig_free,
 	.mmap = snd_dma_noncontig_mmap,
 	.sync = snd_dma_noncontig_sync,
-	/* re-use vmalloc helpers for get_* ops */
-	.get_addr = snd_dma_vmalloc_get_addr,
-	.get_page = snd_dma_vmalloc_get_page,
-	.get_chunk_size = snd_dma_vmalloc_get_chunk_size,
+	.get_addr = snd_dma_noncontig_get_addr,
+	.get_page = snd_dma_noncontig_get_page,
+	.get_chunk_size = snd_dma_noncontig_get_chunk_size,
 };
 
-/* x86-specific SG-buffer with WC pages */
-#ifdef CONFIG_SND_DMA_SGBUF
-#define vmalloc_to_virt(v) (unsigned long)page_to_virt(vmalloc_to_page(v))
-
-static void *snd_dma_sg_wc_alloc(struct snd_dma_buffer *dmab, size_t size)
-{
-	void *p = snd_dma_noncontig_alloc(dmab, size);
-	size_t ofs;
-
-	if (!p)
-		return NULL;
-	for (ofs = 0; ofs < size; ofs += PAGE_SIZE)
-		set_memory_uc(vmalloc_to_virt(p + ofs), 1);
-	return p;
-}
-
-static void snd_dma_sg_wc_free(struct snd_dma_buffer *dmab)
-{
-	size_t ofs;
-
-	for (ofs = 0; ofs < dmab->bytes; ofs += PAGE_SIZE)
-		set_memory_wb(vmalloc_to_virt(dmab->area + ofs), 1);
-	snd_dma_noncontig_free(dmab);
-}
-
-static int snd_dma_sg_wc_mmap(struct snd_dma_buffer *dmab,
-			      struct vm_area_struct *area)
-{
-	area->vm_page_prot = pgprot_writecombine(area->vm_page_prot);
-	/* FIXME: dma_mmap_noncontiguous() works? */
-	return -ENOENT; /* continue with the default mmap handler */
-}
-
-const struct snd_malloc_ops snd_dma_sg_wc_ops = {
-	.alloc = snd_dma_sg_wc_alloc,
-	.free = snd_dma_sg_wc_free,
-	.mmap = snd_dma_sg_wc_mmap,
-	.sync = snd_dma_noncontig_sync,
-	.get_addr = snd_dma_vmalloc_get_addr,
-	.get_page = snd_dma_vmalloc_get_page,
-	.get_chunk_size = snd_dma_vmalloc_get_chunk_size,
-};
-#endif /* CONFIG_SND_DMA_SGBUF */
-
 /*
  * Non-coherent pages allocator
  */
@@ -663,17 +679,20 @@
 	[SNDRV_DMA_TYPE_DEV_WC] = &snd_dma_wc_ops,
 	[SNDRV_DMA_TYPE_NONCONTIG] = &snd_dma_noncontig_ops,
 	[SNDRV_DMA_TYPE_NONCOHERENT] = &snd_dma_noncoherent_ops,
-#ifdef CONFIG_SND_DMA_SGBUF
-	[SNDRV_DMA_TYPE_DEV_WC_SG] = &snd_dma_sg_wc_ops,
-#endif
 #ifdef CONFIG_GENERIC_ALLOCATOR
 	[SNDRV_DMA_TYPE_DEV_IRAM] = &snd_dma_iram_ops,
 #endif /* CONFIG_GENERIC_ALLOCATOR */
 #endif /* CONFIG_HAS_DMA */
+#ifdef CONFIG_SND_DMA_SGBUF
+	[SNDRV_DMA_TYPE_DEV_SG] = &snd_dma_sg_ops,
+	[SNDRV_DMA_TYPE_DEV_WC_SG] = &snd_dma_sg_ops,
+#endif
 };
 
 static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab)
 {
+	if (WARN_ON_ONCE(!dmab))
+		return NULL;
 	if (WARN_ON_ONCE(dmab->dev.type <= SNDRV_DMA_TYPE_UNKNOWN ||
 			 dmab->dev.type >= ARRAY_SIZE(dma_ops)))
 		return NULL;
diff --git a/sound/core/sgbuf.c b/sound/core/sgbuf.c
new file mode 100644
index 0000000..8352a5c
--- /dev/null
+++ b/sound/core/sgbuf.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Scatter-Gather buffer
+ *
+ *  Copyright (c) by Takashi Iwai <tiwai@suse.de>
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/export.h>
+#include <sound/memalloc.h>
+#include "memalloc_local.h"
+
+struct snd_sg_page {
+	void *buf;
+	dma_addr_t addr;
+};
+
+struct snd_sg_buf {
+	int size;	/* allocated byte size */
+	int pages;	/* allocated pages */
+	int tblsize;	/* allocated table size */
+	struct snd_sg_page *table;	/* address table */
+	struct page **page_table;	/* page table (for vmap/vunmap) */
+	struct device *dev;
+};
+
+/* table entries are aligned to 32 */
+#define SGBUF_TBL_ALIGN		32
+#define sgbuf_align_table(tbl)	ALIGN((tbl), SGBUF_TBL_ALIGN)
+
+static void snd_dma_sg_free(struct snd_dma_buffer *dmab)
+{
+	struct snd_sg_buf *sgbuf = dmab->private_data;
+	struct snd_dma_buffer tmpb;
+	int i;
+
+	if (!sgbuf)
+		return;
+
+	vunmap(dmab->area);
+	dmab->area = NULL;
+
+	tmpb.dev.type = SNDRV_DMA_TYPE_DEV;
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
+		tmpb.dev.type = SNDRV_DMA_TYPE_DEV_WC;
+	tmpb.dev.dev = sgbuf->dev;
+	for (i = 0; i < sgbuf->pages; i++) {
+		if (!(sgbuf->table[i].addr & ~PAGE_MASK))
+			continue; /* continuous pages */
+		tmpb.area = sgbuf->table[i].buf;
+		tmpb.addr = sgbuf->table[i].addr & PAGE_MASK;
+		tmpb.bytes = (sgbuf->table[i].addr & ~PAGE_MASK) << PAGE_SHIFT;
+		snd_dma_free_pages(&tmpb);
+	}
+
+	kfree(sgbuf->table);
+	kfree(sgbuf->page_table);
+	kfree(sgbuf);
+	dmab->private_data = NULL;
+}
+
+#define MAX_ALLOC_PAGES		32
+
+static void *snd_dma_sg_alloc(struct snd_dma_buffer *dmab, size_t size)
+{
+	struct snd_sg_buf *sgbuf;
+	unsigned int i, pages, chunk, maxpages;
+	struct snd_dma_buffer tmpb;
+	struct snd_sg_page *table;
+	struct page **pgtable;
+	int type = SNDRV_DMA_TYPE_DEV;
+	pgprot_t prot = PAGE_KERNEL;
+	void *area;
+
+	dmab->private_data = sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL);
+	if (!sgbuf)
+		return NULL;
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) {
+		type = SNDRV_DMA_TYPE_DEV_WC;
+#ifdef pgprot_noncached
+		prot = pgprot_noncached(PAGE_KERNEL);
+#endif
+	}
+	sgbuf->dev = dmab->dev.dev;
+	pages = snd_sgbuf_aligned_pages(size);
+	sgbuf->tblsize = sgbuf_align_table(pages);
+	table = kcalloc(sgbuf->tblsize, sizeof(*table), GFP_KERNEL);
+	if (!table)
+		goto _failed;
+	sgbuf->table = table;
+	pgtable = kcalloc(sgbuf->tblsize, sizeof(*pgtable), GFP_KERNEL);
+	if (!pgtable)
+		goto _failed;
+	sgbuf->page_table = pgtable;
+
+	/* allocate pages */
+	maxpages = MAX_ALLOC_PAGES;
+	while (pages > 0) {
+		chunk = pages;
+		/* don't be too eager to take a huge chunk */
+		if (chunk > maxpages)
+			chunk = maxpages;
+		chunk <<= PAGE_SHIFT;
+		if (snd_dma_alloc_pages_fallback(type, dmab->dev.dev,
+						 chunk, &tmpb) < 0) {
+			if (!sgbuf->pages)
+				goto _failed;
+			size = sgbuf->pages * PAGE_SIZE;
+			break;
+		}
+		chunk = tmpb.bytes >> PAGE_SHIFT;
+		for (i = 0; i < chunk; i++) {
+			table->buf = tmpb.area;
+			table->addr = tmpb.addr;
+			if (!i)
+				table->addr |= chunk; /* mark head */
+			table++;
+			*pgtable++ = virt_to_page(tmpb.area);
+			tmpb.area += PAGE_SIZE;
+			tmpb.addr += PAGE_SIZE;
+		}
+		sgbuf->pages += chunk;
+		pages -= chunk;
+		if (chunk < maxpages)
+			maxpages = chunk;
+	}
+
+	sgbuf->size = size;
+	area = vmap(sgbuf->page_table, sgbuf->pages, VM_MAP, prot);
+	if (!area)
+		goto _failed;
+	return area;
+
+ _failed:
+	snd_dma_sg_free(dmab); /* free the table */
+	return NULL;
+}
+
+static dma_addr_t snd_dma_sg_get_addr(struct snd_dma_buffer *dmab,
+				      size_t offset)
+{
+	struct snd_sg_buf *sgbuf = dmab->private_data;
+	dma_addr_t addr;
+
+	addr = sgbuf->table[offset >> PAGE_SHIFT].addr;
+	addr &= ~((dma_addr_t)PAGE_SIZE - 1);
+	return addr + offset % PAGE_SIZE;
+}
+
+static struct page *snd_dma_sg_get_page(struct snd_dma_buffer *dmab,
+					size_t offset)
+{
+	struct snd_sg_buf *sgbuf = dmab->private_data;
+	unsigned int idx = offset >> PAGE_SHIFT;
+
+	if (idx >= (unsigned int)sgbuf->pages)
+		return NULL;
+	return sgbuf->page_table[idx];
+}
+
+static unsigned int snd_dma_sg_get_chunk_size(struct snd_dma_buffer *dmab,
+					      unsigned int ofs,
+					      unsigned int size)
+{
+	struct snd_sg_buf *sg = dmab->private_data;
+	unsigned int start, end, pg;
+
+	start = ofs >> PAGE_SHIFT;
+	end = (ofs + size - 1) >> PAGE_SHIFT;
+	/* check page continuity */
+	pg = sg->table[start].addr >> PAGE_SHIFT;
+	for (;;) {
+		start++;
+		if (start > end)
+			break;
+		pg++;
+		if ((sg->table[start].addr >> PAGE_SHIFT) != pg)
+			return (start << PAGE_SHIFT) - ofs;
+	}
+	/* ok, all on contiguous pages */
+	return size;
+}
+
+static int snd_dma_sg_mmap(struct snd_dma_buffer *dmab,
+			   struct vm_area_struct *area)
+{
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
+		area->vm_page_prot = pgprot_writecombine(area->vm_page_prot);
+	return -ENOENT; /* continue with the default mmap handler */
+}
+
+const struct snd_malloc_ops snd_dma_sg_ops = {
+	.alloc = snd_dma_sg_alloc,
+	.free = snd_dma_sg_free,
+	.get_addr = snd_dma_sg_get_addr,
+	.get_page = snd_dma_sg_get_page,
+	.get_chunk_size = snd_dma_sg_get_chunk_size,
+	.mmap = snd_dma_sg_mmap,
+};
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 92b7008..b3214ba 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -624,13 +624,13 @@
 	if (!timer)
 		return -EINVAL;
 	spin_lock_irqsave(&timer->lock, flags);
+	list_del_init(&timeri->ack_list);
+	list_del_init(&timeri->active_list);
 	if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING |
 			       SNDRV_TIMER_IFLG_START))) {
 		result = -EBUSY;
 		goto unlock;
 	}
-	list_del_init(&timeri->ack_list);
-	list_del_init(&timeri->active_list);
 	if (timer->card && timer->card->shutdown)
 		goto unlock;
 	if (stop) {
@@ -665,23 +665,22 @@
 static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop)
 {
 	unsigned long flags;
+	bool running;
 
 	spin_lock_irqsave(&slave_active_lock, flags);
-	if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) {
-		spin_unlock_irqrestore(&slave_active_lock, flags);
-		return -EBUSY;
-	}
+	running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING;
 	timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING;
 	if (timeri->timer) {
 		spin_lock(&timeri->timer->lock);
 		list_del_init(&timeri->ack_list);
 		list_del_init(&timeri->active_list);
-		snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
-				  SNDRV_TIMER_EVENT_PAUSE);
+		if (running)
+			snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
+					  SNDRV_TIMER_EVENT_PAUSE);
 		spin_unlock(&timeri->timer->lock);
 	}
 	spin_unlock_irqrestore(&slave_active_lock, flags);
-	return 0;
+	return running ? 0 : -EBUSY;
 }
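
The rework above follows a common race-safe pattern: unlink the instance unconditionally, remember whether it was actually running, and derive both the notification and the return value from that snapshot. A standalone sketch of the shape (names and the -EBUSY value are illustrative):

#include <stdio.h>
#include <stdbool.h>

#define IFLG_RUNNING 0x1

struct instance { unsigned int flags; bool linked; };

static int stop_slave(struct instance *ti)
{
	bool running = ti->flags & IFLG_RUNNING;

	ti->flags &= ~IFLG_RUNNING;
	ti->linked = false;		/* unlink unconditionally */
	if (running)
		printf("notify STOP\n");
	return running ? 0 : -16;	/* -EBUSY only when it was not running */
}

int main(void)
{
	struct instance ti = { .flags = IFLG_RUNNING, .linked = true };

	printf("first stop : %d\n", stop_slave(&ti));
	printf("second stop: %d\n", stop_slave(&ti));
	return 0;
}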
 
 /*
diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig
index fd109be..22b6c77 100644
--- a/sound/firewire/Kconfig
+++ b/sound/firewire/Kconfig
@@ -169,6 +169,7 @@
 	  * 828
 	  * 896
 	  * 828mk2
+	  * 896hd
 	  * Traveler
 	  * Ultralite
 	  * 8pre
@@ -176,7 +177,9 @@
 	  * 828mk3 (Hybrid)
 	  * Ultralite mk3 (FireWire only)
 	  * Ultralite mk3 (Hybrid)
+	  * Traveler mk3
 	  * Audio Express
+	  * Track 16
 	  * 4pre
 
 	 To compile this driver as a module, choose M here: the module
diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c
index ac66f08..53dbd4d 100644
--- a/sound/firewire/fireworks/fireworks_stream.c
+++ b/sound/firewire/fireworks/fireworks_stream.c
@@ -50,8 +50,9 @@
 		     efw->firmware_version == 0x5070300 ||
 		     efw->firmware_version == 0x5080000))
 			efw->tx_stream.flags |= CIP_UNALIGHED_DBC;
-		// AudioFire9 always reports wrong dbs.
-		if (efw->is_af9)
+		// AudioFire9 always reports wrong dbs. Onyx 1200F with the latest firmware (v4.6.0)
+		// also reports wrong dbs at 88.2 kHz or greater.
+		if (efw->is_af9 || efw->firmware_version == 0x4060000)
 			efw->tx_stream.flags |= CIP_WRONG_DBS;
 		// Firmware version 5.5 reports fixed interval for dbc.
 		if (efw->firmware_version == 0x5050000)
diff --git a/sound/firewire/motu/motu-protocol-v3.c b/sound/firewire/motu/motu-protocol-v3.c
index 05608e8..8a04269 100644
--- a/sound/firewire/motu/motu-protocol-v3.c
+++ b/sound/firewire/motu/motu-protocol-v3.c
@@ -16,6 +16,7 @@
 #define   V3_CLOCK_SRC_INTERNAL		0x00
 #define   V3_CLOCK_SRC_WORD_ON_BNC	0x01
 #define   V3_CLOCK_SRC_SPH		0x02
+#define   V3_CLOCK_SRC_AESEBU_ON_XLR	0x08
 #define   V3_CLOCK_SRC_SPDIF_ON_COAX	0x10
 #define   V3_CLOCK_SRC_OPT_IFACE_A	0x18
 #define   V3_CLOCK_SRC_OPT_IFACE_B	0x19
@@ -126,6 +127,9 @@
 	case V3_CLOCK_SRC_SPH:
 		*src = SND_MOTU_CLOCK_SOURCE_SPH;
 		break;
+	case V3_CLOCK_SRC_AESEBU_ON_XLR:
+		*src = SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR;
+		break;
 	case V3_CLOCK_SRC_SPDIF_ON_COAX:
 		*src = SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX;
 		break;
@@ -185,7 +189,7 @@
 					  sizeof(reg));
 }
 
-static int detect_packet_formats_828mk3(struct snd_motu *motu, u32 data)
+static int detect_packet_formats_with_opt_ifaces(struct snd_motu *motu, u32 data)
 {
 	if (data & V3_ENABLE_OPT_IN_IFACE_A) {
 		if (data & V3_NO_ADAT_OPT_IN_IFACE_A) {
@@ -255,8 +259,11 @@
 	       motu->spec->rx_fixed_pcm_chunks,
 	       sizeof(motu->rx_packet_formats.pcm_chunks));
 
-	if (motu->spec == &snd_motu_spec_828mk3_fw || motu->spec == &snd_motu_spec_828mk3_hybrid)
-		return detect_packet_formats_828mk3(motu, data);
+	if (motu->spec == &snd_motu_spec_828mk3_fw ||
+	    motu->spec == &snd_motu_spec_828mk3_hybrid ||
+	    motu->spec == &snd_motu_spec_traveler_mk3 ||
+	    motu->spec == &snd_motu_spec_track16)
+		return detect_packet_formats_with_opt_ifaces(motu, data);
 	else
 		return 0;
 }
@@ -281,6 +288,16 @@
 	.rx_fixed_pcm_chunks = {14, 14, 14},	// Additional 4 dummy chunks at higher rate.
 };
 
+const struct snd_motu_spec snd_motu_spec_traveler_mk3 = {
+	.name = "TravelerMk3",
+	.protocol_version = SND_MOTU_PROTOCOL_V3,
+	.flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q |
+		 SND_MOTU_SPEC_TX_MIDI_3RD_Q |
+		 SND_MOTU_SPEC_COMMAND_DSP,
+	.tx_fixed_pcm_chunks = {18, 14, 10},
+	.rx_fixed_pcm_chunks = {14, 14, 10},
+};
+
 const struct snd_motu_spec snd_motu_spec_ultralite_mk3 = {
 	.name = "UltraLiteMk3",
 	.protocol_version = SND_MOTU_PROTOCOL_V3,
@@ -301,6 +318,16 @@
 	.rx_fixed_pcm_chunks = {10, 10, 0},
 };
 
+const struct snd_motu_spec snd_motu_spec_track16 = {
+	.name = "Track16",
+	.protocol_version = SND_MOTU_PROTOCOL_V3,
+	.flags = SND_MOTU_SPEC_RX_MIDI_3RD_Q |
+		 SND_MOTU_SPEC_TX_MIDI_3RD_Q |
+		 SND_MOTU_SPEC_COMMAND_DSP,
+	.tx_fixed_pcm_chunks = {14, 14, 14},
+	.rx_fixed_pcm_chunks = {6, 6, 6},
+};
+
 const struct snd_motu_spec snd_motu_spec_4pre = {
 	.name = "4pre",
 	.protocol_version = SND_MOTU_PROTOCOL_V3,
diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c
index 5fc7ae4..f8b7fe3 100644
--- a/sound/firewire/motu/motu.c
+++ b/sound/firewire/motu/motu.c
@@ -169,9 +169,11 @@
 	SND_MOTU_DEV_ENTRY(0x00000f, &snd_motu_spec_8pre),
 	SND_MOTU_DEV_ENTRY(0x000015, &snd_motu_spec_828mk3_fw), // FireWire only.
 	SND_MOTU_DEV_ENTRY(0x000019, &snd_motu_spec_ultralite_mk3), // FireWire only.
+	SND_MOTU_DEV_ENTRY(0x00001b, &snd_motu_spec_traveler_mk3),
 	SND_MOTU_DEV_ENTRY(0x000030, &snd_motu_spec_ultralite_mk3), // Hybrid.
 	SND_MOTU_DEV_ENTRY(0x000035, &snd_motu_spec_828mk3_hybrid), // Hybrid.
 	SND_MOTU_DEV_ENTRY(0x000033, &snd_motu_spec_audio_express),
+	SND_MOTU_DEV_ENTRY(0x000039, &snd_motu_spec_track16),
 	SND_MOTU_DEV_ENTRY(0x000045, &snd_motu_spec_4pre),
 	{ }
 };
diff --git a/sound/firewire/motu/motu.h b/sound/firewire/motu/motu.h
index 79704ae..4189f21 100644
--- a/sound/firewire/motu/motu.h
+++ b/sound/firewire/motu/motu.h
@@ -138,8 +138,10 @@
 
 extern const struct snd_motu_spec snd_motu_spec_828mk3_fw;
 extern const struct snd_motu_spec snd_motu_spec_828mk3_hybrid;
+extern const struct snd_motu_spec snd_motu_spec_traveler_mk3;
 extern const struct snd_motu_spec snd_motu_spec_ultralite_mk3;
 extern const struct snd_motu_spec snd_motu_spec_audio_express;
+extern const struct snd_motu_spec snd_motu_spec_track16;
 extern const struct snd_motu_spec snd_motu_spec_4pre;
 
 int amdtp_motu_init(struct amdtp_stream *s, struct fw_unit *unit,
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index b9ac9e9..4208fa8 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -252,6 +252,11 @@
 		.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
 		.device = 0x02c8,
 	},
+	{
+		.flags = FLAG_SOF,
+		.device = 0x02c8,
+		.codec_hid = "ESSX8336",
+	},
 /* Cometlake-H */
 	{
 		.flags = FLAG_SOF,
@@ -276,6 +281,11 @@
 		.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
 		.device = 0x06c8,
 	},
+	{
+		.flags = FLAG_SOF,
+		.device = 0x06c8,
+		.codec_hid = "ESSX8336",
+	},
 #endif
 
 /* Icelake */
@@ -299,6 +309,15 @@
 	},
 #endif
 
+/* JasperLake */
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
+	{
+		.flags = FLAG_SOF,
+		.device = 0x4dc8,
+		.codec_hid = "ESSX8336",
+	},
+#endif
+
 /* Tigerlake */
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
 	{
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index ea20236..9a678b5 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -3218,7 +3218,6 @@
 {
 	static int dev;
 	struct snd_card *card;
-	struct cmipci *cm;
 	int err;
 
 	if (dev >= SNDRV_CARDS)
@@ -3229,10 +3228,9 @@
 	}
 
 	err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
-				sizeof(*cm), &card);
+				sizeof(struct cmipci), &card);
 	if (err < 0)
 		return err;
-	cm = card->private_data;
 	
 	switch (pci->device) {
 	case PCI_DEVICE_ID_CMEDIA_CM8738:
diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c
index da6e635..d074727 100644
--- a/sound/pci/ctxfi/ctamixer.c
+++ b/sound/pci/ctxfi/ctamixer.c
@@ -23,16 +23,15 @@
 
 #define BLANK_SLOT		4094
 
-static int amixer_master(struct rsc *rsc)
+static void amixer_master(struct rsc *rsc)
 {
 	rsc->conj = 0;
-	return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
+	rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
 }
 
-static int amixer_next_conj(struct rsc *rsc)
+static void amixer_next_conj(struct rsc *rsc)
 {
 	rsc->conj++;
-	return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
 }
 
 static int amixer_index(const struct rsc *rsc)
@@ -331,16 +330,15 @@
 
 /* SUM resource management */
 
-static int sum_master(struct rsc *rsc)
+static void sum_master(struct rsc *rsc)
 {
 	rsc->conj = 0;
-	return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
+	rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
 }
 
-static int sum_next_conj(struct rsc *rsc)
+static void sum_next_conj(struct rsc *rsc)
 {
 	rsc->conj++;
-	return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
 }
 
 static int sum_index(const struct rsc *rsc)
diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c
index f589da0..7fc7200 100644
--- a/sound/pci/ctxfi/ctdaio.c
+++ b/sound/pci/ctxfi/ctdaio.c
@@ -51,12 +51,12 @@
 	[SPDIFIO] = {.left = 0x05, .right = 0x85},
 };
 
-static int daio_master(struct rsc *rsc)
+static void daio_master(struct rsc *rsc)
 {
 	/* Actually, this is not the resource index of DAIO.
 	 * For DAO, it is the input mapper index. And, for DAI,
 	 * it is the output time-slot index. */
-	return rsc->conj = rsc->idx;
+	rsc->conj = rsc->idx;
 }
 
 static int daio_index(const struct rsc *rsc)
@@ -64,19 +64,19 @@
 	return rsc->conj;
 }
 
-static int daio_out_next_conj(struct rsc *rsc)
+static void daio_out_next_conj(struct rsc *rsc)
 {
-	return rsc->conj += 2;
+	rsc->conj += 2;
 }
 
-static int daio_in_next_conj_20k1(struct rsc *rsc)
+static void daio_in_next_conj_20k1(struct rsc *rsc)
 {
-	return rsc->conj += 0x200;
+	rsc->conj += 0x200;
 }
 
-static int daio_in_next_conj_20k2(struct rsc *rsc)
+static void daio_in_next_conj_20k2(struct rsc *rsc)
 {
-	return rsc->conj += 0x100;
+	rsc->conj += 0x100;
 }
 
 static const struct rsc_ops daio_out_rsc_ops = {
diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c
index 81ad269..be1d3e6 100644
--- a/sound/pci/ctxfi/ctresource.c
+++ b/sound/pci/ctxfi/ctresource.c
@@ -109,18 +109,17 @@
     return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type];
 }
 
-static int rsc_next_conj(struct rsc *rsc)
+static void rsc_next_conj(struct rsc *rsc)
 {
 	unsigned int i;
 	for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); )
 		i++;
 	rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i);
-	return rsc->conj;
 }
 
-static int rsc_master(struct rsc *rsc)
+static void rsc_master(struct rsc *rsc)
 {
-	return rsc->conj = rsc->idx;
+	rsc->conj = rsc->idx;
 }
 
 static const struct rsc_ops rsc_generic_ops = {
diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h
index fdbfd80..58553bd 100644
--- a/sound/pci/ctxfi/ctresource.h
+++ b/sound/pci/ctxfi/ctresource.h
@@ -39,8 +39,8 @@
 };
 
 struct rsc_ops {
-	int (*master)(struct rsc *rsc);	/* Move to master resource */
-	int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
+	void (*master)(struct rsc *rsc); /* Move to master resource */
+	void (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
 	int (*index)(const struct rsc *rsc); /* Return the index of resource */
 	/* Return the output slot number */
 	int (*output_slot)(const struct rsc *rsc);
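
The int-to-void conversion works because callers never consume a return value from master() or next_conj(); they step the cursor and then read the position through index(). The old next_conj() bodies additionally read idx[rsc->conj] after the increment, which could seemingly index past the end of the array. A user-space sketch of the call pattern, with simplified types:

#include <stdio.h>

struct rsc {
	int idx;	/* master resource index */
	int conj;	/* current conjugate position */
};

/* mirrors the now void-returning movement callbacks */
static void rsc_master(struct rsc *rsc)     { rsc->conj = rsc->idx; }
static void rsc_next_conj(struct rsc *rsc)  { rsc->conj += 2; }
static int rsc_index(const struct rsc *rsc) { return rsc->conj; }

struct rsc_ops {
	void (*master)(struct rsc *rsc);
	void (*next_conj)(struct rsc *rsc);
	int (*index)(const struct rsc *rsc);
};

static const struct rsc_ops ops = {
	.master = rsc_master,
	.next_conj = rsc_next_conj,
	.index = rsc_index,
};

int main(void)
{
	struct rsc r = { .idx = 8, .conj = 0 };
	int i;

	ops.master(&r);
	for (i = 0; i < 3; i++) {
		/* callers read the position via index(), never via the
		 * movement callbacks' return value */
		printf("conjugate %d -> index %d\n", i, ops.index(&r));
		ops.next_conj(&r);
	}
	return 0;
}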
diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c
index bd4697b..4a94b47 100644
--- a/sound/pci/ctxfi/ctsrc.c
+++ b/sound/pci/ctxfi/ctsrc.c
@@ -590,16 +590,15 @@
 
 /* SRCIMP resource manager operations */
 
-static int srcimp_master(struct rsc *rsc)
+static void srcimp_master(struct rsc *rsc)
 {
 	rsc->conj = 0;
-	return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
+	rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
 }
 
-static int srcimp_next_conj(struct rsc *rsc)
+static void srcimp_next_conj(struct rsc *rsc)
 {
 	rsc->conj++;
-	return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
 }
 
 static int srcimp_index(const struct rsc *rsc)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 7762718c..1b46b599 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -335,7 +335,10 @@
 					((pci)->device == 0x0c0c) || \
 					((pci)->device == 0x0d0c) || \
 					((pci)->device == 0x160c) || \
-					((pci)->device == 0x490d))
+					((pci)->device == 0x490d) || \
+					((pci)->device == 0x4f90) || \
+					((pci)->device == 0x4f91) || \
+					((pci)->device == 0x4f92))
 
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
 
@@ -2327,7 +2330,8 @@
 
 out_free:
 	if (err < 0) {
-		azx_free(chip);
+		pci_set_drvdata(pci, NULL);
+		snd_card_free(chip->card);
 		return err;
 	}
 
@@ -2364,6 +2368,7 @@
 		cancel_delayed_work_sync(&hda->probe_work);
 		device_lock(&pci->dev);
 
+		pci_set_drvdata(pci, NULL);
 		snd_card_free(card);
 	}
 }
@@ -2471,6 +2476,13 @@
 	/* DG1 */
 	{ PCI_DEVICE(0x8086, 0x490d),
 	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+	/* DG2 */
+	{ PCI_DEVICE(0x8086, 0x4f90),
+	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+	{ PCI_DEVICE(0x8086, 0x4f91),
+	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+	{ PCI_DEVICE(0x8086, 0x4f92),
+	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
 	/* Alderlake-S */
 	{ PCI_DEVICE(0x8086, 0x7ad0),
 	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index ea8ab8b..d22c96e 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -438,6 +438,15 @@
 #define for_each_hda_codec_node(nid, codec) \
 	for ((nid) = (codec)->core.start_nid; (nid) < (codec)->core.end_nid; (nid)++)
 
+/* Set the codec power_state flag to indicate that unsol event handling is
+ * allowed; see hda_codec_unsol_event() in hda_bind.c.  Calling this might
+ * confuse the state tracking, so use with care.
+ */
+static inline void snd_hda_codec_allow_unsol_events(struct hda_codec *codec)
+{
+	codec->core.dev.power.power_state = PMSG_ON;
+}
+
 /*
  * get widget capabilities
  */
diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c
index 31ff11a..039b9f2 100644
--- a/sound/pci/hda/patch_cs8409.c
+++ b/sound/pci/hda/patch_cs8409.c
@@ -750,6 +750,11 @@
 	if (cs42l42->full_scale_vol)
 		cs8409_i2c_write(cs42l42, 0x2001, 0x01);
 
+	/* we have to explicitly allow unsol event handling even during the
+	 * resume phase so that the jack event is processed properly
+	 */
+	snd_hda_codec_allow_unsol_events(cs42l42->codec);
+
 	cs42l42_enable_jack_detect(cs42l42);
 }
 
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 65d2c55..415701b 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -4380,10 +4380,11 @@
 HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI",	patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI",	patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI",	patch_i915_tgl_hdmi),
-HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI",	patch_i915_tgl_hdmi),
+HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI",	patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI",	patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI",	patch_i915_icl_hdmi),
+HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",	patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",	patch_i915_byt_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8a3e2fe..9ce7457 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6521,6 +6521,27 @@
 	alc_write_coef_idx(codec, 0x45, 0x5089);
 }
 
+static const struct coef_fw alc233_fixup_no_audio_jack_coefs[] = {
+	WRITE_COEF(0x1a, 0x9003), WRITE_COEF(0x1b, 0x0e2b), WRITE_COEF(0x37, 0xfe06),
+	WRITE_COEF(0x38, 0x4981), WRITE_COEF(0x45, 0xd489), WRITE_COEF(0x46, 0x0074),
+	WRITE_COEF(0x49, 0x0149),
+	{}
+};
+
+static void alc233_fixup_no_audio_jack(struct hda_codec *codec,
+				       const struct hda_fixup *fix,
+				       int action)
+{
+	/*
+	 * The audio jack input and output are not detected on the ASRock NUC Box
+	 * 1100 series when cold booting without this fix. Warm rebooting from a
+	 * certain other OS makes the audio functional, as COEF settings are
+	 * preserved in this case. This fix sets these altered COEF values as
+	 * the default.
+	 */
+	alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs);
+}
+
 enum {
 	ALC269_FIXUP_GPIO2,
 	ALC269_FIXUP_SONY_VAIO,
@@ -6739,6 +6760,8 @@
 	ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
 	ALC287_FIXUP_13S_GEN2_SPEAKERS,
 	ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS,
+	ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
+	ALC233_FIXUP_NO_AUDIO_JACK,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8450,6 +8473,19 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc245_fixup_hp_gpio_led,
 	},
+	[ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x03a11120 }, /* use as headset mic, without its own jack detect */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
+	},
+	[ALC233_FIXUP_NO_AUDIO_JACK] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc233_fixup_no_audio_jack,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8486,6 +8522,7 @@
 	SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1025, 0x141f, "Acer Spin SP513-54N", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
@@ -8625,8 +8662,10 @@
 	SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8728, "HP EliteBook 840 G7", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+	SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
@@ -8687,6 +8726,7 @@
 	SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE),
+	SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE),
 	SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE),
 	SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -8750,11 +8790,15 @@
 	SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x5015, "Clevo NH5[58]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x5017, "Clevo NH7[79]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50e1, "Clevo NH5[58]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50e2, "Clevo NH7[79]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8877,6 +8921,7 @@
 	SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
+	SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK),
 	SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
 	SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20),
 	SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI),
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 75aa2ea..96c12df 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -468,8 +468,11 @@
 	unsigned char         ss_out_channels;
 	u32                   io_loopback;          /* output loopback channel states*/
 
-	struct snd_dma_buffer *capture_dma_buf;
-	struct snd_dma_buffer *playback_dma_buf;
+	/* DMA buffers; these are copies of the original snd_dma_buffer
+	 * objects (which are managed via devres), adjusted for address alignment
+	 */
+	struct snd_dma_buffer capture_dma_buf;
+	struct snd_dma_buffer playback_dma_buf;
 	unsigned char        *capture_buffer;	    /* suitably aligned address */
 	unsigned char        *playback_buffer;	    /* suitably aligned address */
 
@@ -3764,30 +3767,32 @@
 
 static int snd_hdsp_initialize_memory(struct hdsp *hdsp)
 {
-	unsigned long pb_bus, cb_bus;
+	struct snd_dma_buffer *capture_dma, *playback_dma;
 
-	hdsp->capture_dma_buf =
-		snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
-	hdsp->playback_dma_buf =
-		snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
-	if (!hdsp->capture_dma_buf || !hdsp->playback_dma_buf) {
+	capture_dma = snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
+	playback_dma = snd_hammerfall_get_buffer(hdsp->pci, HDSP_DMA_AREA_BYTES);
+	if (!capture_dma || !playback_dma) {
 		dev_err(hdsp->card->dev,
 			"%s: no buffers available\n", hdsp->card_name);
 		return -ENOMEM;
 	}
 
-	/* Align to bus-space 64K boundary */
+	/* copy into our own instances for alignment */
+	hdsp->capture_dma_buf = *capture_dma;
+	hdsp->playback_dma_buf = *playback_dma;
 
-	cb_bus = ALIGN(hdsp->capture_dma_buf->addr, 0x10000ul);
-	pb_bus = ALIGN(hdsp->playback_dma_buf->addr, 0x10000ul);
+	/* Align to bus-space 64K boundary */
+	hdsp->capture_dma_buf.addr = ALIGN(capture_dma->addr, 0x10000ul);
+	hdsp->playback_dma_buf.addr = ALIGN(playback_dma->addr, 0x10000ul);
 
 	/* Tell the card where it is */
+	hdsp_write(hdsp, HDSP_inputBufferAddress, hdsp->capture_dma_buf.addr);
+	hdsp_write(hdsp, HDSP_outputBufferAddress, hdsp->playback_dma_buf.addr);
 
-	hdsp_write(hdsp, HDSP_inputBufferAddress, cb_bus);
-	hdsp_write(hdsp, HDSP_outputBufferAddress, pb_bus);
-
-	hdsp->capture_buffer = hdsp->capture_dma_buf->area + (cb_bus - hdsp->capture_dma_buf->addr);
-	hdsp->playback_buffer = hdsp->playback_dma_buf->area + (pb_bus - hdsp->playback_dma_buf->addr);
+	hdsp->capture_dma_buf.area += hdsp->capture_dma_buf.addr - capture_dma->addr;
+	hdsp->playback_dma_buf.area += hdsp->playback_dma_buf.addr - playback_dma->addr;
+	hdsp->capture_buffer = hdsp->capture_dma_buf.area;
+	hdsp->playback_buffer = hdsp->playback_dma_buf.area;
 
 	return 0;
 }
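
When the bus address is rounded up to the 64K boundary, the CPU pointer has to move by the same delta so that both views still name the same byte; that is what the paired addr/area adjustments above do, and the aligned copy can then be handed to snd_pcm_set_runtime_buffer() as an ordinary snd_dma_buffer. A user-space model of the arithmetic (addresses are made up):

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

int main(void)
{
	char ram[0x10000];			/* stand-in for the DMA area */
	char *area = ram;			/* CPU view of the buffer */
	uintptr_t addr = 0x12345678;		/* pretend bus address */
	uintptr_t aligned = ALIGN(addr, 0x10000);

	/* move the CPU pointer by the same delta as the bus address */
	area += aligned - addr;
	printf("delta = 0x%lx, area = ram + 0x%lx\n",
	       (unsigned long)(aligned - addr),
	       (unsigned long)(area - ram));
	return 0;
}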
@@ -4507,7 +4512,7 @@
 	snd_pcm_set_sync(substream);
 
         runtime->hw = snd_hdsp_playback_subinfo;
-	snd_pcm_set_runtime_buffer(substream, hdsp->playback_dma_buf);
+	snd_pcm_set_runtime_buffer(substream, &hdsp->playback_dma_buf);
 
 	hdsp->playback_pid = current->pid;
 	hdsp->playback_substream = substream;
@@ -4583,7 +4588,7 @@
 	snd_pcm_set_sync(substream);
 
 	runtime->hw = snd_hdsp_capture_subinfo;
-	snd_pcm_set_runtime_buffer(substream, hdsp->capture_dma_buf);
+	snd_pcm_set_runtime_buffer(substream, &hdsp->capture_dma_buf);
 
 	hdsp->capture_pid = current->pid;
 	hdsp->capture_substream = substream;
diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c
index e76f737..7755e19 100644
--- a/sound/pci/rme9652/rme9652.c
+++ b/sound/pci/rme9652/rme9652.c
@@ -208,8 +208,11 @@
 	unsigned char ds_channels;
 	unsigned char ss_channels;	/* different for hammerfall/hammerfall-light */
 
-	struct snd_dma_buffer *playback_dma_buf;
-	struct snd_dma_buffer *capture_dma_buf;
+	/* DMA buffers; these are copies of the original snd_dma_buffer
+	 * objects (which are managed via devres), adjusted for address alignment
+	 */
+	struct snd_dma_buffer playback_dma_buf;
+	struct snd_dma_buffer capture_dma_buf;
 
 	unsigned char *capture_buffer;	/* suitably aligned address */
 	unsigned char *playback_buffer;	/* suitably aligned address */
@@ -1719,30 +1722,32 @@
 
 static int snd_rme9652_initialize_memory(struct snd_rme9652 *rme9652)
 {
-	unsigned long pb_bus, cb_bus;
+	struct snd_dma_buffer *capture_dma, *playback_dma;
 
-	rme9652->capture_dma_buf =
-		snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
-	rme9652->playback_dma_buf =
-		snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
-	if (!rme9652->capture_dma_buf || !rme9652->playback_dma_buf) {
+	capture_dma = snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
+	playback_dma = snd_hammerfall_get_buffer(rme9652->pci, RME9652_DMA_AREA_BYTES);
+	if (!capture_dma || !playback_dma) {
 		dev_err(rme9652->card->dev,
 			"%s: no buffers available\n", rme9652->card_name);
 		return -ENOMEM;
 	}
 
-	/* Align to bus-space 64K boundary */
+	/* copy into our own instances for alignment */
+	rme9652->capture_dma_buf = *capture_dma;
+	rme9652->playback_dma_buf = *playback_dma;
 
-	cb_bus = ALIGN(rme9652->capture_dma_buf->addr, 0x10000ul);
-	pb_bus = ALIGN(rme9652->playback_dma_buf->addr, 0x10000ul);
+	/* Align to bus-space 64K boundary */
+	rme9652->capture_dma_buf.addr = ALIGN(capture_dma->addr, 0x10000ul);
+	rme9652->playback_dma_buf.addr = ALIGN(playback_dma->addr, 0x10000ul);
 
 	/* Tell the card where it is */
+	rme9652_write(rme9652, RME9652_rec_buffer, rme9652->capture_dma_buf.addr);
+	rme9652_write(rme9652, RME9652_play_buffer, rme9652->playback_dma_buf.addr);
 
-	rme9652_write(rme9652, RME9652_rec_buffer, cb_bus);
-	rme9652_write(rme9652, RME9652_play_buffer, pb_bus);
-
-	rme9652->capture_buffer = rme9652->capture_dma_buf->area + (cb_bus - rme9652->capture_dma_buf->addr);
-	rme9652->playback_buffer = rme9652->playback_dma_buf->area + (pb_bus - rme9652->playback_dma_buf->addr);
+	rme9652->capture_dma_buf.area += rme9652->capture_dma_buf.addr - capture_dma->addr;
+	rme9652->playback_dma_buf.area += rme9652->playback_dma_buf.addr - playback_dma->addr;
+	rme9652->capture_buffer = rme9652->capture_dma_buf.area;
+	rme9652->playback_buffer = rme9652->playback_dma_buf.area;
 
 	return 0;
 }
@@ -2259,7 +2264,7 @@
 	snd_pcm_set_sync(substream);
 
         runtime->hw = snd_rme9652_playback_subinfo;
-	snd_pcm_set_runtime_buffer(substream, rme9652->playback_dma_buf);
+	snd_pcm_set_runtime_buffer(substream, &rme9652->playback_dma_buf);
 
 	if (rme9652->capture_substream == NULL) {
 		rme9652_stop(rme9652);
@@ -2318,7 +2323,7 @@
 	snd_pcm_set_sync(substream);
 
 	runtime->hw = snd_rme9652_capture_subinfo;
-	snd_pcm_set_runtime_buffer(substream, rme9652->capture_dma_buf);
+	snd_pcm_set_runtime_buffer(substream, &rme9652->capture_dma_buf);
 
 	if (rme9652->playback_substream == NULL) {
 		rme9652_stop(rme9652);
diff --git a/sound/soc/codecs/cs35l41-spi.c b/sound/soc/codecs/cs35l41-spi.c
index 90a921f..3fa9974 100644
--- a/sound/soc/codecs/cs35l41-spi.c
+++ b/sound/soc/codecs/cs35l41-spi.c
@@ -42,34 +42,6 @@
 
 MODULE_DEVICE_TABLE(spi, cs35l41_id_spi);
 
-static void cs35l41_spi_otp_setup(struct cs35l41_private *cs35l41,
-				  bool is_pre_setup, unsigned int *freq)
-{
-	struct spi_device *spi;
-	u32 orig_spi_freq;
-
-	spi = to_spi_device(cs35l41->dev);
-
-	if (!spi) {
-		dev_err(cs35l41->dev, "%s: No SPI device\n", __func__);
-		return;
-	}
-
-	if (is_pre_setup) {
-		orig_spi_freq = spi->max_speed_hz;
-		if (orig_spi_freq > CS35L41_SPI_MAX_FREQ_OTP) {
-			spi->max_speed_hz = CS35L41_SPI_MAX_FREQ_OTP;
-			spi_setup(spi);
-		}
-		*freq = orig_spi_freq;
-	} else {
-		if (spi->max_speed_hz != *freq) {
-			spi->max_speed_hz = *freq;
-			spi_setup(spi);
-		}
-	}
-}
-
 static int cs35l41_spi_probe(struct spi_device *spi)
 {
 	const struct regmap_config *regmap_config = &cs35l41_regmap_spi;
@@ -81,6 +53,9 @@
 	if (!cs35l41)
 		return -ENOMEM;
 
+	spi->max_speed_hz = CS35L41_SPI_MAX_FREQ;
+	spi_setup(spi);
+
 	spi_set_drvdata(spi, cs35l41);
 	cs35l41->regmap = devm_regmap_init_spi(spi, regmap_config);
 	if (IS_ERR(cs35l41->regmap)) {
@@ -91,7 +66,6 @@
 
 	cs35l41->dev = &spi->dev;
 	cs35l41->irq = spi->irq;
-	cs35l41->otp_setup = cs35l41_spi_otp_setup;
 
 	return cs35l41_probe(cs35l41, pdata);
 }
diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c
index 94ed21d..9c4d481 100644
--- a/sound/soc/codecs/cs35l41.c
+++ b/sound/soc/codecs/cs35l41.c
@@ -302,7 +302,6 @@
 	const struct cs35l41_otp_packed_element_t *otp_map;
 	struct cs35l41_private *cs35l41 = data;
 	int bit_offset, word_offset, ret, i;
-	unsigned int orig_spi_freq;
 	unsigned int bit_sum = 8;
 	u32 otp_val, otp_id_reg;
 	u32 *otp_mem;
@@ -326,9 +325,6 @@
 		goto err_otp_unpack;
 	}
 
-	if (cs35l41->otp_setup)
-		cs35l41->otp_setup(cs35l41, true, &orig_spi_freq);
-
 	ret = regmap_bulk_read(cs35l41->regmap, CS35L41_OTP_MEM0, otp_mem,
 			       CS35L41_OTP_SIZE_WORDS);
 	if (ret < 0) {
@@ -336,9 +332,6 @@
 		goto err_otp_unpack;
 	}
 
-	if (cs35l41->otp_setup)
-		cs35l41->otp_setup(cs35l41, false, &orig_spi_freq);
-
 	otp_map = otp_map_match->map;
 
 	bit_offset = otp_map_match->bit_offset;
@@ -612,6 +605,12 @@
 	SND_SOC_DAPM_AIF_OUT("ASPTX3", NULL, 0, CS35L41_SP_ENABLES, 2, 0),
 	SND_SOC_DAPM_AIF_OUT("ASPTX4", NULL, 0, CS35L41_SP_ENABLES, 3, 0),
 
+	SND_SOC_DAPM_SIGGEN("VSENSE"),
+	SND_SOC_DAPM_SIGGEN("ISENSE"),
+	SND_SOC_DAPM_SIGGEN("VP"),
+	SND_SOC_DAPM_SIGGEN("VBST"),
+	SND_SOC_DAPM_SIGGEN("TEMP"),
+
 	SND_SOC_DAPM_ADC("VMON ADC", NULL, CS35L41_PWR_CTRL2, 12, 0),
 	SND_SOC_DAPM_ADC("IMON ADC", NULL, CS35L41_PWR_CTRL2, 13, 0),
 	SND_SOC_DAPM_ADC("VPMON ADC", NULL, CS35L41_PWR_CTRL2, 8, 0),
@@ -623,12 +622,6 @@
 			       cs35l41_main_amp_event,
 			       SND_SOC_DAPM_POST_PMD |	SND_SOC_DAPM_POST_PMU),
 
-	SND_SOC_DAPM_INPUT("VP"),
-	SND_SOC_DAPM_INPUT("VBST"),
-	SND_SOC_DAPM_INPUT("ISENSE"),
-	SND_SOC_DAPM_INPUT("VSENSE"),
-	SND_SOC_DAPM_INPUT("TEMP"),
-
 	SND_SOC_DAPM_MUX("ASP TX1 Source", SND_SOC_NOPM, 0, 0, &asp_tx1_mux),
 	SND_SOC_DAPM_MUX("ASP TX2 Source", SND_SOC_NOPM, 0, 0, &asp_tx2_mux),
 	SND_SOC_DAPM_MUX("ASP TX3 Source", SND_SOC_NOPM, 0, 0, &asp_tx3_mux),
@@ -674,8 +667,8 @@
 	{"VMON ADC", NULL, "VSENSE"},
 	{"IMON ADC", NULL, "ISENSE"},
 	{"VPMON ADC", NULL, "VP"},
-	{"TEMPMON ADC", NULL, "TEMP"},
 	{"VBSTMON ADC", NULL, "VBST"},
+	{"TEMPMON ADC", NULL, "TEMP"},
 
 	{"ASPRX1", NULL, "AMP Playback"},
 	{"ASPRX2", NULL, "AMP Playback"},
diff --git a/sound/soc/codecs/cs35l41.h b/sound/soc/codecs/cs35l41.h
index 6cffe8a..48485b0 100644
--- a/sound/soc/codecs/cs35l41.h
+++ b/sound/soc/codecs/cs35l41.h
@@ -726,7 +726,7 @@
 #define CS35L41_FS2_WINDOW_MASK		0x00FFF800
 #define CS35L41_FS2_WINDOW_SHIFT	12
 
-#define CS35L41_SPI_MAX_FREQ_OTP	4000000
+#define CS35L41_SPI_MAX_FREQ		4000000
 
 #define CS35L41_RX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
 #define CS35L41_TX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
@@ -764,8 +764,6 @@
 	int irq;
 	/* GPIO for /RST */
 	struct gpio_desc *reset_gpio;
-	void (*otp_setup)(struct cs35l41_private *cs35l41, bool is_pre_setup,
-			  unsigned int *freq);
 };
 
 int cs35l41_probe(struct cs35l41_private *cs35l41,
diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index 2bed5cf..aec51272 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -2188,7 +2188,7 @@
 		snd_soc_component_update_bits(component,
 				CDC_RX_CLSH_DECAY_CTRL,
 				CDC_RX_CLSH_DECAY_RATE_MASK, 0x0);
-		snd_soc_component_update_bits(component,
+		snd_soc_component_write_field(component,
 				CDC_RX_RX1_RX_PATH_CFG0,
 				CDC_RX_RXn_CLSH_EN_MASK, 0x1);
 		break;
diff --git a/sound/soc/codecs/rk817_codec.c b/sound/soc/codecs/rk817_codec.c
index 943d7d9..03f24edf 100644
--- a/sound/soc/codecs/rk817_codec.c
+++ b/sound/soc/codecs/rk817_codec.c
@@ -539,3 +539,4 @@
 MODULE_DESCRIPTION("ASoC RK817 codec driver");
 MODULE_AUTHOR("binyuan <kevan.lan@rock-chips.com>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:rk817-codec");
diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c
index 297af7f..b62301a 100644
--- a/sound/soc/codecs/rt1011.c
+++ b/sound/soc/codecs/rt1011.c
@@ -1311,13 +1311,54 @@
 	.put = rt1011_r0_load_mode_put \
 }
 
-static const char * const rt1011_i2s_ref_texts[] = {
-	"Left Channel", "Right Channel"
+static const char * const rt1011_i2s_ref[] = {
+	"None", "Left Channel", "Right Channel"
 };
 
-static SOC_ENUM_SINGLE_DECL(rt1011_i2s_ref_enum,
-			    RT1011_TDM1_SET_1, 7,
-			    rt1011_i2s_ref_texts);
+static SOC_ENUM_SINGLE_DECL(rt1011_i2s_ref_enum, 0, 0,
+	rt1011_i2s_ref);
+
+static int rt1011_i2s_ref_put(struct snd_kcontrol *kcontrol,
+		struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
+	struct rt1011_priv *rt1011 =
+		snd_soc_component_get_drvdata(component);
+
+	rt1011->i2s_ref = ucontrol->value.enumerated.item[0];
+	switch (rt1011->i2s_ref) {
+	case RT1011_I2S_REF_LEFT_CH:
+		regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240);
+		regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8);
+		regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x1022);
+		regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4);
+		break;
+	case RT1011_I2S_REF_RIGHT_CH:
+		regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240);
+		regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8);
+		regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x10a2);
+		regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4);
+		break;
+	default:
+		dev_info(component->dev, "I2S Reference: Do nothing\n");
+	}
+
+	return 0;
+}
+
+static int rt1011_i2s_ref_get(struct snd_kcontrol *kcontrol,
+		struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
+	struct rt1011_priv *rt1011 =
+		snd_soc_component_get_drvdata(component);
+
+	ucontrol->value.enumerated.item[0] = rt1011->i2s_ref;
+
+	return 0;
+}
 
 static const struct snd_kcontrol_new rt1011_snd_controls[] = {
 	/* I2S Data In Selection */
@@ -1358,7 +1399,8 @@
 	SOC_SINGLE("R0 Temperature", RT1011_STP_INITIAL_RESISTANCE_TEMP,
 		2, 255, 0),
 	/* I2S Reference */
-	SOC_ENUM("I2S Reference", rt1011_i2s_ref_enum),
+	SOC_ENUM_EXT("I2S Reference", rt1011_i2s_ref_enum,
+		rt1011_i2s_ref_get, rt1011_i2s_ref_put),
 };
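
SOC_ENUM_EXT is used here because the new "None" choice has no single register bit behind it: the selection is cached in driver data and the put handler programs a whole register sequence. A user-space model of that cached-enum pattern (register writes reduced to prints, names illustrative):

#include <stdio.h>

enum { REF_NONE, REF_LEFT, REF_RIGHT };

struct priv { int i2s_ref; };

static int ref_put(struct priv *p, int val)
{
	p->i2s_ref = val;		/* cache the user's choice */
	switch (val) {
	case REF_LEFT:	printf("write left-channel reg sequence\n"); break;
	case REF_RIGHT:	printf("write right-channel reg sequence\n"); break;
	default:	printf("do nothing\n");
	}
	return 0;
}

static int ref_get(const struct priv *p) { return p->i2s_ref; }

int main(void)
{
	struct priv p = { REF_NONE };

	ref_put(&p, REF_LEFT);
	printf("current = %d\n", ref_get(&p));
	return 0;
}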
 
 static int rt1011_is_sys_clk_from_pll(struct snd_soc_dapm_widget *source,
@@ -2017,6 +2059,7 @@
 
 	schedule_work(&rt1011->cali_work);
 
+	rt1011->i2s_ref = 0;
 	rt1011->bq_drc_params = devm_kcalloc(component->dev,
 		RT1011_ADVMODE_NUM, sizeof(struct rt1011_bq_drc_params *),
 		GFP_KERNEL);
diff --git a/sound/soc/codecs/rt1011.h b/sound/soc/codecs/rt1011.h
index 68fadc1..4d6e749 100644
--- a/sound/soc/codecs/rt1011.h
+++ b/sound/soc/codecs/rt1011.h
@@ -654,6 +654,12 @@
 	RT1011_AIFS
 };
 
+enum {
+	RT1011_I2S_REF_NONE,
+	RT1011_I2S_REF_LEFT_CH,
+	RT1011_I2S_REF_RIGHT_CH,
+};
+
 /* BiQual & DRC related settings */
 #define RT1011_BQ_DRC_NUM 128
 struct rt1011_bq_drc_params {
@@ -692,6 +698,7 @@
 	unsigned int r0_reg, cali_done;
 	unsigned int r0_calib, temperature_calib;
 	int recv_spk_mode;
+	int i2s_ref;
 };
 
 #endif		/* end of _RT1011_H_ */
diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c
index 983347b..20e0f90 100644
--- a/sound/soc/codecs/rt5682-i2c.c
+++ b/sound/soc/codecs/rt5682-i2c.c
@@ -198,6 +198,7 @@
 	}
 
 	mutex_init(&rt5682->calibrate_mutex);
+	mutex_init(&rt5682->jdet_mutex);
 	rt5682_calibrate(rt5682);
 
 	rt5682_apply_patch_list(rt5682, &i2c->dev);
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index 78b4cb5..04cb747 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -48,6 +48,8 @@
 	{RT5682_SAR_IL_CMD_6, 0x0110},
 	{RT5682_CHARGE_PUMP_1, 0x0210},
 	{RT5682_HP_LOGIC_CTRL_2, 0x0007},
+	{RT5682_SAR_IL_CMD_2, 0xac00},
+	{RT5682_CBJ_CTRL_7, 0x0104},
 };
 
 void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev)
@@ -940,6 +942,10 @@
 		snd_soc_component_update_bits(component,
 			RT5682_HP_CHARGE_PUMP_1,
 			RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
+		rt5682_enable_push_button_irq(component, false);
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+			RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
+		usleep_range(55000, 60000);
 		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
 			RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH);
 
@@ -1092,6 +1098,7 @@
 	while (!rt5682->component->card->instantiated)
 		usleep_range(10000, 15000);
 
+	mutex_lock(&rt5682->jdet_mutex);
 	mutex_lock(&rt5682->calibrate_mutex);
 
 	val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
@@ -1165,6 +1172,7 @@
 	}
 
 	mutex_unlock(&rt5682->calibrate_mutex);
+	mutex_unlock(&rt5682->jdet_mutex);
 }
 EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler);
 
@@ -1514,6 +1522,7 @@
 {
 	struct snd_soc_component *component =
 		snd_soc_dapm_to_component(w->dapm);
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
 
 	switch (event) {
 	case SND_SOC_DAPM_PRE_PMU:
@@ -1525,12 +1534,17 @@
 			RT5682_DEPOP_1, 0x60, 0x60);
 		snd_soc_component_update_bits(component,
 			RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080);
+
+		mutex_lock(&rt5682->jdet_mutex);
+
 		snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
 			RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN,
 			RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN);
 		usleep_range(5000, 10000);
 		snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
 			RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L);
+
+		mutex_unlock(&rt5682->jdet_mutex);
 		break;
 
 	case SND_SOC_DAPM_POST_PMD:
@@ -2942,10 +2956,7 @@
 
 	cancel_delayed_work_sync(&rt5682->jack_detect_work);
 	cancel_delayed_work_sync(&rt5682->jd_check_work);
-	if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
-		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
-			RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
-			RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
+	if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
 		val = snd_soc_component_read(component,
 				RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK;
 
@@ -2967,10 +2978,17 @@
 		/* enter SAR ADC power saving mode */
 		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
 			RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK |
-			RT5682_SAR_BUTDET_RST_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, 0);
+			RT5682_SAR_SEL_MB1_MB2_MASK, 0);
+		usleep_range(5000, 6000);
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+			RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
+			RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
+		usleep_range(10000, 12000);
 		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
-			RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_BUTDET_RST_MASK,
-			RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV | RT5682_SAR_BUTDET_RST_NORMAL);
+			RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK,
+			RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV);
+		snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1,
+			RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
 	}
 
 	regcache_cache_only(rt5682->regmap, true);
@@ -2988,10 +3006,11 @@
 	regcache_cache_only(rt5682->regmap, false);
 	regcache_sync(rt5682->regmap);
 
-	if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
+	if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
 		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
 			RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_SEL_MB1_MB2_MASK,
 			RT5682_SAR_BUTDET_POW_NORM | RT5682_SAR_SEL_MB1_MB2_AUTO);
+		usleep_range(5000, 6000);
 		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
 			RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
 			RT5682_CTRL_MB1_FSM | RT5682_CTRL_MB2_FSM);
@@ -2999,8 +3018,9 @@
 			RT5682_PWR_CBJ, RT5682_PWR_CBJ);
 	}
 
+	rt5682->jack_type = 0;
 	mod_delayed_work(system_power_efficient_wq,
-		&rt5682->jack_detect_work, msecs_to_jiffies(250));
+		&rt5682->jack_detect_work, msecs_to_jiffies(0));
 
 	return 0;
 }
diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h
index d93829c..c917c76 100644
--- a/sound/soc/codecs/rt5682.h
+++ b/sound/soc/codecs/rt5682.h
@@ -1463,6 +1463,7 @@
 
 	int jack_type;
 	int irq_work_delay_time;
+	struct mutex jdet_mutex;
 };
 
 extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES];
diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c
index f957498..7aa1772 100644
--- a/sound/soc/codecs/rt9120.c
+++ b/sound/soc/codecs/rt9120.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bits.h>
+#include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
@@ -23,9 +24,11 @@
 #define RT9120_REG_ERRRPT	0x10
 #define RT9120_REG_MSVOL	0x20
 #define RT9120_REG_SWRESET	0x40
+#define RT9120_REG_INTERCFG	0x63
 #define RT9120_REG_INTERNAL0	0x65
 #define RT9120_REG_INTERNAL1	0x69
 #define RT9120_REG_UVPOPT	0x6C
+#define RT9120_REG_DIGCFG	0xF8
 
 #define RT9120_VID_MASK		GENMASK(15, 8)
 #define RT9120_SWRST_MASK	BIT(7)
@@ -46,8 +49,10 @@
 #define RT9120_CFG_WORDLEN_24	24
 #define RT9120_CFG_WORDLEN_32	32
 #define RT9120_DVDD_UVSEL_MASK	GENMASK(5, 4)
+#define RT9120_AUTOSYNC_MASK	BIT(6)
 
-#define RT9120_VENDOR_ID	0x4200
+#define RT9120_VENDOR_ID	0x42
+#define RT9120S_VENDOR_ID	0x43
 #define RT9120_RESET_WAITMS	20
 #define RT9120_CHIPON_WAITMS	20
 #define RT9120_AMPON_WAITMS	50
@@ -61,9 +66,16 @@
 				 SNDRV_PCM_FMTBIT_S24_LE |\
 				 SNDRV_PCM_FMTBIT_S32_LE)
 
+enum {
+	CHIP_IDX_RT9120 = 0,
+	CHIP_IDX_RT9120S,
+	CHIP_IDX_MAX
+};
+
 struct rt9120_data {
 	struct device *dev;
 	struct regmap *regmap;
+	int chip_idx;
 };
 
 /* 11bit [min,max,step] = [-103.9375dB, 24dB, 0.0625dB] */
@@ -149,8 +161,12 @@
 	snd_soc_component_init_regmap(comp, data->regmap);
 
 	/* Internal setting */
-	snd_soc_component_write(comp, RT9120_REG_INTERNAL1, 0x03);
-	snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x69);
+	if (data->chip_idx == CHIP_IDX_RT9120S) {
+		snd_soc_component_write(comp, RT9120_REG_INTERCFG, 0xde);
+		snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x66);
+	} else {
+		snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x04);
+	}
+
 	return 0;
 }
 
@@ -201,8 +217,8 @@
 			    struct snd_soc_dai *dai)
 {
 	struct snd_soc_component *comp = dai->component;
-	unsigned int param_width, param_slot_width;
-	int width;
+	unsigned int param_width, param_slot_width, auto_sync;
+	int width, fs;
 
 	switch (width = params_width(param)) {
 	case 16:
@@ -240,6 +256,16 @@
 
 	snd_soc_component_update_bits(comp, RT9120_REG_I2SWL,
 				      RT9120_AUDWL_MASK, param_slot_width);
+
+	fs = width * params_channels(param);
+	/* If fs is divisible by 48, disable auto sync */
+	if (fs % 48 == 0)
+		auto_sync = 0;
+	else
+		auto_sync = RT9120_AUTOSYNC_MASK;
+
+	snd_soc_component_update_bits(comp, RT9120_REG_DIGCFG,
+				      RT9120_AUTOSYNC_MASK, auto_sync);
 	return 0;
 }
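
Here fs is bits per frame, sample width times channel count, so for stereo only the 24-bit case lands on a multiple of 48. A quick runnable check of the rule:

#include <stdio.h>

int main(void)
{
	static const int width[] = { 16, 24, 32 };
	int i;

	for (i = 0; i < 3; i++) {
		int fs = width[i] * 2;	/* stereo */
		printf("width %2d: fs %2d -> auto sync %s\n",
		       width[i], fs, fs % 48 ? "on" : "off");
	}
	return 0;
}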
 
@@ -279,9 +305,11 @@
 	regmap_reg_range(0x20, 0x27),
 	regmap_reg_range(0x30, 0x38),
 	regmap_reg_range(0x3A, 0x40),
+	regmap_reg_range(0x63, 0x63),
 	regmap_reg_range(0x65, 0x65),
 	regmap_reg_range(0x69, 0x69),
-	regmap_reg_range(0x6C, 0x6C)
+	regmap_reg_range(0x6C, 0x6C),
+	regmap_reg_range(0xF8, 0xF8)
 };
 
 static const struct regmap_access_table rt9120_rd_table = {
@@ -297,9 +325,11 @@
 	regmap_reg_range(0x30, 0x38),
 	regmap_reg_range(0x3A, 0x3D),
 	regmap_reg_range(0x40, 0x40),
+	regmap_reg_range(0x63, 0x63),
 	regmap_reg_range(0x65, 0x65),
 	regmap_reg_range(0x69, 0x69),
-	regmap_reg_range(0x6C, 0x6C)
+	regmap_reg_range(0x6C, 0x6C),
+	regmap_reg_range(0xF8, 0xF8)
 };
 
 static const struct regmap_access_table rt9120_wr_table = {
@@ -370,7 +400,7 @@
 static const struct regmap_config rt9120_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 32,
-	.max_register = RT9120_REG_UVPOPT,
+	.max_register = RT9120_REG_DIGCFG,
 
 	.reg_read = rt9120_reg_read,
 	.reg_write = rt9120_reg_write,
@@ -388,8 +418,16 @@
 	if (ret)
 		return ret;
 
-	if ((devid & RT9120_VID_MASK) != RT9120_VENDOR_ID) {
-		dev_err(data->dev, "DEVID not correct [0x%04x]\n", devid);
+	devid = FIELD_GET(RT9120_VID_MASK, devid);
+	switch (devid) {
+	case RT9120_VENDOR_ID:
+		data->chip_idx = CHIP_IDX_RT9120;
+		break;
+	case RT9120S_VENDOR_ID:
+		data->chip_idx = CHIP_IDX_RT9120S;
+		break;
+	default:
+		dev_err(data->dev, "DEVID not correct [0x%x]\n", devid);
 		return -ENODEV;
 	}
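
FIELD_GET() extracts a bit field by its mask, so the vendor byte can be compared as 0x42/0x43 instead of as shifted 16-bit constants. A user-space approximation of the two macros used above (the real kernel versions live in linux/bits.h and linux/bitfield.h; this variant assumes a contiguous mask):

#include <stdio.h>

#define GENMASK(h, l)		(((~0U) << (l)) & (~0U >> (31 - (h))))
/* divide by the mask's lowest set bit to shift the field down */
#define FIELD_GET(mask, val)	(((val) & (mask)) / ((mask) & ~((mask) << 1)))

#define VID_MASK	GENMASK(15, 8)

int main(void)
{
	unsigned int devid = 0x4300;	/* sample RT9120S readout */

	printf("vendor id = 0x%02x\n", FIELD_GET(VID_MASK, devid));
	return 0;
}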
 
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index c496b359f..4f568ab 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -1896,9 +1896,8 @@
 	}
 
 	wcd->dai[dai->id].sconfig.rate = params_rate(params);
-	wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
 
-	return 0;
+	return wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
 }
 
 static int wcd934x_hw_free(struct snd_pcm_substream *substream,
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index 52de7d1..67151c7 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -1174,6 +1174,9 @@
 	case WCD938X_DIGITAL_INTR_STATUS_0:
 	case WCD938X_DIGITAL_INTR_STATUS_1:
 	case WCD938X_DIGITAL_INTR_STATUS_2:
+	case WCD938X_DIGITAL_INTR_CLEAR_0:
+	case WCD938X_DIGITAL_INTR_CLEAR_1:
+	case WCD938X_DIGITAL_INTR_CLEAR_2:
 	case WCD938X_DIGITAL_SWR_HM_TEST_0:
 	case WCD938X_DIGITAL_SWR_HM_TEST_1:
 	case WCD938X_DIGITAL_EFUSE_T_DATA_0:
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index d4f0d72c..6cb01a8 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -617,8 +617,9 @@
 	switch (cs_dsp->fw_ver) {
 	case 0:
 	case 1:
-		snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s %x",
-			 cs_dsp->name, region_name, cs_ctl->alg_region.alg);
+		ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN,
+				"%s %s %x", cs_dsp->name, region_name,
+				cs_ctl->alg_region.alg);
 		break;
 	case 2:
 		ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN,
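
The switch from snprintf() to scnprintf() matters whenever the return value feeds later length checks: snprintf() reports the length the string would have had, while scnprintf() reports what was actually stored. A runnable comparison (scnprintf() re-implemented here for user space, mirroring the kernel semantics):

#include <stdio.h>
#include <stdarg.h>

static int scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(buf, size, fmt, args);
	va_end(args);
	if (n >= (int)size)
		n = size ? (int)size - 1 : 0;	/* clamp to what was stored */
	return n;
}

int main(void)
{
	char name[8];	/* deliberately too small for "DSP1X abc" */

	printf("snprintf : %d\n", snprintf(name, sizeof(name), "DSP1X %x", 0xabc));
	printf("scnprintf: %d\n", scnprintf(name, sizeof(name), "DSP1X %x", 0xabc));
	return 0;
}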
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index f104962..77219c3 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -248,6 +248,75 @@
 					SOF_BT_OFFLOAD_SSP(2) |
 					SOF_SSP_BT_OFFLOAD_PRESENT),
 	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0AF3"),
+		},
+		/* No Jack */
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					SOF_SDW_FOUR_SPK),
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B00")
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					RT711_JD2 |
+					SOF_SDW_FOUR_SPK),
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B01")
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					RT711_JD2 |
+					SOF_SDW_FOUR_SPK),
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B11")
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					RT711_JD2 |
+					SOF_SDW_FOUR_SPK),
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B12")
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					RT711_JD2 |
+					SOF_SDW_FOUR_SPK),
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B13"),
+		},
+		/* No Jack */
+		.driver_data = (void *)SOF_SDW_TGL_HDMI,
+	},
+	{
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B29"),
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					RT711_JD2 |
+					SOF_SDW_FOUR_SPK),
+	},
 	{}
 };
 
diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
index 06f5034..b61a778 100644
--- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
@@ -74,6 +74,15 @@
 	}
 };
 
+static const struct snd_soc_acpi_adr_device rt711_sdca_2_adr[] = {
+	{
+		.adr = 0x000230025D071101ull,
+		.num_endpoints = 1,
+		.endpoints = &single_endpoint,
+		.name_prefix = "rt711"
+	}
+};
+
 static const struct snd_soc_acpi_adr_device rt1316_1_group1_adr[] = {
 	{
 		.adr = 0x000131025D131601ull, /* unique ID is set for some reason */
@@ -101,6 +110,24 @@
 	}
 };
 
+static const struct snd_soc_acpi_adr_device rt1316_0_group2_adr[] = {
+	{
+		.adr = 0x000031025D131601ull,
+		.num_endpoints = 1,
+		.endpoints = &spk_l_endpoint,
+		.name_prefix = "rt1316-1"
+	}
+};
+
+static const struct snd_soc_acpi_adr_device rt1316_1_group2_adr[] = {
+	{
+		.adr = 0x000130025D131601ull,
+		.num_endpoints = 1,
+		.endpoints = &spk_r_endpoint,
+		.name_prefix = "rt1316-2"
+	}
+};
+
 static const struct snd_soc_acpi_adr_device rt1316_2_single_adr[] = {
 	{
 		.adr = 0x000230025D131601ull,
@@ -209,6 +236,63 @@
 	{}
 };
 
+static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link2_rt1316_link01_rt714_link3[] = {
+	{
+		.mask = BIT(2),
+		.num_adr = ARRAY_SIZE(rt711_sdca_2_adr),
+		.adr_d = rt711_sdca_2_adr,
+	},
+	{
+		.mask = BIT(0),
+		.num_adr = ARRAY_SIZE(rt1316_0_group2_adr),
+		.adr_d = rt1316_0_group2_adr,
+	},
+	{
+		.mask = BIT(1),
+		.num_adr = ARRAY_SIZE(rt1316_1_group2_adr),
+		.adr_d = rt1316_1_group2_adr,
+	},
+	{
+		.mask = BIT(3),
+		.num_adr = ARRAY_SIZE(rt714_3_adr),
+		.adr_d = rt714_3_adr,
+	},
+	{}
+};
+
+static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link12_rt714_link0[] = {
+	{
+		.mask = BIT(1),
+		.num_adr = ARRAY_SIZE(rt1316_1_group1_adr),
+		.adr_d = rt1316_1_group1_adr,
+	},
+	{
+		.mask = BIT(2),
+		.num_adr = ARRAY_SIZE(rt1316_2_group1_adr),
+		.adr_d = rt1316_2_group1_adr,
+	},
+	{
+		.mask = BIT(0),
+		.num_adr = ARRAY_SIZE(rt714_0_adr),
+		.adr_d = rt714_0_adr,
+	},
+	{}
+};
+
+static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link3[] = {
+	{
+		.mask = BIT(2),
+		.num_adr = ARRAY_SIZE(rt1316_2_single_adr),
+		.adr_d = rt1316_2_single_adr,
+	},
+	{
+		.mask = BIT(3),
+		.num_adr = ARRAY_SIZE(rt714_3_adr),
+		.adr_d = rt714_3_adr,
+	},
+	{}
+};
+
 static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link0[] = {
 	{
 		.mask = BIT(2),
@@ -340,6 +424,27 @@
 		.sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l13-rt714-l2.tplg",
 	},
 	{
+		.link_mask = 0xF, /* 4 active links required */
+		.links = adl_sdw_rt711_link2_rt1316_link01_rt714_link3,
+		.drv_name = "sof_sdw",
+		.sof_fw_filename = "sof-adl.ri",
+		.sof_tplg_filename = "sof-adl-rt711-l2-rt1316-l01-rt714-l3.tplg",
+	},
+	{
+		.link_mask = 0xC, /* rt1316 on link2 & rt714 on link3 */
+		.links = adl_sdw_rt1316_link2_rt714_link3,
+		.drv_name = "sof_sdw",
+		.sof_fw_filename = "sof-adl.ri",
+		.sof_tplg_filename = "sof-adl-rt1316-l2-mono-rt714-l3.tplg",
+	},
+	{
+		.link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */
+		.links = adl_sdw_rt1316_link12_rt714_link0,
+		.drv_name = "sof_sdw",
+		.sof_fw_filename = "sof-adl.ri",
+		.sof_tplg_filename = "sof-adl-rt1316-l12-rt714-l0.tplg",
+	},
+	{
 		.link_mask = 0x5, /* 2 active links required */
 		.links = adl_sdw_rt1316_link2_rt714_link0,
 		.drv_name = "sof_sdw",
diff --git a/sound/soc/intel/common/soc-acpi-intel-cml-match.c b/sound/soc/intel/common/soc-acpi-intel-cml-match.c
index b4eb0c9..4eebc79 100644
--- a/sound/soc/intel/common/soc-acpi-intel-cml-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-cml-match.c
@@ -81,6 +81,12 @@
 		.sof_fw_filename = "sof-cml.ri",
 		.sof_tplg_filename = "sof-cml-da7219-max98390.tplg",
 	},
+	{
+		.id = "ESSX8336",
+		.drv_name = "sof-essx8336",
+		.sof_fw_filename = "sof-cml.ri",
+		.sof_tplg_filename = "sof-cml-es8336.tplg",
+	},
 	{},
 };
 EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cml_machines);
diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
index 6350390..31494930 100644
--- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
+++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
@@ -1054,6 +1054,7 @@
 	int irq_id;
 	struct mtk_base_afe *afe;
 	struct mt8173_afe_private *afe_priv;
+	struct snd_soc_component *comp_pcm, *comp_hdmi;
 
 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(33));
 	if (ret)
@@ -1142,23 +1143,55 @@
 	if (ret)
 		goto err_pm_disable;
 
-	ret = devm_snd_soc_register_component(&pdev->dev,
-					 &mt8173_afe_pcm_dai_component,
-					 mt8173_afe_pcm_dais,
-					 ARRAY_SIZE(mt8173_afe_pcm_dais));
+	comp_pcm = devm_kzalloc(&pdev->dev, sizeof(*comp_pcm), GFP_KERNEL);
+	if (!comp_pcm) {
+		ret = -ENOMEM;
+		goto err_pm_disable;
+	}
+
+	ret = snd_soc_component_initialize(comp_pcm,
+					   &mt8173_afe_pcm_dai_component,
+					   &pdev->dev);
 	if (ret)
 		goto err_pm_disable;
 
-	ret = devm_snd_soc_register_component(&pdev->dev,
-					 &mt8173_afe_hdmi_dai_component,
-					 mt8173_afe_hdmi_dais,
-					 ARRAY_SIZE(mt8173_afe_hdmi_dais));
+#ifdef CONFIG_DEBUG_FS
+	comp_pcm->debugfs_prefix = "pcm";
+#endif
+
+	ret = snd_soc_add_component(comp_pcm,
+				    mt8173_afe_pcm_dais,
+				    ARRAY_SIZE(mt8173_afe_pcm_dais));
 	if (ret)
 		goto err_pm_disable;
 
+	comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL);
+	if (!comp_hdmi) {
+		ret = -ENOMEM;
+		goto err_pm_disable;
+	}
+
+	ret = snd_soc_component_initialize(comp_hdmi,
+					   &mt8173_afe_hdmi_dai_component,
+					   &pdev->dev);
+	if (ret)
+		goto err_pm_disable;
+
+#ifdef CONFIG_DEBUG_FS
+	comp_hdmi->debugfs_prefix = "hdmi";
+#endif
+
+	ret = snd_soc_add_component(comp_hdmi,
+				    mt8173_afe_hdmi_dais,
+				    ARRAY_SIZE(mt8173_afe_hdmi_dais));
+	if (ret)
+		goto err_cleanup_components;
+
 	dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n");
 	return 0;
 
+err_cleanup_components:
+	snd_soc_unregister_component(&pdev->dev);
 err_pm_disable:
 	pm_runtime_disable(&pdev->dev);
 	return ret;
@@ -1166,6 +1199,8 @@
 
 static int mt8173_afe_pcm_dev_remove(struct platform_device *pdev)
 {
+	snd_soc_unregister_component(&pdev->dev);
+
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
 		mt8173_afe_runtime_suspend(&pdev->dev);
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
index c28ebf8..2cbf679 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
@@ -30,15 +30,15 @@
 };
 
 static const struct snd_soc_dapm_widget mt8173_rt5650_widgets[] = {
-	SND_SOC_DAPM_SPK("Speaker", NULL),
+	SND_SOC_DAPM_SPK("Ext Spk", NULL),
 	SND_SOC_DAPM_MIC("Int Mic", NULL),
 	SND_SOC_DAPM_HP("Headphone", NULL),
 	SND_SOC_DAPM_MIC("Headset Mic", NULL),
 };
 
 static const struct snd_soc_dapm_route mt8173_rt5650_routes[] = {
-	{"Speaker", NULL, "SPOL"},
-	{"Speaker", NULL, "SPOR"},
+	{"Ext Spk", NULL, "SPOL"},
+	{"Ext Spk", NULL, "SPOR"},
 	{"DMIC L1", NULL, "Int Mic"},
 	{"DMIC R1", NULL, "Int Mic"},
 	{"Headphone", NULL, "HPOL"},
@@ -48,7 +48,7 @@
 };
 
 static const struct snd_kcontrol_new mt8173_rt5650_controls[] = {
-	SOC_DAPM_PIN_SWITCH("Speaker"),
+	SOC_DAPM_PIN_SWITCH("Ext Spk"),
 	SOC_DAPM_PIN_SWITCH("Int Mic"),
 	SOC_DAPM_PIN_SWITCH("Headphone"),
 	SOC_DAPM_PIN_SWITCH("Headset Mic"),
diff --git a/sound/soc/qcom/qdsp6/audioreach.h b/sound/soc/qcom/qdsp6/audioreach.h
index 4f693a26..3ee8bfc 100644
--- a/sound/soc/qcom/qdsp6/audioreach.h
+++ b/sound/soc/qcom/qdsp6/audioreach.h
@@ -550,6 +550,10 @@
 	uint32_t clock_root;
 } __packed;
 
+struct audio_hw_clk_rel_cfg {
+	uint32_t clock_id;
+} __packed;
+
 #define PARAM_ID_HW_EP_POWER_MODE_CFG	0x8001176
 #define AR_HW_EP_POWER_MODE_0	0 /* default */
 #define AR_HW_EP_POWER_MODE_1	1 /* XO Shutdown allowed */
diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c
index 3d831b6..72c5719 100644
--- a/sound/soc/qcom/qdsp6/q6adm.c
+++ b/sound/soc/qcom/qdsp6/q6adm.c
@@ -390,7 +390,7 @@
 	int ret = 0;
 
 	if (port_id < 0) {
-		dev_err(dev, "Invalid port_id 0x%x\n", port_id);
+		dev_err(dev, "Invalid port_id %d\n", port_id);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -508,7 +508,7 @@
 		int port_idx = payload_map.port_id[i];
 
 		if (port_idx < 0) {
-			dev_err(dev, "Invalid port_id 0x%x\n",
+			dev_err(dev, "Invalid port_id %d\n",
 				payload_map.port_id[i]);
 			kfree(pkt);
 			return -EINVAL;
diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index 46f3655..b74b677 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -269,9 +269,7 @@
 
 	if (ret < 0) {
 		dev_err(dev, "%s: q6asm_open_write failed\n", __func__);
-		q6asm_audio_client_free(prtd->audio_client);
-		prtd->audio_client = NULL;
-		return -ENOMEM;
+		goto open_err;
 	}
 
 	prtd->session_id = q6asm_get_session_id(prtd->audio_client);
@@ -279,7 +277,7 @@
 			      prtd->session_id, substream->stream);
 	if (ret) {
 		dev_err(dev, "%s: stream reg failed ret:%d\n", __func__, ret);
-		return ret;
+		goto routing_err;
 	}
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
@@ -301,10 +299,19 @@
 	}
 	if (ret < 0)
 		dev_info(dev, "%s: CMD Format block failed\n", __func__);
+	else
+		prtd->state = Q6ASM_STREAM_RUNNING;
 
-	prtd->state = Q6ASM_STREAM_RUNNING;
+	return ret;
 
-	return 0;
+routing_err:
+	q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE);
+open_err:
+	q6asm_unmap_memory_regions(substream->stream, prtd->audio_client);
+	q6asm_audio_client_free(prtd->audio_client);
+	prtd->audio_client = NULL;
+
+	return ret;
 }
 
 static int q6asm_dai_trigger(struct snd_soc_component *component,
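
The open path above is reshaped into the usual goto-unwind ladder: every
failure jumps to a label that releases what was acquired so far, most recent
first, instead of returning early with a half-torn-down audio client. A
generic sketch of the idiom; the step/undo helpers are hypothetical:

static int example_open(struct example *ex)
{
	int ret;

	ret = step_a(ex);
	if (ret)
		return ret;		/* nothing to undo yet */

	ret = step_b(ex);
	if (ret)
		goto undo_a;

	ret = step_c(ex);
	if (ret)
		goto undo_b;

	return 0;

undo_b:
	undo_step_b(ex);		/* most recent acquisition first */
undo_a:
	undo_step_a(ex);
	return ret;
}
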
diff --git a/sound/soc/qcom/qdsp6/q6prm.c b/sound/soc/qcom/qdsp6/q6prm.c
index 82c40f2..cda33ded 100644
--- a/sound/soc/qcom/qdsp6/q6prm.c
+++ b/sound/soc/qcom/qdsp6/q6prm.c
@@ -42,6 +42,12 @@
 	struct audio_hw_clk_cfg clock_id;
 } __packed;
 
+struct prm_cmd_release_rsc {
+	struct apm_module_param_data param_data;
+	uint32_t num_clk_id;
+	struct audio_hw_clk_rel_cfg clock_id;
+} __packed;
+
 static int q6prm_send_cmd_sync(struct q6prm *prm, struct gpr_pkt *pkt, uint32_t rsp_opcode)
 {
 	return audioreach_send_cmd_sync(prm->dev, prm->gdev, &prm->result, &prm->lock,
@@ -102,8 +108,8 @@
 }
 EXPORT_SYMBOL_GPL(q6prm_unvote_lpass_core_hw);
 
-int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
-			  unsigned int freq)
+static int q6prm_request_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+				     unsigned int freq)
 {
 	struct q6prm *prm = dev_get_drvdata(dev->parent);
 	struct apm_module_param_data *param_data;
@@ -138,6 +144,49 @@
 
 	return rc;
 }
+
+static int q6prm_release_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+				     unsigned int freq)
+{
+	struct q6prm *prm = dev_get_drvdata(dev->parent);
+	struct apm_module_param_data *param_data;
+	struct prm_cmd_release_rsc *rel;
+	gpr_device_t *gdev = prm->gdev;
+	struct gpr_pkt *pkt;
+	int rc;
+
+	pkt = audioreach_alloc_cmd_pkt(sizeof(*rel), PRM_CMD_RELEASE_HW_RSC, 0, gdev->svc.id,
+				       GPR_PRM_MODULE_IID);
+	if (IS_ERR(pkt))
+		return PTR_ERR(pkt);
+
+	rel = (void *)pkt + GPR_HDR_SIZE + APM_CMD_HDR_SIZE;
+
+	param_data = &rel->param_data;
+
+	param_data->module_instance_id = GPR_PRM_MODULE_IID;
+	param_data->error_code = 0;
+	param_data->param_id = PARAM_ID_RSC_AUDIO_HW_CLK;
+	param_data->param_size = sizeof(*rel) - APM_MODULE_PARAM_DATA_SIZE;
+
+	rel->num_clk_id = 1;
+	rel->clock_id.clock_id = clk_id;
+
+	rc = q6prm_send_cmd_sync(prm, pkt, PRM_CMD_RSP_RELEASE_HW_RSC);
+
+	kfree(pkt);
+
+	return rc;
+}
+
+int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+			  unsigned int freq)
+{
+	if (freq)
+		return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
+
+	return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
+}
 EXPORT_SYMBOL_GPL(q6prm_set_lpass_clock);
 
 static int prm_callback(struct gpr_resp_pkt *data, void *priv, int op)
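
After the split, q6prm_set_lpass_clock() stays the single entry point and a
zero frequency is the release convention. A sketch of how a caller might use
it; the EXAMPLE_* identifiers are illustrative placeholders, not real clock
IDs:

/* acquire the LPASS clock at a nominal rate */
static int example_clk_enable(struct device *dev)
{
	return q6prm_set_lpass_clock(dev, EXAMPLE_CLK_ID, EXAMPLE_CLK_ATTR,
				     EXAMPLE_CLK_ROOT, 19200000);
}

/* freq == 0 routes to q6prm_release_lpass_clock() internally */
static int example_clk_disable(struct device *dev)
{
	return q6prm_set_lpass_clock(dev, EXAMPLE_CLK_ID, EXAMPLE_CLK_ATTR,
				     EXAMPLE_CLK_ROOT, 0);
}
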
diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c
index 3390ebe..cd74681 100644
--- a/sound/soc/qcom/qdsp6/q6routing.c
+++ b/sound/soc/qcom/qdsp6/q6routing.c
@@ -372,6 +372,12 @@
 	}
 
 	session = &routing_data->sessions[stream_id - 1];
+	if (session->port_id < 0) {
+		dev_err(routing_data->dev, "Routing not setup for MultiMedia%d Session\n",
+			session->fedai_id);
+		return -EINVAL;
+	}
+
 	pdata = &routing_data->port_data[session->port_id];
 
 	mutex_lock(&routing_data->lock);
@@ -495,7 +501,11 @@
 		session->port_id = be_id;
 		snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update);
 	} else {
-		session->port_id = -1;
+		if (session->port_id == be_id) {
+			session->port_id = -1;
+			return 0;
+		}
+
 		snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update);
 	}
 
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index 16c6e02..03e0d4e 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -102,7 +102,7 @@
 	struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
 	if (dmaen->chan)
-		dmaengine_terminate_sync(dmaen->chan);
+		dmaengine_terminate_async(dmaen->chan);
 
 	return 0;
 }
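
dmaengine_terminate_sync() may sleep while it waits for in-flight descriptor
callbacks, which is not allowed in the atomic context this stop path can run
from; dmaengine_terminate_async() only requests the termination. The wait
then belongs in a sleepable teardown path, roughly:

#include <linux/dmaengine.h>

/* atomic-safe: request termination without waiting */
static void example_stop(struct dma_chan *chan)
{
	if (chan)
		dmaengine_terminate_async(chan);
}

/* process context: flush callbacks before freeing anything they touch */
static void example_teardown(struct dma_chan *chan)
{
	if (chan) {
		dmaengine_synchronize(chan);
		dma_release_channel(chan);
	}
}
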
diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c
index 2ae99b4..cbd7ea4 100644
--- a/sound/soc/soc-acpi.c
+++ b/sound/soc/soc-acpi.c
@@ -20,8 +20,10 @@
 
 	if (comp_ids) {
 		for (i = 0; i < comp_ids->num_codecs; i++) {
-			if (acpi_dev_present(comp_ids->codecs[i], NULL, -1))
+			if (acpi_dev_present(comp_ids->codecs[i], NULL, -1)) {
+				strscpy(machine->id, comp_ids->codecs[i], ACPI_ID_LEN);
 				return true;
+			}
 		}
 	}
 
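
strscpy() suits the fixed-size machine->id here: it copies at most
ACPI_ID_LEN - 1 characters, always NUL-terminates, and returns -E2BIG on
truncation. A small illustration; the buffer size and warning are arbitrary:

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/string.h>

static void example_copy_id(char *dst, size_t dst_size, const char *hid)
{
	ssize_t n = strscpy(dst, hid, dst_size);

	if (n == -E2BIG)
		pr_warn("ACPI id '%s' truncated\n", hid);
	/* dst is NUL-terminated in either case */
}
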
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 2892b0a..b06c5682 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2559,8 +2559,13 @@
 	return NULL;
 }
 
-static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
-				const char *pin, int status)
+/*
+ * set the DAPM pin status:
+ * returns 1 when the value has been updated, 0 when unchanged, or a negative
+ * error code; called from kcontrol put callback
+ */
+static int __snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+				  const char *pin, int status)
 {
 	struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true);
 	int ret = 0;
@@ -2586,6 +2591,18 @@
 	return ret;
 }
 
+/*
+ * similar to __snd_soc_dapm_set_pin(), but returns 0 when successful;
+ * called from several API functions below
+ */
+static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+				const char *pin, int status)
+{
+	int ret = __snd_soc_dapm_set_pin(dapm, pin, status);
+
+	return ret < 0 ? ret : 0;
+}
+
 /**
  * snd_soc_dapm_sync_unlocked - scan and power dapm paths
  * @dapm: DAPM context
@@ -3589,10 +3606,10 @@
 	const char *pin = (const char *)kcontrol->private_value;
 	int ret;
 
-	if (ucontrol->value.integer.value[0])
-		ret = snd_soc_dapm_enable_pin(&card->dapm, pin);
-	else
-		ret = snd_soc_dapm_disable_pin(&card->dapm, pin);
+	mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+	ret = __snd_soc_dapm_set_pin(&card->dapm, pin,
+				     !!ucontrol->value.integer.value[0]);
+	mutex_unlock(&card->dapm_mutex);
 
 	snd_soc_dapm_sync(&card->dapm);
 	return ret;
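
The __snd_soc_dapm_set_pin() split exists because ALSA kcontrol put callbacks
must return 1 when the value actually changed (so the core emits a
value-change notification) and 0 when it did not, while the public DAPM pin
helpers keep their 0-on-success convention. A minimal put callback honouring
that contract; example_priv and example_apply() are placeholders:

#include <sound/control.h>

struct example_priv {
	bool on;
};

static void example_apply(struct example_priv *priv) { /* hw update */ }

static int example_switch_put(struct snd_kcontrol *kcontrol,
			      struct snd_ctl_elem_value *ucontrol)
{
	struct example_priv *priv = snd_kcontrol_chip(kcontrol);
	bool on = !!ucontrol->value.integer.value[0];

	if (priv->on == on)
		return 0;	/* unchanged: no event is emitted */

	priv->on = on;
	example_apply(priv);

	return 1;		/* changed: core raises the notification */
}
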
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 557e22c..f5b9e66 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -2700,6 +2700,7 @@
 /* remove dynamic controls from the component driver */
 int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
 {
+	struct snd_card *card = comp->card->snd_card;
 	struct snd_soc_dobj *dobj, *next_dobj;
 	int pass = SOC_TPLG_PASS_END;
 
@@ -2707,6 +2708,7 @@
 	while (pass >= SOC_TPLG_PASS_START) {
 
 		/* remove mixer controls */
+		down_write(&card->controls_rwsem);
 		list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list,
 			list) {
 
@@ -2745,6 +2747,7 @@
 				break;
 			}
 		}
+		up_write(&card->controls_rwsem);
 		pass--;
 	}
 
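
The teardown above now holds the card's controls_rwsem for writing across the
walk because control-list readers (the snd_ctl ioctl handlers, for one) take
the same rwsem and must never observe a control mid-removal. The general
shape, sketched with a hypothetical per-object helper:

static void example_remove_controls(struct snd_card *card,
				    struct snd_soc_component *comp)
{
	struct snd_soc_dobj *dobj, *next_dobj;

	down_write(&card->controls_rwsem);	/* exclude control readers */
	list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list, list)
		example_remove_dobj(card, dobj);	/* hypothetical */
	up_write(&card->controls_rwsem);
}
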
diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig
index 6bb4db8..041c546 100644
--- a/sound/soc/sof/Kconfig
+++ b/sound/soc/sof/Kconfig
@@ -47,7 +47,7 @@
 	  Say Y if you need this option. If unsure select "N".
 
 config SND_SOC_SOF_COMPRESS
-	tristate
+	bool
 	select SND_SOC_COMPRESS
 
 config SND_SOC_SOF_DEBUG_PROBES
diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c
index 58bb89a..bb1dfe4 100644
--- a/sound/soc/sof/control.c
+++ b/sound/soc/sof/control.c
@@ -69,7 +69,7 @@
 {
 	struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
 	struct snd_soc_component *scomp = scontrol->scomp;
-	enum sof_ipc_ctrl_type ctrl_type;
+	u32 ipc_cmd;
 	int ret;
 
 	if (!scontrol->comp_data_dirty)
@@ -79,9 +79,9 @@
 		return;
 
 	if (scontrol->cmd == SOF_CTRL_CMD_BINARY)
-		ctrl_type = SOF_IPC_COMP_GET_DATA;
+		ipc_cmd = SOF_IPC_COMP_GET_DATA;
 	else
-		ctrl_type = SOF_IPC_COMP_GET_VALUE;
+		ipc_cmd = SOF_IPC_COMP_GET_VALUE;
 
 	/* set the ABI header values */
 	cdata->data->magic = SOF_ABI_MAGIC;
@@ -89,7 +89,7 @@
 
 	/* refresh the component data from DSP */
 	scontrol->comp_data_dirty = false;
-	ret = snd_sof_ipc_set_get_comp_data(scontrol, ctrl_type,
+	ret = snd_sof_ipc_set_get_comp_data(scontrol, ipc_cmd,
 					    SOF_CTRL_TYPE_VALUE_CHAN_GET,
 					    scontrol->cmd, false);
 	if (ret < 0) {
diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c
index 30025d3..0862ff8 100644
--- a/sound/soc/sof/intel/hda-bus.c
+++ b/sound/soc/sof/intel/hda-bus.c
@@ -10,6 +10,8 @@
 #include <linux/io.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_i915.h>
+#include <sound/hda_codec.h>
+#include <sound/hda_register.h>
 #include "../sof-priv.h"
 #include "hda.h"
 
@@ -21,6 +23,18 @@
 #endif
 
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+static void update_codec_wake_enable(struct hdac_bus *bus, unsigned int addr, bool link_power)
+{
+	unsigned int mask = snd_hdac_chip_readw(bus, WAKEEN);
+
+	if (link_power)
+		mask &= ~BIT(addr);
+	else
+		mask |= BIT(addr);
+
+	snd_hdac_chip_updatew(bus, WAKEEN, STATESTS_INT_MASK, mask);
+}
+
 static void sof_hda_bus_link_power(struct hdac_device *codec, bool enable)
 {
 	struct hdac_bus *bus = codec->bus;
@@ -41,6 +55,9 @@
 	 */
 	if (codec->addr == HDA_IDISP_ADDR && !enable)
 		snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false);
+
+	/* WAKEEN needs to be set for disabled links */
+	update_codec_wake_enable(bus, codec->addr, enable);
 }
 
 static const struct hdac_bus_ops bus_core_ops = {
diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 058baca..287dc0e 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -622,8 +622,7 @@
 	hda_dsp_ipc_int_disable(sdev);
 
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-	if (runtime_suspend)
-		hda_codec_jack_wake_enable(sdev, true);
+	hda_codec_jack_wake_enable(sdev, runtime_suspend);
 
 	/* power down all hda link */
 	snd_hdac_ext_bus_link_power_down_all(bus);
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 883d78d..2c0d4d0 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -58,6 +58,13 @@
 		return -EINVAL;
 	}
 
+	/* DAI already configured, reset it before reconfiguring it */
+	if (sof_dai->configured) {
+		ret = hda_ctrl_dai_widget_free(w);
+		if (ret < 0)
+			return ret;
+	}
+
 	config = &sof_dai->dai_config[sof_dai->current_config];
 
 	/*
@@ -810,6 +817,20 @@
 	return 0;
 }
 
+static void hda_check_for_state_change(struct snd_sof_dev *sdev)
+{
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+	struct hdac_bus *bus = sof_to_bus(sdev);
+	unsigned int codec_mask;
+
+	codec_mask = snd_hdac_chip_readw(bus, STATESTS);
+	if (codec_mask) {
+		hda_codec_jack_check(sdev);
+		snd_hdac_chip_writew(bus, STATESTS, codec_mask);
+	}
+#endif
+}
+
 static irqreturn_t hda_dsp_interrupt_handler(int irq, void *context)
 {
 	struct snd_sof_dev *sdev = context;
@@ -851,6 +872,8 @@
 	if (hda_sdw_check_wakeen_irq(sdev))
 		hda_sdw_process_wakeen(sdev);
 
+	hda_check_for_state_change(sdev);
+
 	/* enable GIE interrupt */
 	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
 				SOF_HDA_INTCTL,
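
STATESTS latches one bit per codec address when that codec reports a state
change; the handler above reacts (re-checking jacks) and then acknowledges by
writing the observed bits back, the register being write-one-to-clear. The
same idiom for a hypothetical W1C status register:

static void example_handle_status(struct example_dev *dev)
{
	u32 pending = example_read(dev, EXAMPLE_STATUS);	/* latched bits */

	if (!pending)
		return;

	example_process(dev, pending);

	/* W1C: writing the bits back clears exactly what was observed */
	example_write(dev, EXAMPLE_STATUS, pending);
}
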
diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c
index 6254bac..717f45a 100644
--- a/sound/soc/stm/stm32_i2s.c
+++ b/sound/soc/stm/stm32_i2s.c
@@ -700,7 +700,7 @@
 		if (ret < 0)
 			return ret;
 
-		nb_bits = frame_len * ((cgfr & I2S_CGFR_CHLEN) + 1);
+		nb_bits = frame_len * (FIELD_GET(I2S_CGFR_CHLEN, cgfr) + 1);
 		ret = stm32_i2s_calc_clk_div(i2s, i2s_clock_rate,
 					     (nb_bits * rate));
 		if (ret)
diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c
index 8ee9a77..a74c980 100644
--- a/sound/soc/tegra/tegra186_dspk.c
+++ b/sound/soc/tegra/tegra186_dspk.c
@@ -26,51 +26,162 @@
 	{ TEGRA186_DSPK_CODEC_CTRL,  0x03000000 },
 };
 
-static int tegra186_dspk_get_control(struct snd_kcontrol *kcontrol,
+static int tegra186_dspk_get_fifo_th(struct snd_kcontrol *kcontrol,
 				     struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
 	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
 
-	if (strstr(kcontrol->id.name, "FIFO Threshold"))
-		ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
-	else if (strstr(kcontrol->id.name, "OSR Value"))
-		ucontrol->value.integer.value[0] = dspk->osr_val;
-	else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-		ucontrol->value.integer.value[0] = dspk->lrsel;
-	else if (strstr(kcontrol->id.name, "Channel Select"))
-		ucontrol->value.integer.value[0] = dspk->ch_sel;
-	else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-		ucontrol->value.integer.value[0] = dspk->mono_to_stereo;
-	else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-		ucontrol->value.integer.value[0] = dspk->stereo_to_mono;
+	ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
 
 	return 0;
 }
 
-static int tegra186_dspk_put_control(struct snd_kcontrol *kcontrol,
+static int tegra186_dspk_put_fifo_th(struct snd_kcontrol *kcontrol,
 				     struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
 	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
-	int val = ucontrol->value.integer.value[0];
+	int value = ucontrol->value.integer.value[0];
 
-	if (strstr(kcontrol->id.name, "FIFO Threshold"))
-		dspk->rx_fifo_th = val;
-	else if (strstr(kcontrol->id.name, "OSR Value"))
-		dspk->osr_val = val;
-	else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-		dspk->lrsel = val;
-	else if (strstr(kcontrol->id.name, "Channel Select"))
-		dspk->ch_sel = val;
-	else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-		dspk->mono_to_stereo = val;
-	else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-		dspk->stereo_to_mono = val;
+	if (value == dspk->rx_fifo_th)
+		return 0;
+
+	dspk->rx_fifo_th = value;
+
+	return 1;
+}
+
+static int tegra186_dspk_get_osr_val(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+	ucontrol->value.enumerated.item[0] = dspk->osr_val;
 
 	return 0;
 }
 
+static int tegra186_dspk_put_osr_val(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dspk->osr_val)
+		return 0;
+
+	dspk->osr_val = value;
+
+	return 1;
+}
+
+static int tegra186_dspk_get_pol_sel(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+	ucontrol->value.enumerated.item[0] = dspk->lrsel;
+
+	return 0;
+}
+
+static int tegra186_dspk_put_pol_sel(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dspk->lrsel)
+		return 0;
+
+	dspk->lrsel = value;
+
+	return 1;
+}
+
+static int tegra186_dspk_get_ch_sel(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+	ucontrol->value.enumerated.item[0] = dspk->ch_sel;
+
+	return 0;
+}
+
+static int tegra186_dspk_put_ch_sel(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dspk->ch_sel)
+		return 0;
+
+	dspk->ch_sel = value;
+
+	return 1;
+}
+
+static int tegra186_dspk_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+	ucontrol->value.enumerated.item[0] = dspk->mono_to_stereo;
+
+	return 0;
+}
+
+static int tegra186_dspk_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dspk->mono_to_stereo)
+		return 0;
+
+	dspk->mono_to_stereo = value;
+
+	return 1;
+}
+
+static int tegra186_dspk_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+	ucontrol->value.enumerated.item[0] = dspk->stereo_to_mono;
+
+	return 0;
+}
+
+static int tegra186_dspk_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+	struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dspk->stereo_to_mono)
+		return 0;
+
+	dspk->stereo_to_mono = value;
+
+	return 1;
+}
+
 static int __maybe_unused tegra186_dspk_runtime_suspend(struct device *dev)
 {
 	struct tegra186_dspk *dspk = dev_get_drvdata(dev);
@@ -279,17 +390,19 @@
 static const struct snd_kcontrol_new tegrat186_dspk_controls[] = {
 	SOC_SINGLE_EXT("FIFO Threshold", SND_SOC_NOPM, 0,
 		       TEGRA186_DSPK_RX_FIFO_DEPTH - 1, 0,
-		       tegra186_dspk_get_control, tegra186_dspk_put_control),
+		       tegra186_dspk_get_fifo_th, tegra186_dspk_put_fifo_th),
 	SOC_ENUM_EXT("OSR Value", tegra186_dspk_osr_enum,
-		     tegra186_dspk_get_control, tegra186_dspk_put_control),
+		     tegra186_dspk_get_osr_val, tegra186_dspk_put_osr_val),
 	SOC_ENUM_EXT("LR Polarity Select", tegra186_dspk_lrsel_enum,
-		     tegra186_dspk_get_control, tegra186_dspk_put_control),
+		     tegra186_dspk_get_pol_sel, tegra186_dspk_put_pol_sel),
 	SOC_ENUM_EXT("Channel Select", tegra186_dspk_ch_sel_enum,
-		     tegra186_dspk_get_control, tegra186_dspk_put_control),
+		     tegra186_dspk_get_ch_sel, tegra186_dspk_put_ch_sel),
 	SOC_ENUM_EXT("Mono To Stereo", tegra186_dspk_mono_conv_enum,
-		     tegra186_dspk_get_control, tegra186_dspk_put_control),
+		     tegra186_dspk_get_mono_to_stereo,
+		     tegra186_dspk_put_mono_to_stereo),
 	SOC_ENUM_EXT("Stereo To Mono", tegra186_dspk_stereo_conv_enum,
-		     tegra186_dspk_get_control, tegra186_dspk_put_control),
+		     tegra186_dspk_get_stereo_to_mono,
+		     tegra186_dspk_put_stereo_to_mono),
 };
 
 static const struct snd_soc_component_driver tegra186_dspk_cmpnt = {
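
Besides retiring the fragile strstr() dispatch on control names, the rewrite
fixes which member of the value union each control type uses: SOC_ENUM_EXT
controls are accessed through ucontrol->value.enumerated.item[], integer
SOC_SINGLE_EXT controls through ucontrol->value.integer.value[]. One sketch
of each; struct example_priv and its fields are placeholders:

#include <sound/soc.h>

struct example_priv {
	long gain;
	unsigned int mode;
};

/* integer control, e.g. SOC_SINGLE_EXT("... Volume", ...) */
static int example_get_gain(struct snd_kcontrol *kcontrol,
			    struct snd_ctl_elem_value *ucontrol)
{
	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
	struct example_priv *priv = snd_soc_component_get_drvdata(comp);

	ucontrol->value.integer.value[0] = priv->gain;

	return 0;
}

/* enum control, e.g. SOC_ENUM_EXT("... Select", ...) */
static int example_get_mode(struct snd_kcontrol *kcontrol,
			    struct snd_ctl_elem_value *ucontrol)
{
	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
	struct example_priv *priv = snd_soc_component_get_drvdata(comp);

	ucontrol->value.enumerated.item[0] = priv->mode;

	return 0;
}
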
diff --git a/sound/soc/tegra/tegra210_admaif.c b/sound/soc/tegra/tegra210_admaif.c
index bcccdf3..1a2e868 100644
--- a/sound/soc/tegra/tegra210_admaif.c
+++ b/sound/soc/tegra/tegra210_admaif.c
@@ -424,46 +424,122 @@
 	.trigger	= tegra_admaif_trigger,
 };
 
-static int tegra_admaif_get_control(struct snd_kcontrol *kcontrol,
-				    struct snd_ctl_elem_value *ucontrol)
+static int tegra210_admaif_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
-	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
 	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
-	long *uctl_val = &ucontrol->value.integer.value[0];
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
 
-	if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-		*uctl_val = admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
-	else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-		*uctl_val = admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
-	else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-		*uctl_val = admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
-	else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-		*uctl_val = admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
+	ucontrol->value.enumerated.item[0] =
+		admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
 
 	return 0;
 }
 
-static int tegra_admaif_put_control(struct snd_kcontrol *kcontrol,
-				    struct snd_ctl_elem_value *ucontrol)
+static int tegra210_admaif_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
-	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
 	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
-	int value = ucontrol->value.integer.value[0];
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+	unsigned int value = ucontrol->value.enumerated.item[0];
 
-	if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-		admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
-	else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-		admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
-	else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-		admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
-	else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-		admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
+	if (value == admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg])
+		return 0;
+
+	admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
+
+	return 1;
+}
+
+static int tegra210_admaif_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+	ucontrol->value.enumerated.item[0] =
+		admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
 
 	return 0;
 }
 
+static int tegra210_admaif_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg])
+		return 0;
+
+	admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
+
+	return 1;
+}
+
+static int tegra210_admaif_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+	ucontrol->value.enumerated.item[0] =
+		admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
+
+	return 0;
+}
+
+static int tegra210_admaif_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg])
+		return 0;
+
+	admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
+
+	return 1;
+}
+
+static int tegra210_admaif_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+	ucontrol->value.enumerated.item[0] =
+		admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
+
+	return 0;
+}
+
+static int tegra210_admaif_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+	struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg])
+		return 0;
+
+	admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
+
+	return 1;
+}
+
 static int tegra_admaif_dai_probe(struct snd_soc_dai *dai)
 {
 	struct tegra_admaif *admaif = snd_soc_dai_get_drvdata(dai);
@@ -559,17 +635,21 @@
 }
 
 #define TEGRA_ADMAIF_CIF_CTRL(reg)					       \
-	NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,\
-			tegra_admaif_get_control, tegra_admaif_put_control,    \
+	NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,     \
+			tegra210_admaif_pget_mono_to_stereo,		       \
+			tegra210_admaif_pput_mono_to_stereo,		       \
 			tegra_admaif_mono_conv_text),			       \
-	NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,\
-			tegra_admaif_get_control, tegra_admaif_put_control,    \
+	NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,     \
+			tegra210_admaif_pget_stereo_to_mono,		       \
+			tegra210_admaif_pput_stereo_to_mono,		       \
 			tegra_admaif_stereo_conv_text),			       \
-	NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \
-			tegra_admaif_get_control, tegra_admaif_put_control,    \
+	NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1,      \
+			tegra210_admaif_cget_mono_to_stereo,		       \
+			tegra210_admaif_cput_mono_to_stereo,		       \
 			tegra_admaif_mono_conv_text),			       \
-	NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \
-			tegra_admaif_get_control, tegra_admaif_put_control,    \
+	NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1,      \
+			tegra210_admaif_cget_stereo_to_mono,		       \
+			tegra210_admaif_cput_stereo_to_mono,		       \
 			tegra_admaif_stereo_conv_text)
 
 static struct snd_kcontrol_new tegra210_admaif_controls[] = {
diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c
index d7c7849..933c450 100644
--- a/sound/soc/tegra/tegra210_adx.c
+++ b/sound/soc/tegra/tegra210_adx.c
@@ -193,6 +193,9 @@
 	struct soc_mixer_control *mc =
 		(struct soc_mixer_control *)kcontrol->private_value;
 
+	if (value == bytes_map[mc->reg])
+		return 0;
+
 	if (value >= 0 && value <= 255) {
 		/* update byte map and enable slot */
 		bytes_map[mc->reg] = value;
diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c
index a1989ea..388b815 100644
--- a/sound/soc/tegra/tegra210_ahub.c
+++ b/sound/soc/tegra/tegra210_ahub.c
@@ -62,6 +62,7 @@
 	unsigned int *item = uctl->value.enumerated.item;
 	unsigned int value = e->values[item[0]];
 	unsigned int i, bit_pos, reg_idx = 0, reg_val = 0;
+	int change = 0;
 
 	if (item[0] >= e->items)
 		return -EINVAL;
@@ -86,12 +87,14 @@
 
 		/* Update widget power if state has changed */
 		if (snd_soc_component_test_bits(cmpnt, update[i].reg,
-						update[i].mask, update[i].val))
-			snd_soc_dapm_mux_update_power(dapm, kctl, item[0], e,
-						      &update[i]);
+						update[i].mask,
+						update[i].val))
+			change |= snd_soc_dapm_mux_update_power(dapm, kctl,
+								item[0], e,
+								&update[i]);
 	}
 
-	return 0;
+	return change;
 }
 
 static struct snd_soc_dai_driver tegra210_ahub_dais[] = {
diff --git a/sound/soc/tegra/tegra210_amx.c b/sound/soc/tegra/tegra210_amx.c
index af9bddf..6895763 100644
--- a/sound/soc/tegra/tegra210_amx.c
+++ b/sound/soc/tegra/tegra210_amx.c
@@ -222,6 +222,9 @@
 	int reg = mc->reg;
 	int value = ucontrol->value.integer.value[0];
 
+	if (value == bytes_map[reg])
+		return 0;
+
 	if (value >= 0 && value <= 255) {
 		/* Update byte map and enable slot */
 		bytes_map[reg] = value;
diff --git a/sound/soc/tegra/tegra210_dmic.c b/sound/soc/tegra/tegra210_dmic.c
index b096478..db95794 100644
--- a/sound/soc/tegra/tegra210_dmic.c
+++ b/sound/soc/tegra/tegra210_dmic.c
@@ -156,51 +156,162 @@
 	return 0;
 }
 
-static int tegra210_dmic_get_control(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
+static int tegra210_dmic_get_boost_gain(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
 
-	if (strstr(kcontrol->id.name, "Boost Gain Volume"))
-		ucontrol->value.integer.value[0] = dmic->boost_gain;
-	else if (strstr(kcontrol->id.name, "Channel Select"))
-		ucontrol->value.integer.value[0] = dmic->ch_select;
-	else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-		ucontrol->value.integer.value[0] = dmic->mono_to_stereo;
-	else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-		ucontrol->value.integer.value[0] = dmic->stereo_to_mono;
-	else if (strstr(kcontrol->id.name, "OSR Value"))
-		ucontrol->value.integer.value[0] = dmic->osr_val;
-	else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-		ucontrol->value.integer.value[0] = dmic->lrsel;
+	ucontrol->value.integer.value[0] = dmic->boost_gain;
 
 	return 0;
 }
 
-static int tegra210_dmic_put_control(struct snd_kcontrol *kcontrol,
-				     struct snd_ctl_elem_value *ucontrol)
+static int tegra210_dmic_put_boost_gain(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
 	int value = ucontrol->value.integer.value[0];
 
-	if (strstr(kcontrol->id.name, "Boost Gain Volume"))
-		dmic->boost_gain = value;
-	else if (strstr(kcontrol->id.name, "Channel Select"))
-		dmic->ch_select = ucontrol->value.integer.value[0];
-	else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-		dmic->mono_to_stereo = value;
-	else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-		dmic->stereo_to_mono = value;
-	else if (strstr(kcontrol->id.name, "OSR Value"))
-		dmic->osr_val = value;
-	else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-		dmic->lrsel = value;
+	if (value == dmic->boost_gain)
+		return 0;
+
+	dmic->boost_gain = value;
+
+	return 1;
+}
+
+static int tegra210_dmic_get_ch_select(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+	ucontrol->value.enumerated.item[0] = dmic->ch_select;
 
 	return 0;
 }
 
+static int tegra210_dmic_put_ch_select(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dmic->ch_select)
+		return 0;
+
+	dmic->ch_select = value;
+
+	return 1;
+}
+
+static int tegra210_dmic_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+	ucontrol->value.enumerated.item[0] = dmic->mono_to_stereo;
+
+	return 0;
+}
+
+static int tegra210_dmic_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dmic->mono_to_stereo)
+		return 0;
+
+	dmic->mono_to_stereo = value;
+
+	return 1;
+}
+
+static int tegra210_dmic_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+	ucontrol->value.enumerated.item[0] = dmic->stereo_to_mono;
+
+	return 0;
+}
+
+static int tegra210_dmic_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dmic->stereo_to_mono)
+		return 0;
+
+	dmic->stereo_to_mono = value;
+
+	return 1;
+}
+
+static int tegra210_dmic_get_osr_val(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+	ucontrol->value.enumerated.item[0] = dmic->osr_val;
+
+	return 0;
+}
+
+static int tegra210_dmic_put_osr_val(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dmic->osr_val)
+		return 0;
+
+	dmic->osr_val = value;
+
+	return 1;
+}
+
+static int tegra210_dmic_get_pol_sel(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+	ucontrol->value.enumerated.item[0] = dmic->lrsel;
+
+	return 0;
+}
+
+static int tegra210_dmic_put_pol_sel(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == dmic->lrsel)
+		return 0;
+
+	dmic->lrsel = value;
+
+	return 1;
+}
+
 static const struct snd_soc_dai_ops tegra210_dmic_dai_ops = {
 	.hw_params	= tegra210_dmic_hw_params,
 };
@@ -287,19 +398,22 @@
 
 static const struct snd_kcontrol_new tegra210_dmic_controls[] = {
 	SOC_SINGLE_EXT("Boost Gain Volume", 0, 0, MAX_BOOST_GAIN, 0,
-		       tegra210_dmic_get_control, tegra210_dmic_put_control),
+		       tegra210_dmic_get_boost_gain,
+		       tegra210_dmic_put_boost_gain),
 	SOC_ENUM_EXT("Channel Select", tegra210_dmic_ch_enum,
-		     tegra210_dmic_get_control, tegra210_dmic_put_control),
+		     tegra210_dmic_get_ch_select, tegra210_dmic_put_ch_select),
 	SOC_ENUM_EXT("Mono To Stereo",
-		     tegra210_dmic_mono_conv_enum, tegra210_dmic_get_control,
-		     tegra210_dmic_put_control),
+		     tegra210_dmic_mono_conv_enum,
+		     tegra210_dmic_get_mono_to_stereo,
+		     tegra210_dmic_put_mono_to_stereo),
 	SOC_ENUM_EXT("Stereo To Mono",
-		     tegra210_dmic_stereo_conv_enum, tegra210_dmic_get_control,
-		     tegra210_dmic_put_control),
+		     tegra210_dmic_stereo_conv_enum,
+		     tegra210_dmic_get_stereo_to_mono,
+		     tegra210_dmic_put_stereo_to_mono),
 	SOC_ENUM_EXT("OSR Value", tegra210_dmic_osr_enum,
-		     tegra210_dmic_get_control, tegra210_dmic_put_control),
+		     tegra210_dmic_get_osr_val, tegra210_dmic_put_osr_val),
 	SOC_ENUM_EXT("LR Polarity Select", tegra210_dmic_lrsel_enum,
-		     tegra210_dmic_get_control, tegra210_dmic_put_control),
+		     tegra210_dmic_get_pol_sel, tegra210_dmic_put_pol_sel),
 };
 
 static const struct snd_soc_component_driver tegra210_dmic_compnt = {
diff --git a/sound/soc/tegra/tegra210_i2s.c b/sound/soc/tegra/tegra210_i2s.c
index 45f31cc..9552bbb 100644
--- a/sound/soc/tegra/tegra210_i2s.c
+++ b/sound/soc/tegra/tegra210_i2s.c
@@ -302,6 +302,229 @@
 	return 0;
 }
 
+static int tegra210_i2s_get_loopback(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.integer.value[0] = i2s->loopback;
+
+	return 0;
+}
+
+static int tegra210_i2s_put_loopback(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	int value = ucontrol->value.integer.value[0];
+
+	if (value == i2s->loopback)
+		return 0;
+
+	i2s->loopback = value;
+
+	regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, I2S_CTRL_LPBK_MASK,
+			   i2s->loopback << I2S_CTRL_LPBK_SHIFT);
+
+	return 1;
+}
+
+static int tegra210_i2s_get_fsync_width(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.integer.value[0] = i2s->fsync_width;
+
+	return 0;
+}
+
+static int tegra210_i2s_put_fsync_width(struct snd_kcontrol *kcontrol,
+					struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	int value = ucontrol->value.integer.value[0];
+
+	if (value == i2s->fsync_width)
+		return 0;
+
+	i2s->fsync_width = value;
+
+	/*
+	 * The frame sync width applies only in FSYNC modes; it is not
+	 * used in LRCK modes. The reset value of this field is "0",
+	 * which means the width is one bit clock wide.
+	 * The required width may depend on the codec, in which case this
+	 * mixer control is used to set a custom value. A value of "N"
+	 * here means the width is "N + 1" bit clocks wide.
+	 */
+	regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
+			   I2S_CTRL_FSYNC_WIDTH_MASK,
+			   i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
+
+	return 1;
+}
+
+static int tegra210_i2s_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_TX_PATH];
+
+	return 0;
+}
+
+static int tegra210_i2s_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == i2s->stereo_to_mono[I2S_TX_PATH])
+		return 0;
+
+	i2s->stereo_to_mono[I2S_TX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_i2s_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_TX_PATH];
+
+	return 0;
+}
+
+static int tegra210_i2s_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == i2s->mono_to_stereo[I2S_TX_PATH])
+		return 0;
+
+	i2s->mono_to_stereo[I2S_TX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_i2s_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_RX_PATH];
+
+	return 0;
+}
+
+static int tegra210_i2s_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == i2s->stereo_to_mono[I2S_RX_PATH])
+		return 0;
+
+	i2s->stereo_to_mono[I2S_RX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_i2s_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_RX_PATH];
+
+	return 0;
+}
+
+static int tegra210_i2s_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == i2s->mono_to_stereo[I2S_RX_PATH])
+		return 0;
+
+	i2s->mono_to_stereo[I2S_RX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_i2s_pget_fifo_th(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.integer.value[0] = i2s->rx_fifo_th;
+
+	return 0;
+}
+
+static int tegra210_i2s_pput_fifo_th(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	int value = ucontrol->value.integer.value[0];
+
+	if (value == i2s->rx_fifo_th)
+		return 0;
+
+	i2s->rx_fifo_th = value;
+
+	return 1;
+}
+
+static int tegra210_i2s_get_bclk_ratio(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+	ucontrol->value.integer.value[0] = i2s->bclk_ratio;
+
+	return 0;
+}
+
+static int tegra210_i2s_put_bclk_ratio(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+	int value = ucontrol->value.integer.value[0];
+
+	if (value == i2s->bclk_ratio)
+		return 0;
+
+	i2s->bclk_ratio = value;
+
+	return 1;
+}
+
 static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai,
 					   unsigned int ratio)
 {
@@ -312,79 +535,6 @@
 	return 0;
 }
 
-static int tegra210_i2s_get_control(struct snd_kcontrol *kcontrol,
-				    struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
-	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
-	long *uctl_val = &ucontrol->value.integer.value[0];
-
-	if (strstr(kcontrol->id.name, "Loopback"))
-		*uctl_val = i2s->loopback;
-	else if (strstr(kcontrol->id.name, "FSYNC Width"))
-		*uctl_val = i2s->fsync_width;
-	else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-		*uctl_val = i2s->stereo_to_mono[I2S_TX_PATH];
-	else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-		*uctl_val = i2s->mono_to_stereo[I2S_TX_PATH];
-	else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-		*uctl_val = i2s->stereo_to_mono[I2S_RX_PATH];
-	else if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-		*uctl_val = i2s->mono_to_stereo[I2S_RX_PATH];
-	else if (strstr(kcontrol->id.name, "Playback FIFO Threshold"))
-		*uctl_val = i2s->rx_fifo_th;
-	else if (strstr(kcontrol->id.name, "BCLK Ratio"))
-		*uctl_val = i2s->bclk_ratio;
-
-	return 0;
-}
-
-static int tegra210_i2s_put_control(struct snd_kcontrol *kcontrol,
-				    struct snd_ctl_elem_value *ucontrol)
-{
-	struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
-	struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
-	int value = ucontrol->value.integer.value[0];
-
-	if (strstr(kcontrol->id.name, "Loopback")) {
-		i2s->loopback = value;
-
-		regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
-				   I2S_CTRL_LPBK_MASK,
-				   i2s->loopback << I2S_CTRL_LPBK_SHIFT);
-
-	} else if (strstr(kcontrol->id.name, "FSYNC Width")) {
-		/*
-		 * Frame sync width is used only for FSYNC modes and not
-		 * applicable for LRCK modes. Reset value for this field is "0",
-		 * which means the width is one bit clock wide.
-		 * The width requirement may depend on the codec and in such
-		 * cases mixer control is used to update custom values. A value
-		 * of "N" here means, width is "N + 1" bit clock wide.
-		 */
-		i2s->fsync_width = value;
-
-		regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
-				   I2S_CTRL_FSYNC_WIDTH_MASK,
-				   i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
-
-	} else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) {
-		i2s->stereo_to_mono[I2S_TX_PATH] = value;
-	} else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) {
-		i2s->mono_to_stereo[I2S_TX_PATH] = value;
-	} else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) {
-		i2s->stereo_to_mono[I2S_RX_PATH] = value;
-	} else if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) {
-		i2s->mono_to_stereo[I2S_RX_PATH] = value;
-	} else if (strstr(kcontrol->id.name, "Playback FIFO Threshold")) {
-		i2s->rx_fifo_th = value;
-	} else if (strstr(kcontrol->id.name, "BCLK Ratio")) {
-		i2s->bclk_ratio = value;
-	}
-
-	return 0;
-}
-
 static int tegra210_i2s_set_timing_params(struct device *dev,
 					  unsigned int sample_size,
 					  unsigned int srate,
@@ -598,22 +748,28 @@
 			tegra210_i2s_stereo_conv_text);
 
 static const struct snd_kcontrol_new tegra210_i2s_controls[] = {
-	SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_control,
-		       tegra210_i2s_put_control),
-	SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0, tegra210_i2s_get_control,
-		       tegra210_i2s_put_control),
+	SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_loopback,
+		       tegra210_i2s_put_loopback),
+	SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0,
+		       tegra210_i2s_get_fsync_width,
+		       tegra210_i2s_put_fsync_width),
 	SOC_ENUM_EXT("Capture Stereo To Mono", tegra210_i2s_stereo_conv_enum,
-		     tegra210_i2s_get_control, tegra210_i2s_put_control),
+		     tegra210_i2s_cget_stereo_to_mono,
+		     tegra210_i2s_cput_stereo_to_mono),
 	SOC_ENUM_EXT("Capture Mono To Stereo", tegra210_i2s_mono_conv_enum,
-		     tegra210_i2s_get_control, tegra210_i2s_put_control),
+		     tegra210_i2s_cget_mono_to_stereo,
+		     tegra210_i2s_cput_mono_to_stereo),
 	SOC_ENUM_EXT("Playback Stereo To Mono", tegra210_i2s_stereo_conv_enum,
-		     tegra210_i2s_get_control, tegra210_i2s_put_control),
+		     tegra210_i2s_pget_stereo_to_mono,
+		     tegra210_i2s_pput_stereo_to_mono),
 	SOC_ENUM_EXT("Playback Mono To Stereo", tegra210_i2s_mono_conv_enum,
-		     tegra210_i2s_get_control, tegra210_i2s_put_control),
+		     tegra210_i2s_pget_mono_to_stereo,
+		     tegra210_i2s_pput_mono_to_stereo),
 	SOC_SINGLE_EXT("Playback FIFO Threshold", 0, 0, I2S_RX_FIFO_DEPTH - 1,
-		       0, tegra210_i2s_get_control, tegra210_i2s_put_control),
-	SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0, tegra210_i2s_get_control,
-		       tegra210_i2s_put_control),
+		       0, tegra210_i2s_pget_fifo_th, tegra210_i2s_pput_fifo_th),
+	SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0,
+		       tegra210_i2s_get_bclk_ratio,
+		       tegra210_i2s_put_bclk_ratio),
 };
 
 static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = {
diff --git a/sound/soc/tegra/tegra210_mixer.c b/sound/soc/tegra/tegra210_mixer.c
index 55e6177..51d3755 100644
--- a/sound/soc/tegra/tegra210_mixer.c
+++ b/sound/soc/tegra/tegra210_mixer.c
@@ -192,24 +192,24 @@
 	return 0;
 }
 
-static int tegra210_mixer_put_gain(struct snd_kcontrol *kcontrol,
-				   struct snd_ctl_elem_value *ucontrol)
+static int tegra210_mixer_apply_gain(struct snd_kcontrol *kcontrol,
+				     struct snd_ctl_elem_value *ucontrol,
+				     bool instant_gain)
 {
 	struct soc_mixer_control *mc =
 		(struct soc_mixer_control *)kcontrol->private_value;
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_mixer *mixer = snd_soc_component_get_drvdata(cmpnt);
 	unsigned int reg = mc->reg, id;
-	bool instant_gain = false;
 	int err;
 
-	if (strstr(kcontrol->id.name, "Instant Gain Volume"))
-		instant_gain = true;
-
 	/* Save gain value for specific MIXER input */
 	id = (reg - TEGRA210_MIXER_GAIN_CFG_RAM_ADDR_0) /
 	     TEGRA210_MIXER_GAIN_CFG_RAM_ADDR_STRIDE;
 
+	if (mixer->gain_value[id] == ucontrol->value.integer.value[0])
+		return 0;
+
 	mixer->gain_value[id] = ucontrol->value.integer.value[0];
 
 	err = tegra210_mixer_configure_gain(cmpnt, id, instant_gain);
@@ -221,6 +221,18 @@
 	return 1;
 }
 
+static int tegra210_mixer_put_gain(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	return tegra210_mixer_apply_gain(kcontrol, ucontrol, false);
+}
+
+static int tegra210_mixer_put_instant_gain(struct snd_kcontrol *kcontrol,
+					   struct snd_ctl_elem_value *ucontrol)
+{
+	return tegra210_mixer_apply_gain(kcontrol, ucontrol, true);
+}
+
 static int tegra210_mixer_set_audio_cif(struct tegra210_mixer *mixer,
 					struct snd_pcm_hw_params *params,
 					unsigned int reg,
@@ -388,7 +400,7 @@
 	SOC_SINGLE_EXT("RX" #id " Instant Gain Volume",		\
 		       MIXER_GAIN_CFG_RAM_ADDR((id) - 1), 0,	\
 		       0x20000, 0, tegra210_mixer_get_gain,	\
-		       tegra210_mixer_put_gain),
+		       tegra210_mixer_put_instant_gain),
 
 /* Volume controls for all MIXER inputs */
 static const struct snd_kcontrol_new tegra210_mixer_gain_ctls[] = {
diff --git a/sound/soc/tegra/tegra210_mvc.c b/sound/soc/tegra/tegra210_mvc.c
index 7b9c700..85b1558 100644
--- a/sound/soc/tegra/tegra210_mvc.c
+++ b/sound/soc/tegra/tegra210_mvc.c
@@ -136,7 +136,7 @@
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
 	unsigned int value;
-	u8 mute_mask;
+	u8 new_mask, old_mask;
 	int err;
 
 	pm_runtime_get_sync(cmpnt->dev);
@@ -148,11 +148,19 @@
 	if (err < 0)
 		goto end;
 
-	mute_mask = ucontrol->value.integer.value[0];
+	regmap_read(mvc->regmap, TEGRA210_MVC_CTRL, &value);
+
+	old_mask = (value >> TEGRA210_MVC_MUTE_SHIFT) & TEGRA210_MUTE_MASK_EN;
+	new_mask = ucontrol->value.integer.value[0];
+
+	if (new_mask == old_mask) {
+		err = 0;
+		goto end;
+	}
 
 	err = regmap_update_bits(mvc->regmap, mc->reg,
 				 TEGRA210_MVC_MUTE_MASK,
-				 mute_mask << TEGRA210_MVC_MUTE_SHIFT);
+				 new_mask << TEGRA210_MVC_MUTE_SHIFT);
 	if (err < 0)
 		goto end;
 
@@ -195,7 +203,7 @@
 	unsigned int reg = mc->reg;
 	unsigned int value;
 	u8 chan;
-	int err;
+	int err, old_volume;
 
 	pm_runtime_get_sync(cmpnt->dev);
 
@@ -207,10 +215,16 @@
 		goto end;
 
 	chan = (reg - TEGRA210_MVC_TARGET_VOL) / REG_SIZE;
+	old_volume = mvc->volume[chan];
 
 	tegra210_mvc_conv_vol(mvc, chan,
 			      ucontrol->value.integer.value[0]);
 
+	if (mvc->volume[chan] == old_volume) {
+		err = 0;
+		goto end;
+	}
+
 	/* Configure init volume same as target volume */
 	regmap_write(mvc->regmap,
 		TEGRA210_MVC_REG_OFFSET(TEGRA210_MVC_INIT_VOL, chan),
@@ -275,7 +289,7 @@
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
 
-	ucontrol->value.integer.value[0] = mvc->curve_type;
+	ucontrol->value.enumerated.item[0] = mvc->curve_type;
 
 	return 0;
 }
@@ -285,7 +299,7 @@
 {
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
-	int value;
+	unsigned int value;
 
 	regmap_read(mvc->regmap, TEGRA210_MVC_ENABLE, &value);
 	if (value & TEGRA210_MVC_EN) {
@@ -294,10 +308,10 @@
 		return -EINVAL;
 	}
 
-	if (mvc->curve_type == ucontrol->value.integer.value[0])
+	if (mvc->curve_type == ucontrol->value.enumerated.item[0])
 		return 0;
 
-	mvc->curve_type = ucontrol->value.integer.value[0];
+	mvc->curve_type = ucontrol->value.enumerated.item[0];
 
 	tegra210_mvc_reset_vol_settings(mvc, cmpnt->dev);
 
diff --git a/sound/soc/tegra/tegra210_sfc.c b/sound/soc/tegra/tegra210_sfc.c
index dc477ee..7a2227e 100644
--- a/sound/soc/tegra/tegra210_sfc.c
+++ b/sound/soc/tegra/tegra210_sfc.c
@@ -3244,46 +3244,107 @@
 	return tegra210_sfc_write_coeff_ram(cmpnt);
 }
 
-static int tegra210_sfc_get_control(struct snd_kcontrol *kcontrol,
+static int tegra210_sfc_iget_stereo_to_mono(struct snd_kcontrol *kcontrol,
 				    struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
 
-	if (strstr(kcontrol->id.name, "Input Stereo To Mono"))
-		ucontrol->value.integer.value[0] =
-			sfc->stereo_to_mono[SFC_RX_PATH];
-	else if (strstr(kcontrol->id.name, "Input Mono To Stereo"))
-		ucontrol->value.integer.value[0] =
-			sfc->mono_to_stereo[SFC_RX_PATH];
-	else if (strstr(kcontrol->id.name, "Output Stereo To Mono"))
-		ucontrol->value.integer.value[0] =
-			sfc->stereo_to_mono[SFC_TX_PATH];
-	else if (strstr(kcontrol->id.name, "Output Mono To Stereo"))
-		ucontrol->value.integer.value[0] =
-			sfc->mono_to_stereo[SFC_TX_PATH];
+	ucontrol->value.enumerated.item[0] = sfc->stereo_to_mono[SFC_RX_PATH];
 
 	return 0;
 }
 
-static int tegra210_sfc_put_control(struct snd_kcontrol *kcontrol,
+static int tegra210_sfc_iput_stereo_to_mono(struct snd_kcontrol *kcontrol,
 				    struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
 	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
-	int value = ucontrol->value.integer.value[0];
+	unsigned int value = ucontrol->value.enumerated.item[0];
 
-	if (strstr(kcontrol->id.name, "Input Stereo To Mono"))
-		sfc->stereo_to_mono[SFC_RX_PATH] = value;
-	else if (strstr(kcontrol->id.name, "Input Mono To Stereo"))
-		sfc->mono_to_stereo[SFC_RX_PATH] = value;
-	else if (strstr(kcontrol->id.name, "Output Stereo To Mono"))
-		sfc->stereo_to_mono[SFC_TX_PATH] = value;
-	else if (strstr(kcontrol->id.name, "Output Mono To Stereo"))
-		sfc->mono_to_stereo[SFC_TX_PATH] = value;
-	else
+	if (value == sfc->stereo_to_mono[SFC_RX_PATH])
 		return 0;
 
+	sfc->stereo_to_mono[SFC_RX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_sfc_iget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+					    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+	ucontrol->value.enumerated.item[0] = sfc->mono_to_stereo[SFC_RX_PATH];
+
+	return 0;
+}
+
+static int tegra210_sfc_iput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == sfc->mono_to_stereo[SFC_RX_PATH])
+		return 0;
+
+	sfc->mono_to_stereo[SFC_RX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_sfc_oget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+	ucontrol->value.enumerated.item[0] = sfc->stereo_to_mono[SFC_TX_PATH];
+
+	return 0;
+}
+
+static int tegra210_sfc_oput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == sfc->stereo_to_mono[SFC_TX_PATH])
+		return 0;
+
+	sfc->stereo_to_mono[SFC_TX_PATH] = value;
+
+	return 1;
+}
+
+static int tegra210_sfc_oget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+	ucontrol->value.enumerated.item[0] = sfc->mono_to_stereo[SFC_TX_PATH];
+
+	return 0;
+}
+
+static int tegra210_sfc_oput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+				    struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+	struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+	unsigned int value = ucontrol->value.enumerated.item[0];
+
+	if (value == sfc->mono_to_stereo[SFC_TX_PATH])
+		return 0;
+
+	sfc->mono_to_stereo[SFC_TX_PATH] = value;
+
 	return 1;
 }
 
@@ -3384,13 +3445,17 @@
 
 static const struct snd_kcontrol_new tegra210_sfc_controls[] = {
 	SOC_ENUM_EXT("Input Stereo To Mono", tegra210_sfc_stereo_conv_enum,
-		tegra210_sfc_get_control, tegra210_sfc_put_control),
+		     tegra210_sfc_iget_stereo_to_mono,
+		     tegra210_sfc_iput_stereo_to_mono),
 	SOC_ENUM_EXT("Input Mono To Stereo", tegra210_sfc_mono_conv_enum,
-		tegra210_sfc_get_control, tegra210_sfc_put_control),
+		     tegra210_sfc_iget_mono_to_stereo,
+		     tegra210_sfc_iput_mono_to_stereo),
 	SOC_ENUM_EXT("Output Stereo To Mono", tegra210_sfc_stereo_conv_enum,
-		tegra210_sfc_get_control, tegra210_sfc_put_control),
+		     tegra210_sfc_oget_stereo_to_mono,
+		     tegra210_sfc_oput_stereo_to_mono),
 	SOC_ENUM_EXT("Output Mono To Stereo", tegra210_sfc_mono_conv_enum,
-		tegra210_sfc_get_control, tegra210_sfc_put_control),
+		     tegra210_sfc_oget_mono_to_stereo,
+		     tegra210_sfc_oput_mono_to_stereo),
 };
 
 static const struct snd_soc_component_driver tegra210_sfc_cmpnt = {
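
For reference, the pattern all of these Tegra get/put fixes converge on is the generic ALSA contract for enum controls: values travel through ucontrol->value.enumerated.item[] (not .integer.value[]), and a put() handler returns 1 only when it actually changed driver state, so the core raises a control-change notification. A minimal sketch, assuming a hypothetical driver private struct (my_priv and its conv_mode field are illustrative, not taken from the Tegra code):

	static int my_conv_mode_put(struct snd_kcontrol *kcontrol,
				    struct snd_ctl_elem_value *ucontrol)
	{
		struct snd_soc_component *cmpnt =
			snd_soc_kcontrol_component(kcontrol);
		struct my_priv *priv = snd_soc_component_get_drvdata(cmpnt);
		unsigned int value = ucontrol->value.enumerated.item[0];

		if (value == priv->conv_mode)
			return 0;	/* unchanged: no notification */

		priv->conv_mode = value;

		return 1;		/* changed: notify listeners */
	}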
diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c
index 49d1976..5ed8e36 100644
--- a/sound/synth/emux/emux.c
+++ b/sound/synth/emux/emux.c
@@ -88,7 +88,7 @@
 	emu->name = kstrdup(name, GFP_KERNEL);
 	emu->voices = kcalloc(emu->max_voices, sizeof(struct snd_emux_voice),
 			      GFP_KERNEL);
-	if (emu->voices == NULL)
+	if (emu->name == NULL || emu->voices == NULL)
 		return -ENOMEM;
 
 	/* create soundfont list */
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 95ec8ee..cec6e91a 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -581,6 +581,12 @@
 	return 0;
 }
 
+/* free-wheeling mode? (e.g. dmix) */
+static int in_free_wheeling_mode(struct snd_pcm_runtime *runtime)
+{
+	return runtime->stop_threshold > runtime->buffer_size;
+}
+
 /* check whether early start is needed for playback stream */
 static int lowlatency_playback_available(struct snd_pcm_runtime *runtime,
 					 struct snd_usb_substream *subs)
@@ -592,8 +598,7 @@
 	/* disabled via module option? */
 	if (!chip->lowlatency)
 		return false;
-	/* free-wheeling mode? (e.g. dmix) */
-	if (runtime->stop_threshold > runtime->buffer_size)
+	if (in_free_wheeling_mode(runtime))
 		return false;
 	/* implicit feedback mode has own operation mode */
 	if (snd_usb_endpoint_implicit_feedback_sink(subs->data_endpoint))
@@ -635,7 +640,8 @@
 	runtime->delay = 0;
 
 	subs->lowlatency_playback = lowlatency_playback_available(runtime, subs);
-	if (!subs->lowlatency_playback)
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+	    !subs->lowlatency_playback)
 		ret = start_endpoints(subs);
 
  unlock:
@@ -1552,6 +1558,8 @@
 					      subs);
 		if (subs->lowlatency_playback &&
 		    cmd == SNDRV_PCM_TRIGGER_START) {
+			if (in_free_wheeling_mode(substream->runtime))
+				subs->lowlatency_playback = false;
 			err = start_endpoints(subs);
 			if (err < 0) {
 				snd_usb_endpoint_set_callback(subs->data_endpoint,
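
The free-wheeling test factored out above keys off stop_threshold exceeding the buffer size, which is exactly the state a dmix-style client configures. A minimal userspace sketch of how that state arises, assuming an open alsa-lib playback handle pcm (error handling omitted):

	#include <alsa/asoundlib.h>

	snd_pcm_sw_params_t *sw;
	snd_pcm_uframes_t boundary;

	snd_pcm_sw_params_alloca(&sw);
	snd_pcm_sw_params_current(pcm, sw);
	snd_pcm_sw_params_get_boundary(sw, &boundary);
	/* stop_threshold > buffer_size => in_free_wheeling_mode() is true */
	snd_pcm_sw_params_set_stop_threshold(pcm, sw, boundary);
	snd_pcm_sw_params(pcm, sw);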
diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c
index 2cb0a19..4041748 100644
--- a/sound/xen/xen_snd_front.c
+++ b/sound/xen/xen_snd_front.c
@@ -358,6 +358,7 @@
 	.probe = xen_drv_probe,
 	.remove = xen_drv_remove,
 	.otherend_changed = sndback_changed,
+	.not_essential = true,
 };
 
 static int __init xen_drv_init(void)
diff --git a/tools/Makefile b/tools/Makefile
index 5da1fde..db2f7b8 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -24,7 +24,6 @@
 	@echo '  intel-speed-select     - Intel Speed Select tool'
 	@echo '  kvm_stat               - top-like utility for displaying kvm statistics'
 	@echo '  leds                   - LEDs  tools'
-	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
 	@echo '  objtool                - an ELF object analysis tool'
 	@echo '  pci                    - PCI tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
@@ -72,9 +71,6 @@
 bpf/%: FORCE
 	$(call descend,$@)
 
-liblockdep: FORCE
-	$(call descend,lib/lockdep)
-
 libapi: FORCE
 	$(call descend,lib/api)
 
@@ -101,7 +97,7 @@
 kvm_stat: FORCE
 	$(call descend,kvm/$@)
 
-all: acpi cgroup counter cpupower gpio hv firewire liblockdep \
+all: acpi cgroup counter cpupower gpio hv firewire \
 		perf selftests bootconfig spi turbostat usb \
 		virtio vm bpf x86_energy_perf_policy \
 		tmon freefall iio objtool kvm_stat wmi \
@@ -116,9 +112,6 @@
 cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
 	$(call descend,$(@:_install=),install)
 
-liblockdep_install:
-	$(call descend,lib/lockdep,install)
-
 selftests_install:
 	$(call descend,testing/$(@:_install=),install)
 
@@ -135,7 +128,7 @@
 	$(call descend,kvm/$(@:_install=),install)
 
 install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
-		hv_install firewire_install iio_install liblockdep_install \
+		hv_install firewire_install iio_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install bpf_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install \
@@ -151,9 +144,6 @@
 cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
 	$(call descend,$(@:_clean=),clean)
 
-liblockdep_clean:
-	$(call descend,lib/lockdep,clean)
-
 libapi_clean:
 	$(call descend,lib/api,clean)
 
@@ -185,7 +175,7 @@
 clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
 		perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
 		vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
-		freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
+		freefall_clean build_clean libbpf_clean libsubcmd_clean \
 		gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
 		intel-speed-select_clean tracing_clean
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index d0ce5cf..d5b5f2a 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -277,6 +277,7 @@
 #define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC instruction */
 #define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 instruction */
 #define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD			(10*32+ 4) /* "" eXtended Feature Disabling */
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
@@ -298,6 +299,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_TILE		(18*32+24) /* AMX tile Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index a7c4134..01e2650 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -625,6 +625,8 @@
 
 #define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
 
+#define MSR_IA32_XFD			0x000001c4
+#define MSR_IA32_XFD_ERR		0x000001c5
 #define MSR_IA32_XSS			0x00000da0
 
 #define MSR_IA32_APICBASE		0x0000001b
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 2ef1f65..5a776a0 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -504,4 +504,8 @@
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
+/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
+#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
+#define   KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+
 #endif /* _ASM_X86_KVM_H */
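
A minimal sketch of how these constants are consumed through the vCPU device-attr ioctls; vcpu_fd is assumed to be an open KVM vCPU file descriptor:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	__u64 offset = 0;
	struct kvm_device_attr attr = {
		.group = KVM_VCPU_TSC_CTRL,
		.attr  = KVM_VCPU_TSC_OFFSET,
		.addr  = (__u64)(unsigned long)&offset,
	};

	/* probe for support, then read the current TSC offset */
	if (ioctl(vcpu_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
		ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);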
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index 5a6aac9..754a078 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -10,6 +10,10 @@
 #define ARCH_GET_CPUID		0x1011
 #define ARCH_SET_CPUID		0x1012
 
+#define ARCH_GET_XCOMP_SUPP	0x1021
+#define ARCH_GET_XCOMP_PERM	0x1022
+#define ARCH_REQ_XCOMP_PERM	0x1023
+
 #define ARCH_MAP_VDSO_X32	0x2001
 #define ARCH_MAP_VDSO_32	0x2002
 #define ARCH_MAP_VDSO_64	0x2003
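
These arch_prctl() codes gate dynamically enabled xstate features such as AMX: a task must request permission before touching tile state. A minimal sketch, assuming an x86-64 libc that exposes SYS_arch_prctl (the XFEATURE_XTILEDATA value of 18 is written out by hand here for illustration):

	#include <sys/syscall.h>
	#include <unistd.h>

	#define XFEATURE_XTILEDATA 18	/* AMX tile data xstate component */

	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA))
		/* kernel too old, or AMX unavailable on this CPU */;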
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index bbd1150..8791d0e 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -88,5 +88,4 @@
 
 $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)   \
-		    LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT)			       \
-		    LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD)
+		    CC=$(HOSTCC) LD=$(HOSTLD)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 9204395..0b243ce8 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -200,7 +200,6 @@
 	main_test_timerfd();
 	main_test_stackprotector_all();
 	main_test_libdw_dwarf_unwind();
-	main_test_sync_compare_and_swap(argc, argv);
 	main_test_zlib();
 	main_test_pthread_attr_setaffinity_np();
 	main_test_pthread_barrier();
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index a7e54a08f..3e8df50 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -7,6 +7,7 @@
 #include <assert.h>
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
+#include <linux/math.h>
 #include <endian.h>
 #include <byteswap.h>
 
@@ -14,8 +15,6 @@
 #define UINT_MAX	(~0U)
 #endif
 
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __PERF_ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
@@ -52,15 +51,6 @@
 	_min1 < _min2 ? _min1 : _min2; })
 #endif
 
-#ifndef roundup
-#define roundup(x, y) (                                \
-{                                                      \
-	const typeof(y) __y = y;		       \
-	(((x) + (__y - 1)) / __y) * __y;	       \
-}                                                      \
-)
-#endif
-
 #ifndef BUG_ON
 #ifdef NDEBUG
 #define BUG_ON(cond) do { if (cond) {} } while (0)
@@ -104,16 +94,6 @@
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-
 #define current_gfp_context(k) 0
 #define synchronize_rcu()
 
diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h
new file mode 100644
index 0000000..4e7af99
--- /dev/null
+++ b/tools/include/linux/math.h
@@ -0,0 +1,25 @@
+#ifndef _TOOLS_MATH_H
+#define _TOOLS_MATH_H
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#ifndef roundup
+#define roundup(x, y) (                                \
+{                                                      \
+	const typeof(y) __y = y;		       \
+	(((x) + (__y - 1)) / __y) * __y;	       \
+}                                                      \
+)
+#endif
+
+#endif
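
A short usage sketch for the relocated helpers; note that round_up()/round_down() require a power-of-two alignment, while roundup() and DIV_ROUND_UP() accept any positive divisor:

	#include <linux/math.h>

	unsigned int  npages = DIV_ROUND_UP(len, 4096); /* ceiling division */
	unsigned long cache  = round_up(addr, 64);      /* 64 is a power of two */
	unsigned int  padded = roundup(len, 24);        /* arbitrary divisor */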
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 1c5fb86..4557a8b 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -880,8 +880,11 @@
 #define __NR_process_mrelease 448
 __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
 #undef __NR_syscalls
-#define __NR_syscalls 449
+#define __NR_syscalls 450
 
 /*
  * 32 bit systems traditionally used different
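
A minimal sketch of invoking the new syscall directly, assuming a kernel uapi that ships struct futex_waitv and FUTEX_32 in <linux/futex.h>:

	#include <linux/futex.h>
	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef __NR_futex_waitv
	#define __NR_futex_waitv 449
	#endif

	uint32_t futex_word = 0;
	struct futex_waitv waiters[1] = {{
		.val   = 0,			/* expected value */
		.uaddr = (uintptr_t)&futex_word,
		.flags = FUTEX_32,
	}};

	/* block until futex_word changes; NULL timeout (clockid ignored) */
	long ret = syscall(__NR_futex_waitv, waiters, 1, 0, NULL, 0);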
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index bde5860..914ebd9 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -1522,6 +1522,12 @@
 #define I915_TILING_NONE	0
 #define I915_TILING_X		1
 #define I915_TILING_Y		2
+/*
+ * Do not add new tiling types here.  The I915_TILING_* values are for
+ * de-tiling fence registers that no longer exist on modern platforms.  Although
+ * the hardware may support new types of tiling in general (e.g., Tile4), we
+ * do not need to add them to the uapi that is specific to now-defunct ioctls.
+ */
 #define I915_TILING_LAST	I915_TILING_Y
 
 #define I915_BIT_6_SWIZZLE_NONE		0
@@ -1824,6 +1830,7 @@
  * Extensions:
  *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ *   i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
 
@@ -1846,6 +1853,55 @@
  * attempted to use it, never re-use this context param number.
  */
 #define I915_CONTEXT_PARAM_RINGSIZE	0xc
+
+/*
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ *
+ * Mark that the context makes use of protected content, which will result
+ * in the context being invalidated when the protected content session is.
+ * Given that the protected content session is killed on suspend, the device
+ * is kept awake for the lifetime of a protected context, so the user should
+ * make sure to dispose of such contexts once done.
+ * This flag can only be set at context creation time and, when set to true,
+ * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
+ * to false. This flag can't be set to true in conjunction with setting the
+ * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_i915_gem_context_create_ext_setparam p_protected = {
+ *		.base = {
+ *			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ *		},
+ *		.param = {
+ *			.param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
+ *			.value = 1,
+ *		}
+ *	};
+ *	struct drm_i915_gem_context_create_ext_setparam p_norecover = {
+ *		.base = {
+ *			.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ *			.next_extension = to_user_pointer(&p_protected),
+ *		},
+ *		.param = {
+ *			.param = I915_CONTEXT_PARAM_RECOVERABLE,
+ *			.value = 0,
+ *		}
+ *	};
+ *	struct drm_i915_gem_context_create_ext create = {
+ *		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ *		.extensions = to_user_pointer(&p_norecover);
+ *	};
+ *
+ *	ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * In addition to the normal failure cases, setting this flag during context
+ * creation can result in the following errors:
+ *
+ * -ENODEV: feature not available
+ * -EPERM: trying to mark a recoverable or non-bannable context as protected
+ */
+#define I915_CONTEXT_PARAM_PROTECTED_CONTENT    0xd
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -2050,6 +2106,135 @@
 } __attribute__((packed)) name__
 
 /**
+ * struct i915_context_engines_parallel_submit - Configure engine for
+ * parallel submission.
+ *
+ * Setup a slot in the context engine map to allow multiple BBs to be submitted
+ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
+ * in parallel. Multiple hardware contexts are created internally in the i915 to
+ * run these BBs. Once a slot is configured for N BBs, only N BBs can be
+ * submitted in each execbuf IOCTL, and this is implicit behavior: the user
+ * doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL knows how
+ * many BBs there are based on the slot's configuration. The N BBs are the last
+ * N buffer objects, or the first N if I915_EXEC_BATCH_FIRST is set.
+ *
+ * The default placement behavior is to create implicit bonds between each
+ * context if each context maps to more than 1 physical engine (e.g. context is
+ * a virtual engine). Also, we only allow contexts of the same engine class, and
+ * these contexts must be in logically contiguous order. Examples of the
+ * placement behavior are described below. Lastly, the default is to not allow
+ * BBs to be preempted mid-batch; instead, coordinated preemption points are
+ * inserted on all hardware contexts between each set of BBs. Flags could be
+ * added in the future to change both of these default behaviors.
+ *
+ * Returns -EINVAL if the hardware context placement configuration is invalid
+ * or if the placement configuration isn't supported on the platform /
+ * submission interface.
+ * Returns -ENODEV if the extension isn't supported on the platform /
+ * submission interface.
+ *
+ * .. code-block:: none
+ *
+ *	Examples syntax:
+ *	CS[X] = generic engine of same class, logical instance X
+ *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
+ *
+ *	Example 1 pseudo code:
+ *	set_engines(INVALID)
+ *	set_parallel(engine_index=0, width=2, num_siblings=1,
+ *		     engines=CS[0],CS[1])
+ *
+ *	Results in the following valid placement:
+ *	CS[0], CS[1]
+ *
+ *	Example 2 pseudo code:
+ *	set_engines(INVALID)
+ *	set_parallel(engine_index=0, width=2, num_siblings=2,
+ *		     engines=CS[0],CS[2],CS[1],CS[3])
+ *
+ *	Results in the following valid placements:
+ *	CS[0], CS[1]
+ *	CS[2], CS[3]
+ *
+ *	This can be thought of as two virtual engines, each containing two
+ *	engines thereby making a 2D array. However, there are bonds tying the
+ *	entries together and placing restrictions on how they can be scheduled.
+ *	Specifically, the scheduler can choose only vertical columns from the 2D
+ *	array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
+ *	scheduler wants to submit to CS[0], it must also choose CS[1] and vice
+ *	versa. Likewise, submitting to CS[2] requires also using CS[3].
+ *	VE[0] = CS[0], CS[2]
+ *	VE[1] = CS[1], CS[3]
+ *
+ *	Example 3 pseudo code:
+ *	set_engines(INVALID)
+ *	set_parallel(engine_index=0, width=2, num_siblings=2,
+ *		     engines=CS[0],CS[1],CS[1],CS[3])
+ *
+ *	Results in the following valid and invalid placements:
+ *	CS[0], CS[1]
+ *	CS[1], CS[3] - Not logically contiguous, return -EINVAL
+ */
+struct i915_context_engines_parallel_submit {
+	/**
+	 * @base: base user extension.
+	 */
+	struct i915_user_extension base;
+
+	/**
+	 * @engine_index: slot for parallel engine
+	 */
+	__u16 engine_index;
+
+	/**
+	 * @width: number of contexts per parallel engine or in other words the
+	 * number of batches in each submission
+	 */
+	__u16 width;
+
+	/**
+	 * @num_siblings: number of siblings per context or in other words the
+	 * number of possible placements for each submission
+	 */
+	__u16 num_siblings;
+
+	/**
+	 * @mbz16: reserved for future use; must be zero
+	 */
+	__u16 mbz16;
+
+	/**
+	 * @flags: all undefined flags must be zero; no flags are currently defined
+	 */
+	__u64 flags;
+
+	/**
+	 * @mbz64: reserved for future use; must be zero
+	 */
+	__u64 mbz64[3];
+
+	/**
+	 * @engines: 2-d array of engine instances to configure parallel engine
+	 *
+	 * length = width (i) * num_siblings (j)
+	 * index = j + i * num_siblings
+	 */
+	struct i915_engine_class_instance engines[0];
+
+} __packed;
+
+#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
+	struct i915_user_extension base; \
+	__u16 engine_index; \
+	__u16 width; \
+	__u16 num_siblings; \
+	__u16 mbz16; \
+	__u64 flags; \
+	__u64 mbz64[3]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/**
  * DOC: Context Engine Map uAPI
  *
  * Context engine map is a new way of addressing engines when submitting batch-
@@ -2108,6 +2293,7 @@
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
 #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
 	struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
@@ -2726,14 +2912,20 @@
 
 	/** @flags: Engine flags. */
 	__u64 flags;
+#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE		(1 << 0)
 
 	/** @capabilities: Capabilities of this engine. */
 	__u64 capabilities;
 #define I915_VIDEO_CLASS_CAPABILITY_HEVC		(1 << 0)
 #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC	(1 << 1)
 
+	/** @logical_instance: Logical instance of engine */
+	__u16 logical_instance;
+
 	/** @rsvd1: Reserved fields. */
-	__u64 rsvd1[4];
+	__u16 rsvd1[3];
+	/** @rsvd2: Reserved fields. */
+	__u64 rsvd2[3];
 };
 
 /**
@@ -2979,8 +3171,12 @@
 	 *
 	 * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
 	 * struct drm_i915_gem_create_ext_memory_regions.
+	 *
+	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
+	 * struct drm_i915_gem_create_ext_protected_content.
 	 */
 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
 	__u64 extensions;
 };
 
@@ -3038,6 +3234,50 @@
 	__u64 regions;
 };
 
+/**
+ * struct drm_i915_gem_create_ext_protected_content - The
+ * I915_OBJECT_PARAM_PROTECTED_CONTENT extension.
+ *
+ * If this extension is provided, buffer contents are expected to be protected
+ * by PXP encryption and require decryption for scan out and processing. This
+ * is only possible on platforms that have PXP enabled, on all other scenarios
+ * using this extension will cause the ioctl to fail and return -ENODEV. The
+ * flags parameter is reserved for future expansion and must currently be set
+ * to zero.
+ *
+ * The buffer contents are considered invalid after a PXP session teardown.
+ *
+ * The encryption is guaranteed to be processed correctly only if the object
+ * is submitted with a context created using the
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
+ * at submission time on the validity of the objects involved.
+ *
+ * Below is an example on how to create a protected object:
+ *
+ * .. code-block:: C
+ *
+ *      struct drm_i915_gem_create_ext_protected_content protected_ext = {
+ *              .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
+ *              .flags = 0,
+ *      };
+ *      struct drm_i915_gem_create_ext create_ext = {
+ *              .size = PAGE_SIZE,
+ *              .extensions = (uintptr_t)&protected_ext,
+ *      };
+ *
+ *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *      if (err) ...
+ */
+struct drm_i915_gem_create_ext_protected_content {
+	/** @base: Extension link. See struct i915_user_extension. */
+	struct i915_user_extension base;
+	/** @flags: reserved for future usage, currently MBZ */
+	__u32 flags;
+};
+
+/* ID of the protected content session managed by i915 when PXP is active */
+#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
+
 #if defined(__cplusplus)
 }
 #endif
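
A minimal sketch of configuring one parallel slot with the helper macro above, mirroring Example 1 (width=2, num_siblings=1); the engine class/instance values are illustrative, and the resulting struct would then be chained into a struct i915_context_param_engines via its extensions field when setting I915_CONTEXT_PARAM_ENGINES:

	I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(parallel, 2) = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT },
		.engine_index = 0,	/* slot 0 of the engine map */
		.width = 2,		/* two BBs per execbuf */
		.num_siblings = 1,	/* one placement choice */
		.engines = {
			{ .engine_class = I915_ENGINE_CLASS_RENDER,
			  .engine_instance = 0 },
			{ .engine_class = I915_ENGINE_CLASS_RENDER,
			  .engine_instance = 1 },
		},
	};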
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index b3610fd..eebd389 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -7,24 +7,23 @@
 
 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
+	__u32	rx_packets;
+	__u32	tx_packets;
+	__u32	rx_bytes;
+	__u32	tx_bytes;
+	__u32	rx_errors;
+	__u32	tx_errors;
+	__u32	rx_dropped;
+	__u32	tx_dropped;
+	__u32	multicast;
 	__u32	collisions;
-
 	/* detailed rx_errors: */
 	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
+	__u32	rx_over_errors;
+	__u32	rx_crc_errors;
+	__u32	rx_frame_errors;
+	__u32	rx_fifo_errors;
+	__u32	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u32	tx_aborted_errors;
@@ -37,29 +36,201 @@
 	__u32	rx_compressed;
 	__u32	tx_compressed;
 
-	__u32	rx_nohandler;		/* dropped, no handler found	*/
+	__u32	rx_nohandler;
 };
 
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which the host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which the host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that the device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counted by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is the
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or because the
+ *   next buffer in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistic was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistic was used interchangeably with @rx_over_errors.
+ *   It is not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (CAN) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or the host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-	__u64	rx_packets;		/* total packets received	*/
-	__u64	tx_packets;		/* total packets transmitted	*/
-	__u64	rx_bytes;		/* total bytes received 	*/
-	__u64	tx_bytes;		/* total bytes transmitted	*/
-	__u64	rx_errors;		/* bad packets received		*/
-	__u64	tx_errors;		/* packet transmit problems	*/
-	__u64	rx_dropped;		/* no space in linux buffers	*/
-	__u64	tx_dropped;		/* no space available in linux	*/
-	__u64	multicast;		/* multicast packets received	*/
+	__u64	rx_packets;
+	__u64	tx_packets;
+	__u64	rx_bytes;
+	__u64	tx_bytes;
+	__u64	rx_errors;
+	__u64	tx_errors;
+	__u64	rx_dropped;
+	__u64	tx_dropped;
+	__u64	multicast;
 	__u64	collisions;
 
 	/* detailed rx_errors: */
 	__u64	rx_length_errors;
-	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u64	rx_frame_errors;	/* recv'd frame alignment error */
-	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u64	rx_missed_errors;	/* receiver missed packet	*/
+	__u64	rx_over_errors;
+	__u64	rx_crc_errors;
+	__u64	rx_frame_errors;
+	__u64	rx_fifo_errors;
+	__u64	rx_missed_errors;
 
 	/* detailed tx_errors */
 	__u64	tx_aborted_errors;
@@ -71,8 +242,7 @@
 	/* for cslip etc */
 	__u64	rx_compressed;
 	__u64	tx_compressed;
-
-	__u64	rx_nohandler;		/* dropped, no handler found	*/
+	__u64	rx_nohandler;
 };
 
 /* The struct should be in sync with struct ifmap */
@@ -170,12 +340,29 @@
 	IFLA_PROP_LIST,
 	IFLA_ALT_IFNAME, /* Alternative ifname */
 	IFLA_PERM_ADDRESS,
+	IFLA_PROTO_DOWN_REASON,
+
+	/* device (sysfs) name as parent, used instead
+	 * of IFLA_LINK where there's no parent netdev
+	 */
+	IFLA_PARENT_DEV_NAME,
+	IFLA_PARENT_DEV_BUS_NAME,
+
 	__IFLA_MAX
 };
 
 
 #define IFLA_MAX (__IFLA_MAX - 1)
 
+enum {
+	IFLA_PROTO_DOWN_REASON_UNSPEC,
+	IFLA_PROTO_DOWN_REASON_MASK,	/* u32, mask for reason bits */
+	IFLA_PROTO_DOWN_REASON_VALUE,   /* u32, reason bit value */
+
+	__IFLA_PROTO_DOWN_REASON_CNT,
+	IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
 /* backwards compatibility for userspace */
 #ifndef __KERNEL__
 #define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
@@ -293,6 +480,7 @@
 	IFLA_BR_MCAST_MLD_VERSION,
 	IFLA_BR_VLAN_STATS_PER_PORT,
 	IFLA_BR_MULTI_BOOLOPT,
+	IFLA_BR_MCAST_QUERIER_STATE,
 	__IFLA_BR_MAX,
 };
 
@@ -346,6 +534,8 @@
 	IFLA_BRPORT_BACKUP_PORT,
 	IFLA_BRPORT_MRP_RING_OPEN,
 	IFLA_BRPORT_MRP_IN_OPEN,
+	IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+	IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -433,6 +623,7 @@
 };
 
 #define MACVLAN_FLAG_NOPROMISC	1
+#define MACVLAN_FLAG_NODST	2 /* skip dst macvlan if matching src macvlan */
 
 /* VRF section */
 enum {
@@ -597,6 +788,18 @@
 	GENEVE_DF_MAX = __GENEVE_DF_END - 1,
 };
 
+/* Bareudp section  */
+enum {
+	IFLA_BAREUDP_UNSPEC,
+	IFLA_BAREUDP_PORT,
+	IFLA_BAREUDP_ETHERTYPE,
+	IFLA_BAREUDP_SRCPORT_MIN,
+	IFLA_BAREUDP_MULTIPROTO_MODE,
+	__IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
 /* PPP section */
 enum {
 	IFLA_PPP_UNSPEC,
@@ -899,7 +1102,14 @@
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 
 
-/* HSR section */
+/* HSR/PRP section, both use the same interface */
+
+/* Different redundancy protocols for an HSR device */
+enum {
+	HSR_PROTOCOL_HSR,
+	HSR_PROTOCOL_PRP,
+	HSR_PROTOCOL_MAX,
+};
 
 enum {
 	IFLA_HSR_UNSPEC,
@@ -909,6 +1119,9 @@
 	IFLA_HSR_SUPERVISION_ADDR,	/* Supervision frame multicast addr */
 	IFLA_HSR_SEQ_NR,
 	IFLA_HSR_VERSION,		/* HSR version */
+	IFLA_HSR_PROTOCOL,		/* Indicates a protocol other than
+					 * HSR, for example PRP.
+					 */
 	__IFLA_HSR_MAX,
 };
 
@@ -1033,6 +1246,8 @@
 #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5            (1U << 5)
 
 enum {
 	IFLA_RMNET_UNSPEC,
@@ -1048,4 +1263,14 @@
 	__u32	mask;
 };
 
+/* MCTP section */
+
+enum {
+	IFLA_MCTP_UNSPEC,
+	IFLA_MCTP_NET,
+	__IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
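
The folding rules spelled out in the new kernel-doc can be made concrete with a short sketch of how /proc/net/dev derives its aggregate columns from these fields (a paraphrase of the in-tree procfs behavior, not a copy of it):

	/* the "frame" column: aggregate receive framing errors */
	static __u64 proc_rx_frame(const struct rtnl_link_stats64 *s)
	{
		return s->rx_length_errors + s->rx_over_errors +
		       s->rx_crc_errors + s->rx_frame_errors;
	}

	/* the "drop" column: rx_missed_errors is folded into drops */
	static __u64 proc_rx_drop(const struct rtnl_link_stats64 *s)
	{
		return s->rx_dropped + s->rx_missed_errors;
	}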
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index a067410..1daa452 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -269,6 +269,7 @@
 #define KVM_EXIT_AP_RESET_HOLD    32
 #define KVM_EXIT_X86_BUS_LOCK     33
 #define KVM_EXIT_XEN              34
+#define KVM_EXIT_RISCV_SBI        35
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -397,13 +398,23 @@
 		 * "ndata" is correct, that new fields are enumerated in "flags",
 		 * and that each flag enumerates fields that are 64-bit aligned
 		 * and sized (so that ndata+internal.data[] is valid/accurate).
+		 *
+		 * Space beyond the defined fields may be used to store arbitrary
+		 * debug information relating to the emulation failure. It is
+		 * accounted for in "ndata" but the format is unspecified and is
+		 * not represented in "flags". Any such information is *not* ABI!
 		 */
 		struct {
 			__u32 suberror;
 			__u32 ndata;
 			__u64 flags;
-			__u8  insn_size;
-			__u8  insn_bytes[15];
+			union {
+				struct {
+					__u8  insn_size;
+					__u8  insn_bytes[15];
+				};
+			};
+			/* Arbitrary debug data may follow. */
 		} emulation_failure;
 		/* KVM_EXIT_OSI */
 		struct {
@@ -469,6 +480,13 @@
 		} msr;
 		/* KVM_EXIT_XEN */
 		struct kvm_xen_exit xen;
+		/* KVM_EXIT_RISCV_SBI */
+		struct {
+			unsigned long extension_id;
+			unsigned long function_id;
+			unsigned long args[6];
+			unsigned long ret[2];
+		} riscv_sbi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1112,6 +1130,7 @@
 #define KVM_CAP_BINARY_STATS_FD 203
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
+#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1223,11 +1242,16 @@
 
 /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
 #define KVM_CLOCK_TSC_STABLE		2
+#define KVM_CLOCK_REALTIME		(1 << 2)
+#define KVM_CLOCK_HOST_TSC		(1 << 3)
 
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
-	__u32 pad[9];
+	__u32 pad0;
+	__u64 realtime;
+	__u64 host_tsc;
+	__u32 pad[4];
 };
 
 /* For KVM_CAP_SW_TLB */
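
A minimal sketch of consuming the extended clock data, assuming vm_fd is an open KVM VM file descriptor:

	struct kvm_clock_data data = { 0 };

	if (ioctl(vm_fd, KVM_GET_CLOCK, &data) == 0) {
		if (data.flags & KVM_CLOCK_REALTIME)
			/* data.realtime: host CLOCK_REALTIME at capture */;
		if (data.flags & KVM_CLOCK_HOST_TSC)
			/* data.host_tsc: host TSC value at capture */;
	}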
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index de45fcd..bb73e9a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -268,5 +268,8 @@
 # define PR_SCHED_CORE_SHARE_TO		2 /* push core_sched cookie to pid */
 # define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
 # define PR_SCHED_CORE_MAX		4
+# define PR_SCHED_CORE_SCOPE_THREAD		0
+# define PR_SCHED_CORE_SCOPE_THREAD_GROUP	1
+# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP	2
 
 #endif /* _LINUX_PRCTL_H */
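
A minimal sketch using the new scope arguments; PR_SCHED_CORE and PR_SCHED_CORE_CREATE are defined in the same header, just above the hunk shown:

	#include <sys/prctl.h>

	/* create a core-scheduling cookie covering the whole thread group */
	prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0 /* current task */,
	      PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);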
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 5859ca0..5fbb79e 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -1002,7 +1002,7 @@
 #define SNDRV_CTL_ELEM_ACCESS_WRITE		(1<<1)
 #define SNDRV_CTL_ELEM_ACCESS_READWRITE		(SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
 #define SNDRV_CTL_ELEM_ACCESS_VOLATILE		(1<<2)	/* control value may be changed without a notification */
-// (1 << 3) is unused.
+/* (1 << 3) is unused. */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_READ		(1<<4)	/* TLV read is possible */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE		(1<<5)	/* TLV write is possible */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE	(SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index d26e547..6f3df00 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -45,8 +45,8 @@
 	int nr_fd_array;
 };
 
-void bpf_gen__init(struct bpf_gen *gen, int log_level);
-int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);
 void bpf_gen__free(struct bpf_gen *gen);
 void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
 void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx);
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 502dea5..9934851 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -18,7 +18,7 @@
 #define MAX_USED_MAPS	64
 #define MAX_USED_PROGS	32
 #define MAX_KFUNC_DESCS 256
-#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
+#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS)
 
 /* The following structure describes the stack layout of the loader program.
  * In addition R6 contains the pointer to context.
@@ -33,8 +33,8 @@
  */
 struct loader_stack {
 	__u32 btf_fd;
-	__u32 prog_fd[MAX_USED_PROGS];
 	__u32 inner_map_fd;
+	__u32 prog_fd[MAX_USED_PROGS];
 };
 
 #define stack_off(field) \
@@ -42,6 +42,11 @@
 
 #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))
 
+static int blob_fd_array_off(struct bpf_gen *gen, int index)
+{
+	return gen->fd_array + index * sizeof(int);
+}
+
 static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
 {
 	size_t off = gen->insn_cur - gen->insn_start;
@@ -102,11 +107,15 @@
 	emit(gen, insn2);
 }
 
-void bpf_gen__init(struct bpf_gen *gen, int log_level)
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
 {
-	size_t stack_sz = sizeof(struct loader_stack);
+	size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz;
 	int i;
 
+	gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	gen->log_level = log_level;
 	/* save ctx pointer into R6 */
 	emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
@@ -118,19 +127,27 @@
 	emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
 	emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
 
+	/* amount of stack actually used, only used to calculate iterations, not stack offset */
+	nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]);
 	/* jump over cleanup code */
 	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
-			      /* size of cleanup code below */
-			      (stack_sz / 4) * 3 + 2));
+			      /* size of cleanup code below (including map fd cleanup) */
+			      (nr_progs_sz / 4) * 3 + 2 +
+			      /* 6 insns for emit_sys_close_blob,
+			       * 6 insns for debug_regs in emit_sys_close_blob
+			       */
+			      nr_maps * (6 + (gen->log_level ? 6 : 0))));
 
 	/* remember the label where all error branches will jump to */
 	gen->cleanup_label = gen->insn_cur - gen->insn_start;
 	/* emit cleanup code: close all temp FDs */
-	for (i = 0; i < stack_sz; i += 4) {
+	for (i = 0; i < nr_progs_sz; i += 4) {
 		emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
 		emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
 		emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
 	}
+	for (i = 0; i < nr_maps; i++)
+		emit_sys_close_blob(gen, blob_fd_array_off(gen, i));
 	/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
 	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
 	emit(gen, BPF_EXIT_INSN());
@@ -160,8 +177,6 @@
  */
 static int add_map_fd(struct bpf_gen *gen)
 {
-	if (!gen->fd_array)
-		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	if (gen->nr_maps == MAX_USED_MAPS) {
 		pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
 		gen->error = -E2BIG;
@@ -174,8 +189,6 @@
 {
 	int cur;
 
-	if (!gen->fd_array)
-		gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
 	if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
 		cur = add_data(gen, NULL, sizeof(int));
 		return (cur - gen->fd_array) / sizeof(int);
@@ -183,11 +196,6 @@
 	return MAX_USED_MAPS + gen->nr_fd_array++;
 }
 
-static int blob_fd_array_off(struct bpf_gen *gen, int index)
-{
-	return gen->fd_array + index * sizeof(int);
-}
-
 static int insn_bytes_to_bpf_size(__u32 sz)
 {
 	switch (sz) {
@@ -359,10 +367,15 @@
 	__emit_sys_close(gen);
 }
 
-int bpf_gen__finish(struct bpf_gen *gen)
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
 {
 	int i;
 
+	if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) {
+		pr_warn("progs/maps mismatch\n");
+		gen->error = -EFAULT;
+		return gen->error;
+	}
 	emit_sys_close_stack(gen, stack_off(btf_fd));
 	for (i = 0; i < gen->nr_progs; i++)
 		move_stack2ctx(gen,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a1bea19..7c74342 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7258,7 +7258,7 @@
 	}
 
 	if (obj->gen_loader)
-		bpf_gen__init(obj->gen_loader, attr->log_level);
+		bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps);
 
 	err = bpf_object__probe_loading(obj);
 	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
@@ -7277,7 +7277,7 @@
 		for (i = 0; i < obj->nr_maps; i++)
 			obj->maps[i].fd = -1;
 		if (!err)
-			err = bpf_gen__finish(obj->gen_loader);
+			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
 	}
 
 	/* clean up fd_array */
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
deleted file mode 100644
index 6c308ac..0000000
--- a/tools/lib/lockdep/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-liblockdep.so.*
diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build
deleted file mode 100644
index 6f66735..0000000
--- a/tools/lib/lockdep/Build
+++ /dev/null
@@ -1 +0,0 @@
-liblockdep-y += common.o lockdep.o preload.o rbtree.o
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
deleted file mode 100644
index 9dafb8c..0000000
--- a/tools/lib/lockdep/Makefile
+++ /dev/null
@@ -1,162 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# file format version
-FILE_VERSION = 1
-
-LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
-  $(if $(or $(findstring environment,$(origin $(1))),\
-            $(findstring command line,$(origin $(1)))),,\
-    $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,LD,$(CROSS_COMPILE)ld)
-
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-prefix ?= /usr/local
-libdir_relative = lib
-libdir = $(prefix)/$(libdir_relative)
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
-
-export DESTDIR DESTDIR_SQ INSTALL
-
-MAKEFLAGS += --no-print-directory
-
-include ../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-# Shell quotes
-libdir_SQ = $(subst ','\'',$(libdir))
-bindir_SQ = $(subst ','\'',$(bindir))
-
-LIB_IN := $(OUTPUT)liblockdep-in.o
-
-BIN_FILE = lockdep
-LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION)
-
-CONFIG_INCLUDES =
-CONFIG_LIBS	=
-CONFIG_FLAGS	=
-
-OBJ		= $@
-N		=
-
-export Q VERBOSE
-
-INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS if not set elsewhere
-CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
-CFLAGS += -fPIC
-CFLAGS += -Wall
-
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-
-ifeq ($(VERBOSE),1)
-  Q =
-  print_shared_lib_compile =
-  print_install =
-else
-  Q = @
-  print_shared_lib_compile =	echo '  LD       '$(OBJ);
-  print_static_lib_build =	echo '  LD       '$(OBJ);
-  print_install =		echo '  INSTALL  '$1'	to	$(DESTDIR_SQ)$2';
-endif
-
-all:
-
-export srctree OUTPUT CC LD CFLAGS V
-include $(srctree)/tools/build/Makefile.include
-
-do_compile_shared_library =			\
-	($(print_shared_lib_compile)		\
-	$(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
-
-do_build_static_lib =				\
-	($(print_static_lib_build)		\
-	$(RM) $@;  $(AR) rcs $@ $^)
-
-CMD_TARGETS = $(LIB_FILE)
-
-TARGETS = $(CMD_TARGETS)
-
-
-all: fixdep all_cmd
-
-all_cmd: $(CMD_TARGETS)
-
-$(LIB_IN): force
-	$(Q)$(MAKE) $(build)=liblockdep
-
-$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
-	$(Q)$(do_compile_shared_library)
-
-$(OUTPUT)liblockdep.a: $(LIB_IN)
-	$(Q)$(do_build_static_lib)
-
-tags:	force
-	$(RM) tags
-	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
-	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS:	force
-	$(RM) TAGS
-	find . -name '*.[ch]' | xargs etags \
-	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install
-	$(print_install)				\
-	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
-	fi;						\
-	$(INSTALL) $1 '$(DESTDIR_SQ)$2'
-endef
-
-install_lib: all_cmd
-	$(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
-	$(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
-
-install: install_lib
-
-clean:
-	$(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
-	$(RM) tags TAGS
-
-PHONY += force
-force:
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
deleted file mode 100644
index 5c3b58c..0000000
--- a/tools/lib/lockdep/common.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/compiler.h>
-#include <linux/lockdep.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-
-static __thread struct task_struct current_obj;
-
-/* lockdep wants these */
-bool debug_locks = true;
-bool debug_locks_silent;
-
-__attribute__((destructor)) static void liblockdep_exit(void)
-{
-	debug_check_no_locks_held();
-}
-
-struct task_struct *__curr(void)
-{
-	if (current_obj.pid == 0) {
-		/* Makes lockdep output pretty */
-		prctl(PR_GET_NAME, current_obj.comm);
-		current_obj.pid = syscall(__NR_gettid);
-	}
-
-	return &current_obj;
-}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
deleted file mode 100644
index a6d7ee5..0000000
--- a/tools/lib/lockdep/include/liblockdep/common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_COMMON_H
-#define _LIBLOCKDEP_COMMON_H
-
-#include <pthread.h>
-
-#define NR_LOCKDEP_CACHING_CLASSES 2
-#define MAX_LOCKDEP_SUBCLASSES 8UL
-
-#ifndef CALLER_ADDR0
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#endif
-
-#ifndef _RET_IP_
-#define _RET_IP_ CALLER_ADDR0
-#endif
-
-#ifndef _THIS_IP_
-#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
-#endif
-
-struct lockdep_subclass_key {
-	char __one_byte;
-};
-
-struct lock_class_key {
-	struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
-};
-
-struct lockdep_map {
-	struct lock_class_key	*key;
-	struct lock_class	*class_cache[NR_LOCKDEP_CACHING_CLASSES];
-	const char		*name;
-#ifdef CONFIG_LOCK_STAT
-	int			cpu;
-	unsigned long		ip;
-#endif
-};
-
-void lockdep_init_map(struct lockdep_map *lock, const char *name,
-			struct lock_class_key *key, int subclass);
-void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
-			int trylock, int read, int check,
-			struct lockdep_map *nest_lock, unsigned long ip);
-void lock_release(struct lockdep_map *lock, unsigned long ip);
-void lockdep_reset_lock(struct lockdep_map *lock);
-void lockdep_register_key(struct lock_class_key *key);
-void lockdep_unregister_key(struct lock_class_key *key);
-extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
-	{ .name = (_name), .key = (void *)(_key), }
-
-#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
deleted file mode 100644
index bd106b8..0000000
--- a/tools/lib/lockdep/include/liblockdep/mutex.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_MUTEX_H
-#define _LIBLOCKDEP_MUTEX_H
-
-#include <pthread.h>
-#include "common.h"
-
-struct liblockdep_pthread_mutex {
-	pthread_mutex_t mutex;
-	struct lock_class_key key;
-	struct lockdep_map dep_map;
-};
-
-typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
-
-#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx)			\
-		(const struct liblockdep_pthread_mutex) {		\
-	.mutex = PTHREAD_MUTEX_INITIALIZER,				\
-	.dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)),	\
-}
-
-static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
-				const char *name,
-				struct lock_class_key *key,
-				const pthread_mutexattr_t *__mutexattr)
-{
-	lockdep_init_map(&lock->dep_map, name, key, 0);
-	return pthread_mutex_init(&lock->mutex, __mutexattr);
-}
-
-#define liblockdep_pthread_mutex_init(mutex, mutexattr)			\
-({									\
-	lockdep_register_key(&(mutex)->key);				\
-	__mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr));	\
-})
-
-static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_mutex_lock(&lock->mutex);
-}
-
-static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
-{
-	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
-	return pthread_mutex_unlock(&lock->mutex);
-}
-
-static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
-{
-	lockdep_reset_lock(&lock->dep_map);
-	lockdep_unregister_key(&lock->key);
-	return pthread_mutex_destroy(&lock->mutex);
-}
-
-#ifdef __USE_LIBLOCKDEP
-
-#define pthread_mutex_t         liblockdep_pthread_mutex_t
-#define pthread_mutex_init      liblockdep_pthread_mutex_init
-#define pthread_mutex_lock      liblockdep_pthread_mutex_lock
-#define pthread_mutex_unlock    liblockdep_pthread_mutex_unlock
-#define pthread_mutex_trylock   liblockdep_pthread_mutex_trylock
-#define pthread_mutex_destroy   liblockdep_pthread_mutex_destroy
-
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
deleted file mode 100644
index 6d5d293..0000000
--- a/tools/lib/lockdep/include/liblockdep/rwlock.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_RWLOCK_H
-#define _LIBLOCKDEP_RWLOCK_H
-
-#include <pthread.h>
-#include "common.h"
-
-struct liblockdep_pthread_rwlock {
-	pthread_rwlock_t rwlock;
-	struct lockdep_map dep_map;
-};
-
-typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
-
-#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl)			\
-		(struct liblockdep_pthread_rwlock) {			\
-	.rwlock = PTHREAD_RWLOCK_INITIALIZER,				\
-	.dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)),	\
-}
-
-static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
-				const char *name,
-				struct lock_class_key *key,
-				const pthread_rwlockattr_t *attr)
-{
-	lockdep_init_map(&lock->dep_map, name, key, 0);
-
-	return pthread_rwlock_init(&lock->rwlock, attr);
-}
-
-#define liblockdep_pthread_rwlock_init(lock, attr)		\
-({							\
-	static struct lock_class_key __key;		\
-							\
-	__rwlock_init((lock), #lock, &__key, (attr));	\
-})
-
-static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_rwlock_rdlock(&lock->rwlock);
-
-}
-
-static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
-{
-	lock_release(&lock->dep_map, (unsigned long)_RET_IP_);
-	return pthread_rwlock_unlock(&lock->rwlock);
-}
-
-static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_rwlock_wrlock(&lock->rwlock);
-}
-
-static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock)
-{
-	lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-	return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0;
-}
-
-static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
-{
-	return pthread_rwlock_destroy(&lock->rwlock);
-}
-
-#ifdef __USE_LIBLOCKDEP
-
-#define pthread_rwlock_t		liblockdep_pthread_rwlock_t
-#define pthread_rwlock_init		liblockdep_pthread_rwlock_init
-#define pthread_rwlock_rdlock		liblockdep_pthread_rwlock_rdlock
-#define pthread_rwlock_unlock		liblockdep_pthread_rwlock_unlock
-#define pthread_rwlock_wrlock		liblockdep_pthread_rwlock_wrlock
-#define pthread_rwlock_tryrdlock	liblockdep_pthread_rwlock_tryrdlock
-#define pthread_rwlock_trywrlock	liblockdep_pthread_rwlock_trywrlock
-#define pthread_rwlock_destroy		liblockdep_rwlock_destroy
-
-#endif
-
-#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
deleted file mode 100755
index 49af9fe..0000000
--- a/tools/lib/lockdep/lockdep
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
deleted file mode 100644
index 348a9d0..0000000
--- a/tools/lib/lockdep/lockdep.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/lockdep.h>
-#include <stdlib.h>
-
-/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
-#define hlist_for_each_entry_rcu	hlist_for_each_entry
-#define hlist_add_head_rcu		hlist_add_head
-#define hlist_del_rcu			hlist_del
-#define list_for_each_entry_rcu		list_for_each_entry
-#define list_add_tail_rcu		list_add_tail
-
-u32 prandom_u32(void)
-{
-	/* Used only by lock_pin_lock() which is dead code */
-	abort();
-}
-
-void print_irqtrace_events(struct task_struct *curr)
-{
-	abort();
-}
-
-static struct new_utsname *init_utsname(void)
-{
-	static struct new_utsname n = (struct new_utsname) {
-		.release = "liblockdep",
-		.version = LIBLOCKDEP_VERSION,
-	};
-
-	return &n;
-}
-
-#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
deleted file mode 100644
index 29d0c95..0000000
--- a/tools/lib/lockdep/lockdep_internals.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
deleted file mode 100644
index 248d235..0000000
--- a/tools/lib/lockdep/lockdep_states.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
deleted file mode 100644
index 8f1adbe..0000000
--- a/tools/lib/lockdep/preload.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <pthread.h>
-#include <stdio.h>
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <sysexits.h>
-#include <unistd.h>
-#include "include/liblockdep/mutex.h"
-#include "../../include/linux/rbtree.h"
-
-/**
- * struct lock_lookup - liblockdep's view of a single unique lock
- * @orig: pointer to the original pthread lock, used for lookups
- * @dep_map: lockdep's dep_map structure
- * @key: lockdep's key structure
- * @node: rb-tree node used to store the lock in a global tree
- * @name: a unique name for the lock
- */
-struct lock_lookup {
-	void *orig; /* Original pthread lock, used for lookups */
-	struct lockdep_map dep_map; /* Since all locks are dynamic, we need
-				     * a dep_map and a key for each lock */
-	/*
-	 * Wait, there's no support for key classes? Yup :(
-	 * Most big projects wrap the pthread api with their own calls to
-	 * be compatible with different locking methods. This means that
-	 * "classes" will be brokes since the function that creates all
-	 * locks will point to a generic locking function instead of the
-	 * actual code that wants to do the locking.
-	 */
-	struct lock_class_key key;
-	struct rb_node node;
-#define LIBLOCKDEP_MAX_LOCK_NAME 22
-	char name[LIBLOCKDEP_MAX_LOCK_NAME];
-};
-
-/* This is where we store our locks */
-static struct rb_root locks = RB_ROOT;
-static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
-
-/* pthread mutex API */
-
-#ifdef __GLIBC__
-extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
-extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
-extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
-#else
-#define __pthread_mutex_init	NULL
-#define __pthread_mutex_lock	NULL
-#define __pthread_mutex_trylock	NULL
-#define __pthread_mutex_unlock	NULL
-#define __pthread_mutex_destroy	NULL
-#endif
-static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
-			const pthread_mutexattr_t *attr)	= __pthread_mutex_init;
-static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex)	= __pthread_mutex_lock;
-static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex)	= __pthread_mutex_trylock;
-static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex)	= __pthread_mutex_unlock;
-static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex)	= __pthread_mutex_destroy;
-
-/* pthread rwlock API */
-
-#ifdef __GLIBC__
-extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
-extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
-extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
-#else
-#define __pthread_rwlock_init		NULL
-#define __pthread_rwlock_destroy	NULL
-#define __pthread_rwlock_wrlock		NULL
-#define __pthread_rwlock_trywrlock	NULL
-#define __pthread_rwlock_rdlock		NULL
-#define __pthread_rwlock_tryrdlock	NULL
-#define __pthread_rwlock_unlock		NULL
-#endif
-
-static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
-			const pthread_rwlockattr_t *attr)		= __pthread_rwlock_init;
-static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_destroy;
-static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_rdlock;
-static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_tryrdlock;
-static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_trywrlock;
-static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_wrlock;
-static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock)	= __pthread_rwlock_unlock;
-
-enum { none, prepare, done, } __init_state;
-static void init_preload(void);
-static void try_init_preload(void)
-{
-	if (__init_state != done)
-		init_preload();
-}
-
-static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
-{
-	struct rb_node **node = &locks.rb_node;
-	struct lock_lookup *l;
-
-	*parent = NULL;
-
-	while (*node) {
-		l = rb_entry(*node, struct lock_lookup, node);
-
-		*parent = *node;
-		if (lock < l->orig)
-			node = &l->node.rb_left;
-		else if (lock > l->orig)
-			node = &l->node.rb_right;
-		else
-			return node;
-	}
-
-	return node;
-}
-
-#ifndef LIBLOCKDEP_STATIC_ENTRIES
-#define LIBLOCKDEP_STATIC_ENTRIES	1024
-#endif
-
-static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
-static int __locks_nr;
-
-static inline bool is_static_lock(struct lock_lookup *lock)
-{
-	return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
-}
-
-static struct lock_lookup *alloc_lock(void)
-{
-	if (__init_state != done) {
-		/*
-		 * Some programs attempt to initialize and use locks in their
-		 * allocation path. This means that a call to malloc() would
-		 * result in locks being initialized and locked.
-		 *
-		 * Why is it an issue for us? dlsym() below will try allocating
-		 * to give us the original function. Since this allocation will
-		 * result in locking operations, we have to let pthread deal
-		 * with it, but we can't: we don't have the pointer to the
-		 * original API since we're inside dlsym() trying to get it.
-		 */
-
-		int idx = __locks_nr++;
-		if (idx >= ARRAY_SIZE(__locks)) {
-			dprintf(STDERR_FILENO,
-		"LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
-			exit(EX_UNAVAILABLE);
-		}
-		return __locks + idx;
-	}
-
-	return malloc(sizeof(struct lock_lookup));
-}
-
-static inline void free_lock(struct lock_lookup *lock)
-{
-	if (likely(!is_static_lock(lock)))
-		free(lock);
-}
-
-/**
- * __get_lock - find or create a lock instance
- * @lock: pointer to a pthread lock function
- *
- * Try to find an existing lock in the rbtree using the provided pointer. If
- * one wasn't found - create it.
- */
-static struct lock_lookup *__get_lock(void *lock)
-{
-	struct rb_node **node, *parent;
-	struct lock_lookup *l;
-
-	ll_pthread_rwlock_rdlock(&locks_rwlock);
-	node = __get_lock_node(lock, &parent);
-	ll_pthread_rwlock_unlock(&locks_rwlock);
-	if (*node) {
-		return rb_entry(*node, struct lock_lookup, node);
-	}
-
-	/* We didn't find the lock, let's create it */
-	l = alloc_lock();
-	if (l == NULL)
-		return NULL;
-
-	l->orig = lock;
-	/*
-	 * Currently the name of the lock is the ptr value of the pthread lock;
-	 * while not optimal, it makes debugging a bit easier.
-	 *
-	 * TODO: Get the real name of the lock using libdwarf
-	 */
-	sprintf(l->name, "%p", lock);
-	lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
-
-	ll_pthread_rwlock_wrlock(&locks_rwlock);
-	/* This might have changed since the last time we fetched it */
-	node = __get_lock_node(lock, &parent);
-	rb_link_node(&l->node, parent, node);
-	rb_insert_color(&l->node, &locks);
-	ll_pthread_rwlock_unlock(&locks_rwlock);
-
-	return l;
-}
-
-static void __del_lock(struct lock_lookup *lock)
-{
-	ll_pthread_rwlock_wrlock(&locks_rwlock);
-	rb_erase(&lock->node, &locks);
-	ll_pthread_rwlock_unlock(&locks_rwlock);
-	free_lock(lock);
-}
-
-int pthread_mutex_init(pthread_mutex_t *mutex,
-			const pthread_mutexattr_t *attr)
-{
-	int r;
-
-	/*
-	 * We keep trying to init our preload module because there might be
-	 * code in init sections that tries to touch locks before we are
-	 * initialized; in that case we'll need to manually call preload
-	 * to get us going.
-	 *
-	 * Funnily enough, the kernel's lockdep had the same issue, and used
-	 * (almost) the same solution. See look_up_lock_class() in
-	 * kernel/locking/lockdep.c for details.
-	 */
-	try_init_preload();
-
-	r = ll_pthread_mutex_init(mutex, attr);
-	if (r == 0)
-		/*
-		 * We do a dummy initialization here so that lockdep could
-		 * warn us if something fishy is going on - such as
-		 * initializing a held lock.
-		 */
-		__get_lock(mutex);
-
-	return r;
-}
-
-int pthread_mutex_lock(pthread_mutex_t *mutex)
-{
-	int r;
-
-	try_init_preload();
-
-	lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL,
-			(unsigned long)_RET_IP_);
-	/*
-	 * Here's the thing with pthread mutexes: unlike the kernel variant,
-	 * they can fail.
-	 *
-	 * This means that the behaviour here is a bit different from what's
-	 * going on in the kernel: there we just tell lockdep that we took the
-	 * lock before actually taking it, but here we must deal with the case
-	 * that locking failed.
-	 *
-	 * To do that we'll "release" the lock if locking failed - this way
-	 * we'll get lockdep doing the correct checks when we try to take
-	 * the lock, and if that fails - we'll be back to the correct
-	 * state by releasing it.
-	 */
-	r = ll_pthread_mutex_lock(mutex);
-	if (r)
-		lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_mutex_trylock(pthread_mutex_t *mutex)
-{
-	int r;
-
-	try_init_preload();
-
-	lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-	r = ll_pthread_mutex_trylock(mutex);
-	if (r)
-		lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_mutex_unlock(pthread_mutex_t *mutex)
-{
-	int r;
-
-	try_init_preload();
-
-	lock_release(&__get_lock(mutex)->dep_map, (unsigned long)_RET_IP_);
-	/*
-	 * Just like taking a lock, only in reverse!
-	 *
-	 * If we fail releasing the lock, tell lockdep we're holding it again.
-	 */
-	r = ll_pthread_mutex_unlock(mutex);
-	if (r)
-		lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_mutex_destroy(pthread_mutex_t *mutex)
-{
-	try_init_preload();
-
-	/*
-	 * Let's see if we're releasing a lock that's held.
-	 *
-	 * TODO: Hook into free() and add that check there as well.
-	 */
-	debug_check_no_locks_freed(mutex, sizeof(*mutex));
-	__del_lock(__get_lock(mutex));
-	return ll_pthread_mutex_destroy(mutex);
-}
-
-/* This is the rwlock part, very similar to what happened with mutex above */
-int pthread_rwlock_init(pthread_rwlock_t *rwlock,
-			const pthread_rwlockattr_t *attr)
-{
-	int r;
-
-	try_init_preload();
-
-	r = ll_pthread_rwlock_init(rwlock, attr);
-	if (r == 0)
-		__get_lock(rwlock);
-
-	return r;
-}
-
-int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
-{
-	try_init_preload();
-
-	debug_check_no_locks_freed(rwlock, sizeof(*rwlock));
-	__del_lock(__get_lock(rwlock));
-	return ll_pthread_rwlock_destroy(rwlock);
-}
-
-int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
-{
-	int r;
-
-	init_preload();
-
-	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
-	r = ll_pthread_rwlock_rdlock(rwlock);
-	if (r)
-		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
-{
-	int r;
-
-	init_preload();
-
-	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
-	r = ll_pthread_rwlock_tryrdlock(rwlock);
-	if (r)
-		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
-{
-	int r;
-
-	init_preload();
-
-	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
-	r = ll_pthread_rwlock_trywrlock(rwlock);
-	if (r)
-		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
-{
-	int r;
-
-	init_preload();
-
-	lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-	r = ll_pthread_rwlock_wrlock(rwlock);
-	if (r)
-		lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
-{
-	int r;
-
-	init_preload();
-
-	lock_release(&__get_lock(rwlock)->dep_map, (unsigned long)_RET_IP_);
-	r = ll_pthread_rwlock_unlock(rwlock);
-	if (r)
-		lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
-
-	return r;
-}
-
-__attribute__((constructor)) static void init_preload(void)
-{
-	if (__init_state == done)
-		return;
-
-#ifndef __GLIBC__
-	__init_state = prepare;
-
-	ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
-	ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
-	ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
-	ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
-	ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
-
-	ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
-	ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
-	ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
-	ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
-	ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
-	ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
-	ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
-#endif
-
-	__init_state = done;
-}
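
For readers unfamiliar with the mechanism the deleted preload.c relied on: it interposed on the pthread API by defining functions of the same names and forwarding to the real implementation, resolved through glibc's internal __pthread_* aliases or lazily via dlsym(RTLD_NEXT). A minimal, hypothetical sketch of that pattern (standalone illustration, not liblockdep code; build as a shared object and run a program under LD_PRELOAD):

#define _GNU_SOURCE
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>

static int (*real_lock)(pthread_mutex_t *mutex);

int pthread_mutex_lock(pthread_mutex_t *mutex)
{
	/* Lazily resolve the next definition in link order (the libc one).
	 * As the alloc_lock() comment above notes, dlsym() itself may
	 * allocate and lock, which is why liblockdep kept a static
	 * __locks[] pool for use during early initialization. */
	if (!real_lock)
		real_lock = (int (*)(pthread_mutex_t *))dlsym(RTLD_NEXT,
						"pthread_mutex_lock");

	fprintf(stderr, "locking %p\n", (void *)mutex);
	return real_lock(mutex);
}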
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
deleted file mode 100644
index 297c304..0000000
--- a/tools/lib/lockdep/rbtree.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
deleted file mode 100755
index 11f4256..0000000
--- a/tools/lib/lockdep/run_tests.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#! /bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-if ! make >/dev/null; then
-    echo "Building liblockdep failed."
-    echo "FAILED!"
-    exit 1
-fi
-
-find tests -name '*.c' | sort | while read -r i; do
-	testname=$(basename "$i" .c)
-	echo -ne "$testname... "
-	if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &&
-		timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then
-		echo "PASSED!"
-	else
-		echo "FAILED!"
-	fi
-	rm -f "tests/$testname"
-done
-
-find tests -name '*.c' | sort | while read -r i; do
-	testname=$(basename "$i" .c)
-	echo -ne "(PRELOAD) $testname... "
-	if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
-		timeout 1 ./lockdep "tests/$testname" 2>&1 |
-		/bin/bash "tests/${testname}.sh"; then
-		echo "PASSED!"
-	else
-		echo "FAILED!"
-	fi
-	rm -f "tests/$testname"
-done
-
-find tests -name '*.c' | sort | while read -r i; do
-	testname=$(basename "$i" .c)
-	echo -ne "(PRELOAD + Valgrind) $testname... "
-	if gcc -o "tests/$testname" -pthread -Iinclude "$i" &&
-		{ timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } &&
-		/bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" &&
-		! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then
-		echo "PASSED!"
-	else
-		echo "FAILED!"
-	fi
-	rm -f "tests/$testname"
-done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
deleted file mode 100644
index 63c7ce9..0000000
--- a/tools/lib/lockdep/tests/AA.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-int main(void)
-{
-	pthread_mutex_t a;
-
-	pthread_mutex_init(&a, NULL);
-
-	pthread_mutex_lock(&a);
-	pthread_mutex_lock(&a);
-
-	return 0;
-}
diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh
deleted file mode 100644
index f39b328..0000000
--- a/tools/lib/lockdep/tests/AA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
deleted file mode 100644
index efa39b2..0000000
--- a/tools/lib/lockdep/tests/ABA.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-int main(void)
-{
-	pthread_mutex_t a, b;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-
-	pthread_mutex_lock(&a);
-	pthread_mutex_lock(&b);
-	pthread_mutex_lock(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh
deleted file mode 100644
index f39b328..0000000
--- a/tools/lib/lockdep/tests/ABA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
deleted file mode 100644
index 543789b..0000000
--- a/tools/lib/lockdep/tests/ABBA.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(b, a);
-
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(b, a);
-
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABBA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
deleted file mode 100644
index 39325ef..0000000
--- a/tools/lib/lockdep/tests/ABBA_2threads.c
+++ /dev/null
@@ -1,47 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <pthread.h>
-
-pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
-pthread_barrier_t bar;
-
-void *ba_lock(void *arg)
-{
-	pthread_mutex_lock(&b);
-
-	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
-		pthread_barrier_destroy(&bar);
-
-	pthread_mutex_lock(&a);
-
-	pthread_mutex_unlock(&a);
-	pthread_mutex_unlock(&b);
-}
-
-int main(void)
-{
-	pthread_t t;
-
-	pthread_barrier_init(&bar, NULL, 2);
-
-	if (pthread_create(&t, NULL, ba_lock, NULL)) {
-		fprintf(stderr, "pthread_create() failed\n");
-		return 1;
-	}
-	pthread_mutex_lock(&a);
-
-	if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
-		pthread_barrier_destroy(&bar);
-
-	pthread_mutex_lock(&b);
-
-	pthread_mutex_unlock(&b);
-	pthread_mutex_unlock(&a);
-
-	pthread_join(t, NULL);
-
-	return 0;
-}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh b/tools/lib/lockdep/tests/ABBA_2threads.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABBA_2threads.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
deleted file mode 100644
index 4844612..0000000
--- a/tools/lib/lockdep/tests/ABBCCA.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b, c;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-	pthread_mutex_init(&c, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(b, c);
-	LOCK_UNLOCK_2(c, a);
-
-	pthread_mutex_destroy(&c);
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBCCA.sh b/tools/lib/lockdep/tests/ABBCCA.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABBCCA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
deleted file mode 100644
index 3570bf7..0000000
--- a/tools/lib/lockdep/tests/ABBCCDDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b, c, d;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-	pthread_mutex_init(&c, NULL);
-	pthread_mutex_init(&d, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(b, c);
-	LOCK_UNLOCK_2(c, d);
-	LOCK_UNLOCK_2(d, a);
-
-	pthread_mutex_destroy(&d);
-	pthread_mutex_destroy(&c);
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh b/tools/lib/lockdep/tests/ABBCCDDA.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABBCCDDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
deleted file mode 100644
index a1c4659..0000000
--- a/tools/lib/lockdep/tests/ABCABC.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b, c;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-	pthread_mutex_init(&c, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(c, a);
-	LOCK_UNLOCK_2(b, c);
-
-	pthread_mutex_destroy(&c);
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCABC.sh b/tools/lib/lockdep/tests/ABCABC.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABCABC.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
deleted file mode 100644
index 335af1c..0000000
--- a/tools/lib/lockdep/tests/ABCDBCDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b, c, d;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-	pthread_mutex_init(&c, NULL);
-	pthread_mutex_init(&d, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(c, d);
-	LOCK_UNLOCK_2(b, c);
-	LOCK_UNLOCK_2(d, a);
-
-	pthread_mutex_destroy(&d);
-	pthread_mutex_destroy(&c);
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh b/tools/lib/lockdep/tests/ABCDBCDA.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABCDBCDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
deleted file mode 100644
index 3c597286..0000000
--- a/tools/lib/lockdep/tests/ABCDBDDA.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-#include "common.h"
-
-int main(void)
-{
-	pthread_mutex_t a, b, c, d;
-
-	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
-	pthread_mutex_init(&c, NULL);
-	pthread_mutex_init(&d, NULL);
-
-	LOCK_UNLOCK_2(a, b);
-	LOCK_UNLOCK_2(c, d);
-	LOCK_UNLOCK_2(b, d);
-	LOCK_UNLOCK_2(d, a);
-
-	pthread_mutex_destroy(&d);
-	pthread_mutex_destroy(&c);
-	pthread_mutex_destroy(&b);
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.sh b/tools/lib/lockdep/tests/ABCDBDDA.sh
deleted file mode 100644
index fc31c60..0000000
--- a/tools/lib/lockdep/tests/ABCDBDDA.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible circular locking dependency detected'
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
deleted file mode 100644
index eee88df..0000000
--- a/tools/lib/lockdep/tests/WW.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/rwlock.h>
-
-int main(void)
-{
-	pthread_rwlock_t a, b;
-
-	pthread_rwlock_init(&a, NULL);
-	pthread_rwlock_init(&b, NULL);
-
-	pthread_rwlock_wrlock(&a);
-	pthread_rwlock_rdlock(&b);
-	pthread_rwlock_wrlock(&a);
-}
diff --git a/tools/lib/lockdep/tests/WW.sh b/tools/lib/lockdep/tests/WW.sh
deleted file mode 100644
index f39b328..0000000
--- a/tools/lib/lockdep/tests/WW.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: possible recursive locking detected'
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
deleted file mode 100644
index 3026c29..0000000
--- a/tools/lib/lockdep/tests/common.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LIBLOCKDEP_TEST_COMMON_H
-#define _LIBLOCKDEP_TEST_COMMON_H
-
-#define LOCK_UNLOCK_2(a, b)			\
-	do {					\
-		pthread_mutex_lock(&(a));	\
-		pthread_mutex_lock(&(b));	\
-		pthread_mutex_unlock(&(b));	\
-		pthread_mutex_unlock(&(a));	\
-	} while(0)
-
-#endif
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
deleted file mode 100644
index dba2506..0000000
--- a/tools/lib/lockdep/tests/unlock_balance.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <liblockdep/mutex.h>
-
-int main(void)
-{
-	pthread_mutex_t a;
-
-	pthread_mutex_init(&a, NULL);
-
-	pthread_mutex_lock(&a);
-	pthread_mutex_unlock(&a);
-	pthread_mutex_unlock(&a);
-
-	pthread_mutex_destroy(&a);
-}
diff --git a/tools/lib/lockdep/tests/unlock_balance.sh b/tools/lib/lockdep/tests/unlock_balance.sh
deleted file mode 100644
index c6e3952..0000000
--- a/tools/lib/lockdep/tests/unlock_balance.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-grep -q 'WARNING: bad unlock balance detected'
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index add3990..2173582 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3310,6 +3310,9 @@
 	if (!insn->func)
 		return false;
 
+	if (insn->func->static_call_tramp)
+		return true;
+
 	/*
 	 * CONFIG_UBSAN_TRAP inserts a UD2 when it sees
 	 * __builtin_unreachable().  The BUG() macro has an unreachable() after
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2d7df87..3cf7bac 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -469,7 +469,7 @@
 
 --switch-events::
 Record context switch events i.e. events of type PERF_RECORD_SWITCH or
-PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE)
 switch events will be enabled automatically, which can be suppressed by
 the option --no-switch-events.
 
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 07e65a0..afd1447 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -1010,6 +1010,9 @@
   ifndef NO_AUXTRACE
     $(call detected,CONFIG_AUXTRACE)
     CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+    ifeq ($(feature-reallocarray), 0)
+      CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+    endif
   endif
 endif
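
The feature check added above gates a compat definition for systems whose libc lacks reallocarray(3), which the arm-spe.c changes below rely on via <tools/libc_compat.h>. The essence of such a fallback is an overflow-checked realloc(); a rough sketch of the idea (illustrative only, not the tools/include implementation):

#include <errno.h>
#include <stdlib.h>

/* realloc() with an explicit nmemb * size overflow check, matching
 * the documented reallocarray(3) behaviour. */
static void *compat_reallocarray(void *ptr, size_t nmemb, size_t size)
{
	if (size && nmemb > (size_t)-1 / size) {
		errno = ENOMEM;
		return NULL;
	}
	return realloc(ptr, nmemb * size);
}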
 
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0777748..80522bc 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -516,17 +516,17 @@
 $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
 	$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
 
-socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
-socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
-
-$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
-	$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
-
-socket_arrays := $(beauty_outdir)/socket_arrays.c
+socket_arrays := $(beauty_outdir)/socket.c
 socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
 
-$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
-	$(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+$(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl)
+	$(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@
+
+sockaddr_arrays := $(beauty_outdir)/sockaddr.c
+sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh
+
+$(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl)
+	$(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@
 
 vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
 vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
@@ -736,8 +736,8 @@
 	$(sndrv_ctl_ioctl_array) \
 	$(kcmp_type_array) \
 	$(kvm_ioctl_array) \
-	$(socket_ipproto_array) \
 	$(socket_arrays) \
+	$(sockaddr_arrays) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
 	$(mmap_flags_array) \
@@ -1113,8 +1113,8 @@
 		$(OUTPUT)$(sndrv_pcm_ioctl_array) \
 		$(OUTPUT)$(kvm_ioctl_array) \
 		$(OUTPUT)$(kcmp_type_array) \
-		$(OUTPUT)$(socket_ipproto_array) \
 		$(OUTPUT)$(socket_arrays) \
+		$(OUTPUT)$(sockaddr_arrays) \
 		$(OUTPUT)$(vhost_virtio_ioctl_array) \
 		$(OUTPUT)$(perf_ioctl_array) \
 		$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h
index c625380..452b3d9 100644
--- a/tools/perf/arch/arm/include/arch-tests.h
+++ b/tools/perf/arch/arm/include/arch-tests.h
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c
index 6848101..6956111 100644
--- a/tools/perf/arch/arm/tests/arch-tests.c
+++ b/tools/perf/arch/arm/tests/arch-tests.c
@@ -3,18 +3,10 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-	{
-		.desc = "DWARF unwind",
-		.func = test__dwarf_unwind,
-	},
+	&suite__dwarf_unwind,
 #endif
-	{
-		.desc = "Vectors page",
-		.func = test__vectors_page,
-	},
-	{
-		.func = NULL,
-	},
+	&suite__vectors_page,
+	NULL,
 };
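
The conversion above (repeated per architecture below) turns a sentinel-terminated array of struct test values into a NULL-terminated array of struct test_suite pointers built by DEFINE_SUITE(). How such a table is walked, in outline (hypothetical sketch; the real iteration lives in perf's generic test runner, and only the ->desc field is assumed here, carried over from the old struct test):

#include <stdio.h>

struct test_suite {
	const char *desc;	/* assumed, mirroring the old struct test */
	/* ... per-suite test cases etc. ... */
};

static void list_suites(struct test_suite **suites)
{
	struct test_suite **t;

	/* The table is NULL-terminated rather than using a sentinel member. */
	for (t = suites; *t; t++)
		printf("suite: %s\n", (*t)->desc);
}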
diff --git a/tools/perf/arch/arm/tests/vectors-page.c b/tools/perf/arch/arm/tests/vectors-page.c
index 7ffdd799..55a8358 100644
--- a/tools/perf/arch/arm/tests/vectors-page.c
+++ b/tools/perf/arch/arm/tests/vectors-page.c
@@ -9,8 +9,7 @@
 
 #define VECTORS__MAP_NAME "[vectors]"
 
-int test__vectors_page(struct test *test __maybe_unused,
-		       int subtest __maybe_unused)
+static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	void *start, *end;
 
@@ -22,3 +21,5 @@
 
 	return TEST_OK;
 }
+
+DEFINE_SUITE("Vectors page", vectors_page);
diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h
index c625380..452b3d9 100644
--- a/tools/perf/arch/arm64/include/arch-tests.h
+++ b/tools/perf/arch/arm64/include/arch-tests.h
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c
index 5b1543c..ad16b4f 100644
--- a/tools/perf/arch/arm64/tests/arch-tests.c
+++ b/tools/perf/arch/arm64/tests/arch-tests.c
@@ -3,14 +3,9 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-	{
-		.desc = "DWARF unwind",
-		.func = test__dwarf_unwind,
-	},
+	&suite__dwarf_unwind,
 #endif
-	{
-		.func = NULL,
-	},
+	NULL,
 };
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index a4420d4..2100d46 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -23,6 +23,7 @@
 #include "../../../util/auxtrace.h"
 #include "../../../util/record.h"
 #include "../../../util/arm-spe.h"
+#include <tools/libc_compat.h> // reallocarray
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -31,6 +32,8 @@
 	struct auxtrace_record		itr;
 	struct perf_pmu			*arm_spe_pmu;
 	struct evlist		*evlist;
+	int			wrapped_cnt;
+	bool			*wrapped;
 };
 
 static void arm_spe_set_timestamp(struct auxtrace_record *itr,
@@ -84,6 +87,55 @@
 	return 0;
 }
 
+static void
+arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
+					   bool privileged)
+{
+	/*
+	 * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
+	 * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
+	 * unprivileged users.
+	 *
+	 * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
+	 * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
+	 * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+	 * user is likely to get an error as they exceed their mlock limit.
+	 */
+
+	/*
+	 * No size was given to '-S' or '-m,', so go with the default
+	 */
+	if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+	} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
+		opts->mmap_pages = KiB(256) / page_size;
+	}
+
+	/*
+	 * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
+	 * auxtrace mmap area.
+	 */
+	if (!opts->auxtrace_snapshot_size)
+		opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+	/*
+	 * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
+	 * enough to fit the requested snapshot size.
+	 */
+	if (!opts->auxtrace_mmap_pages) {
+		size_t sz = opts->auxtrace_snapshot_size;
+
+		sz = round_up(sz, page_size) / page_size;
+		opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+	}
+}
+
 static int arm_spe_recording_options(struct auxtrace_record *itr,
 				     struct evlist *evlist,
 				     struct record_opts *opts)
@@ -115,6 +167,36 @@
 	if (!opts->full_auxtrace)
 		return 0;
 
+	/*
+	 * We are in snapshot mode.
+	 */
+	if (opts->auxtrace_snapshot_mode) {
+		/*
+		 * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
+		 * default values.
+		 */
+		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
+			arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);
+
+		/*
+		 * Snapshot size can't be bigger than the auxtrace area.
+		 */
+		if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
+			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+			       opts->auxtrace_snapshot_size,
+			       opts->auxtrace_mmap_pages * (size_t)page_size);
+			return -EINVAL;
+		}
+
+		/*
+		 * Something went wrong somewhere - this shouldn't happen.
+		 */
+		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+			return -EINVAL;
+		}
+	}
+
 	/* We are in full trace mode but '-m,xyz' wasn't specified */
 	if (!opts->auxtrace_mmap_pages) {
 		if (privileged) {
@@ -138,6 +220,9 @@
 		}
 	}
 
+	if (opts->auxtrace_snapshot_mode)
+		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
+			  opts->auxtrace_snapshot_size);
 
 	/*
 	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
@@ -166,8 +251,199 @@
 	tracking_evsel->core.attr.sample_period = 1;
 
 	/* In per-cpu case, always need the time of mmap events etc */
-	if (!perf_cpu_map__empty(cpus))
+	if (!perf_cpu_map__empty(cpus)) {
 		evsel__set_sample_bit(tracking_evsel, TIME);
+		evsel__set_sample_bit(tracking_evsel, CPU);
+
+		/* also track task context switch */
+		if (!record_opts__no_switch_events(opts))
+			tracking_evsel->core.attr.context_switch = 1;
+	}
+
+	return 0;
+}
+
+static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+					 struct record_opts *opts,
+					 const char *str)
+{
+	unsigned long long snapshot_size = 0;
+	char *endptr;
+
+	if (str) {
+		snapshot_size = strtoull(str, &endptr, 0);
+		if (*endptr || snapshot_size > SIZE_MAX)
+			return -1;
+	}
+
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = snapshot_size;
+
+	return 0;
+}
+
+static int arm_spe_snapshot_start(struct auxtrace_record *itr)
+{
+	struct arm_spe_recording *ptr =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+			return evsel__disable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct arm_spe_recording *ptr =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+			return evsel__enable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
+{
+	bool *wrapped;
+	int cnt = ptr->wrapped_cnt, new_cnt, i;
+
+	/*
+	 * No need to allocate, so return early.
+	 */
+	if (idx < cnt)
+		return 0;
+
+	/*
+	 * Make ptr->wrapped big enough to hold index idx.
+	 */
+	new_cnt = idx + 1;
+
+	/*
+	 * Freed in arm_spe_recording_free().
+	 */
+	wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
+	if (!wrapped)
+		return -ENOMEM;
+
+	/*
+	 * Initialize the newly allocated entries.
+	 */
+	for (i = cnt; i < new_cnt; i++)
+		wrapped[i] = false;
+
+	ptr->wrapped_cnt = new_cnt;
+	ptr->wrapped = wrapped;
+
+	return 0;
+}
+
+static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
+				      size_t buffer_size, u64 head)
+{
+	u64 i, watermark;
+	u64 *buf = (u64 *)buffer;
+	size_t buf_size = buffer_size;
+
+	/*
+	 * Defensively handle the case where head might be continually increasing - if its value is
+	 * equal to or greater than the size of the ring buffer, then we can safely determine it has
+	 * wrapped around. Otherwise, continue to detect if head might have wrapped.
+	 */
+	if (head >= buffer_size)
+		return true;
+
+	/*
+	 * We want to look at the very last 512 bytes (chosen arbitrarily) of the ring buffer.
+	 */
+	watermark = buf_size - 512;
+
+	/*
+	 * The value of head is somewhere within the size of the ring buffer. This can mean that
+	 * there hasn't been enough data to fill the ring buffer yet, or that the trace time was so
+	 * long that head has numerically wrapped around.  To find out, we need to check if we have
+	 * data at the very end of the ring buffer.  We can reliably do this because mmap'ed pages
+	 * are zeroed out and there is a fresh mapping with every new session.
+	 */
+
+	/*
+	 * head is less than 512 bytes from the end of the ring buffer.
+	 */
+	if (head > watermark)
+		watermark = head;
+
+	/*
+	 * Speed things up by using 64 bit transactions (see "u64 *buf" above)
+	 */
+	watermark /= sizeof(u64);
+	buf_size /= sizeof(u64);
+
+	/*
+	 * If we find trace data at the end of the ring buffer, head has been there and has
+	 * numerically wrapped around at least once.
+	 */
+	for (i = watermark; i < buf_size; i++)
+		if (buf[i])
+			return true;
+
+	return false;
+}
+
+static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
+				  struct auxtrace_mmap *mm, unsigned char *data,
+				  u64 *head, u64 *old)
+{
+	int err;
+	bool wrapped;
+	struct arm_spe_recording *ptr =
+			container_of(itr, struct arm_spe_recording, itr);
+
+	/*
+	 * Allocate memory to keep track of wrapping if this is the first
+	 * time we deal with this *mm.
+	 */
+	if (idx >= ptr->wrapped_cnt) {
+		err = arm_spe_alloc_wrapped_array(ptr, idx);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Check to see if *head has wrapped around.  If it hasn't, only the
+	 * amount of data between *head and *old is snapshotted to avoid
+	 * bloating the perf.data file with zeros.  But as soon as *head has
+	 * wrapped around, the entire size of the AUX ring buffer is taken.
+	 */
+	wrapped = ptr->wrapped[idx];
+	if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
+		wrapped = true;
+		ptr->wrapped[idx] = true;
+	}
+
+	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+	/*
+	 * No wrap has occurred, we can just use *head and *old.
+	 */
+	if (!wrapped)
+		return 0;
+
+	/*
+	 * *head has wrapped around - adjust *head and *old to pick up the
+	 * entire content of the AUX buffer.
+	 */
+	if (*head >= mm->len) {
+		*old = *head - mm->len;
+	} else {
+		*head += mm->len;
+		*old = *head - mm->len;
+	}
 
 	return 0;
 }
@@ -186,6 +462,7 @@
 	struct arm_spe_recording *sper =
 			container_of(itr, struct arm_spe_recording, itr);
 
+	free(sper->wrapped);
 	free(sper);
 }
 
@@ -207,6 +484,10 @@
 
 	sper->arm_spe_pmu = arm_spe_pmu;
 	sper->itr.pmu = arm_spe_pmu;
+	sper->itr.snapshot_start = arm_spe_snapshot_start;
+	sper->itr.snapshot_finish = arm_spe_snapshot_finish;
+	sper->itr.find_snapshot = arm_spe_find_snapshot;
+	sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
 	sper->itr.recording_options = arm_spe_recording_options;
 	sper->itr.info_priv_size = arm_spe_info_priv_size;
 	sper->itr.info_fill = arm_spe_info_fill;
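
The wrap detection added above leans on two guarantees spelled out in its comments: AUX mmap pages start out zeroed, and every session gets a fresh mapping, so any non-zero data in the tail of the buffer proves the write head has already passed through it. A condensed restatement of the heuristic (hypothetical standalone code, assuming buf_size is a multiple of 8):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool buffer_has_wrapped(const unsigned char *buffer,
			       size_t buf_size, uint64_t head)
{
	const uint64_t *buf = (const uint64_t *)buffer;
	uint64_t watermark = buf_size - 512;	/* scan the last 512 bytes */
	uint64_t i;

	if (head >= buf_size)	/* head counts bytes written: clearly wrapped */
		return true;

	/* Bytes below head were written on the current pass, so only data
	 * beyond head can prove an earlier wrap. */
	if (head > watermark)
		watermark = head;

	for (i = watermark / sizeof(uint64_t); i < buf_size / sizeof(uint64_t); i++)
		if (buf[i])
			return true;

	return false;
}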
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 1ca7bc3..e2c481f 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -363,3 +363,4 @@
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448	n64	process_mrelease		sys_process_mrelease
+449	n64	futex_waitv			sys_futex_waitv
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
index c625380..452b3d9 100644
--- a/tools/perf/arch/powerpc/include/arch-tests.h
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c
index 8c3fbd4..eb98c57 100644
--- a/tools/perf/arch/powerpc/tests/arch-tests.c
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -3,14 +3,10 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-	{
-		.desc = "Test dwarf unwind",
-		.func = test__dwarf_unwind,
-	},
+	&suite__dwarf_unwind,
 #endif
-	{
-		.func = NULL,
-	},
+	NULL,
 };
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 18b5500..fe8f8dd 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -370,6 +370,7 @@
 446	common	landlock_restrict_self	sys_landlock_restrict_self
 447	common	memfd_secret		sys_memfd_secret
 448	common	process_mrelease	sys_process_mrelease
+449	common	futex_waitv		sys_futex_waitv
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 9599e7a..6a1a1b3 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,15 +2,15 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-struct test;
+struct test_suite;
 
 /* Tests */
-int test__rdpmc(struct test *test, int subtest);
-int test__insn_x86(struct test *test, int subtest);
-int test__intel_pt_pkt_decoder(struct test *test, int subtest);
-int test__bp_modify(struct test *test, int subtest);
-int test__x86_sample_parsing(struct test *test, int subtest);
+int test__rdpmc(struct test_suite *test, int subtest);
+int test__insn_x86(struct test_suite *test, int subtest);
+int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
+int test__bp_modify(struct test_suite *test, int subtest);
+int test__x86_sample_parsing(struct test_suite *test, int subtest);
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 71aa673..64fb73d 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -3,39 +3,28 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
-	{
-		.desc = "x86 rdpmc",
-		.func = test__rdpmc,
-	},
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
-	{
-		.desc = "DWARF unwind",
-		.func = test__dwarf_unwind,
-	},
-#endif
+DEFINE_SUITE("x86 rdpmc", rdpmc);
 #ifdef HAVE_AUXTRACE_SUPPORT
-	{
-		.desc = "x86 instruction decoder - new instructions",
-		.func = test__insn_x86,
-	},
-	{
-		.desc = "Intel PT packet decoder",
-		.func = test__intel_pt_pkt_decoder,
-	},
+DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
+DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
 #endif
 #if defined(__x86_64__)
-	{
-		.desc = "x86 bp modify",
-		.func = test__bp_modify,
-	},
+DEFINE_SUITE("x86 bp modify", bp_modify);
 #endif
-	{
-		.desc = "x86 Sample parsing",
-		.func = test__x86_sample_parsing,
-	},
-	{
-		.func = NULL,
-	},
+DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
 
+struct test_suite *arch_tests[] = {
+	&suite__rdpmc,
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	&suite__dwarf_unwind,
+#endif
+#ifdef HAVE_AUXTRACE_SUPPORT
+	&suite__insn_x86,
+	&suite__intel_pt_pkt_decoder,
+#endif
+#if defined(__x86_64__)
+	&suite__bp_modify,
+#endif
+	&suite__x86_sample_parsing,
+	NULL,
 };
diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c
index dffcf9b..0924ccd 100644
--- a/tools/perf/arch/x86/tests/bp-modify.c
+++ b/tools/perf/arch/x86/tests/bp-modify.c
@@ -204,7 +204,7 @@
 	return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
 }
 
-int test__bp_modify(struct test *test __maybe_unused,
+int test__bp_modify(struct test_suite *test __maybe_unused,
 		    int subtest __maybe_unused)
 {
 	TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 0262b0d..94b490c 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -173,7 +173,7 @@
  * verbose (-v) option to see all the instructions and whether or not they
  * decoded successfully.
  */
-int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__insn_x86(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret = 0;
 
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 27dd8cf..cb5b2c6 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -37,7 +37,7 @@
  * the last read counter value to avoid triggering a WARN_ON_ONCE() in
  * smp_call_function_many() caused by sending IPIs from NMI context.
  */
-int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct evlist *evlist = NULL;
 	struct evsel *evsel = NULL;
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
index c933e3d..2fc882a 100644
--- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
@@ -289,7 +289,7 @@
  * This test feeds byte sequences to the Intel PT packet decoder and checks the
  * results. Changes to the packet context are also checked.
  */
-int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct test_data *d = data;
 	int ret;
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index 1ea9166..498413a 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -157,7 +157,7 @@
 	return 0;
 }
 
-int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int status = 0;
 	int wret = 0;
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
index c92db87..bfbd366 100644
--- a/tools/perf/arch/x86/tests/sample-parsing.c
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -115,7 +115,7 @@
  * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
  * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
  */
-int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
 }
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 5d1fe9c..137890f 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -233,6 +233,7 @@
 	print_summary();
 
 	free(worker);
+	perf_cpu_map__put(cpu);
 	return ret;
 err:
 	usage_with_options(bench_futex_lock_pi_usage, options);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 97fe31f..f7a5ffe 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -294,6 +294,7 @@
 	print_summary();
 
 	free(worker);
+	perf_cpu_map__put(cpu);
 	return ret;
 err:
 	usage_with_options(bench_futex_requeue_usage, options);
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index e970e6b..0983f40 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -329,6 +329,7 @@
 	print_summary();
 
 	free(blocked_worker);
+	perf_cpu_map__put(cpu);
 	return ret;
 }
 #endif /* HAVE_PTHREAD_BARRIER */
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 77f058a..2226a475 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -222,5 +222,6 @@
 	print_summary();
 
 	free(worker);
+	perf_cpu_map__put(cpu);
 	return ret;
 }
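
All four futex benchmarks shared the same leak: the CPU map obtained during setup was never released on the success path. libperf CPU maps are reference counted, so whoever takes a reference must drop it. A minimal sketch of the get/put discipline these hunks enforce (error handling trimmed):

	#include <perf/cpumap.h>

	static int bench_body(void)
	{
		/* one reference, owned by this function */
		struct perf_cpu_map *cpu = perf_cpu_map__new(NULL);

		if (!cpu)
			return -1;

		/* ... spawn perf_cpu_map__nr(cpu) workers and run the benchmark ... */

		perf_cpu_map__put(cpu);	/* drop our reference on every exit path */
		return 0;
	}
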
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 488f6e6..fa0ff4c 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -223,6 +223,8 @@
 		snd_ctx->out_fds[i] = fds[1];
 		if (!thread_mode)
 			close(fds[0]);
+
+		free(ctx);
 	}
 
 	/* Now we have all the fds, fork the senders */
@@ -239,6 +241,8 @@
 		for (i = 0; i < num_fds; i++)
 			close(snd_ctx->out_fds[i]);
 
+	free(snd_ctx);
+
 	/* Return number of children to reap */
 	return num_fds * 2;
 }
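
Both frees plug leaks on the parent side of group(): each receiver context, and finally the sender context, is only needed until the worker has been created and the descriptor bookkeeping is done. A sketch of the ownership pattern being assumed here (names taken from the surrounding file; in the fork-based mode the child works on its own copy):

	struct receiver_context *ctx = malloc(sizeof(*ctx));	/* parent allocates */
	/* ... fill in ctx, then hand it over ... */
	create_worker(ctx, (void *)receiver);	/* worker gets/copies what it needs */
	free(ctx);				/* parent's copy is now dead weight */
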
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8167ebf..8ae4004 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -619,14 +619,17 @@
 	int ret;
 	struct perf_session *session = rep->session;
 	struct evlist *evlist = session->evlist;
-	const char *help = perf_tip(system_path(TIPDIR));
+	char *help = NULL, *path = NULL;
 
-	if (help == NULL) {
+	path = system_path(TIPDIR);
+	if (perf_tip(&help, path) || help == NULL) {
 		/* fallback for people who don't install perf ;-) */
-		help = perf_tip(DOCDIR);
-		if (help == NULL)
-			help = "Cannot load tips.txt file, please install perf!";
+		free(path);
+		path = system_path(DOCDIR);
+		if (perf_tip(&help, path) || help == NULL)
+			help = strdup("Cannot load tips.txt file, please install perf!");
 	}
+	free(path);
 
 	switch (use_browser) {
 	case 1:
@@ -651,7 +654,7 @@
 		ret = evlist__tty_browse_hists(evlist, rep, help);
 		break;
 	}
-
+	free(help);
 	return ret;
 }
 
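The report path adapts to a reworked perf_tip(): instead of returning a borrowed string (or NULL on failure), it now signals failure through its return value and hands out an allocation the caller owns. The contract assumed by this call site is roughly:

	/*
	 * Assumed signature: returns non-zero on error; on success *strp
	 * points to a malloc()ed tip string the caller must free().
	 */
	int perf_tip(char **strp, const char *dirpath);

Both the tip and the string returned by system_path() are heap allocations here, which is why the function now frees path after the lookup and help once the browser is done with it.
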
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 624ea12..0b52e08 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -979,6 +979,8 @@
 	  .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
 	{ .name	    = "getrlimit",
 	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "getsockopt",
+	  .arg = { [1] = STRARRAY(level, socket_level), }, },
 	{ .name	    = "gettid",	    .errpid = true, },
 	{ .name	    = "ioctl",
 	  .arg = {
@@ -1121,6 +1123,8 @@
 	  .arg = { [0] = STRARRAY(which, itimers), }, },
 	{ .name	    = "setrlimit",
 	  .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+	{ .name	    = "setsockopt",
+	  .arg = { [1] = STRARRAY(level, socket_level), }, },
 	{ .name	    = "socket",
 	  .arg = { [0] = STRARRAY(family, socket_families),
 		   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
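
STRARRAY wires an argument slot to a string table, so perf trace can render the numeric level argument (index 1) of getsockopt/setsockopt symbolically. The underlying idea, sketched with assumed table contents:

	/* index -> name lookup; the real table lives with the other beautifiers */
	static const char *socket_level[] = {
		[0] = "SOL_IP",
		[1] = "SOL_SOCKET",	/* SOL_SOCKET is 1 on most architectures */
		/* ... */
	};

so a raw setsockopt(3, 1, ...) can be shown as something like setsockopt(3, SOL_SOCKET, ...).
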
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index a42fab3..aa8cfea 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -106,6 +106,9 @@
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
+	PERF_COUNT_HW_REF_CPU_CYCLES		= 9,
 };
 
 These are standardized types of events that work relatively uniformly
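
For example, counting one of the newly documented generic events only takes naming it in the attribute passed to the syscall; a minimal sketch:

	#include <linux/perf_event.h>

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_REF_CPU_CYCLES,
		.size		= sizeof(struct perf_event_attr),
		.disabled	= 1,
	};

	/* fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); */
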
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
new file mode 100644
index 0000000..b57526f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
@@ -0,0 +1,676 @@
+[
+    {
+        "BriefDescription": "Percentage of cycles that are run cycles",
+        "MetricExpr": "PM_RUN_CYC / PM_CYC * 100",
+        "MetricGroup": "General",
+        "MetricName": "RUN_CYCLES_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction",
+        "MetricExpr": "PM_CYC / PM_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "CYCLES_PER_INSTRUCTION"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
+        "MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
+        "MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_FLUSH_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
+        "MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_TRANSLATION_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
+        "MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
+        "MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_ITLB_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
+        "MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
+        "MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L2_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
+        "MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L3_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
+        "MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L3MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
+        "MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
+        "MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_SYNC_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
+        "MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
+        "MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_RENAME_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
+        "MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_OTHER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
+        "MetricExpr": "PM_ISSUE_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "ISSUE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units",
+        "MetricExpr": "PM_EXEC_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "EXECUTION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction spent executing an NTC instruction that gets flushed some time after dispatch",
+        "MetricExpr": "PM_EXEC_STALL_NTC_FLUSH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "NTC_FLUSH_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTF instruction finishes at dispatch",
+        "MetricExpr": "PM_EXEC_STALL_FIN_AT_DISP / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "FIN_AT_DISP_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the branch unit",
+        "MetricExpr": "PM_EXEC_STALL_BRU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "BRU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a simple fixed point instruction that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_SIMPLE_FX / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "SIMPLE_FX_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the VSU",
+        "MetricExpr": "PM_EXEC_STALL_VSU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "VSU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units",
+        "MetricExpr": "PM_EXEC_STALL_TRANSLATION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TRANSLATION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load or store that suffered a translation miss",
+        "MetricExpr": "PM_EXEC_STALL_DERAT_ONLY_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DERAT_ONLY_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is recovering from a TLB miss",
+        "MetricExpr": "PM_EXEC_STALL_DERAT_DTLB_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DERAT_DTLB_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_LSU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LSU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_LOAD / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LOAD_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, with an RC dispatch conflict",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_CONFLICT_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, without an RC dispatch conflict",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_NOCONFLICT_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a source beyond the local L2 and local L3",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L3MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a neighbor chiplet's L2 or L3 in the same chip",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L21_L31_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from L4, local memory or OpenCAPI chip",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_LMEM / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_LMEM_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a remote chip (cache, L4, memory or OpenCAPI) in the same group",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_CHIP / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_OFF_CHIP_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a distant chip (cache, L4, memory or OpenCAPI chip)",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_NODE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_OFF_NODE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a TLBIEL instruction",
+        "MetricExpr": "PM_EXEC_STALL_TLBIEL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TLBIEL_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is finishing a load after its data has been reloaded from a data source beyond the local L1, OR when the LSU is processing an L1-hit, OR when the NTF instruction merged with another load in the LMQ",
+        "MetricExpr": "PM_EXEC_STALL_LOAD_FINISH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LOAD_FINISH_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_STORE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is in the store unit outside of handling store misses or other special store operations",
+        "MetricExpr": "PM_EXEC_STALL_STORE_PIPE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_PIPE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store whose cache line was not resident in the L1 and had to wait for allocation of the missing line into the L1",
+        "MetricExpr": "PM_EXEC_STALL_STORE_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a TLBIE instruction waiting for a response from the L2",
+        "MetricExpr": "PM_EXEC_STALL_TLBIE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TLBIE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a PTESYNC instruction",
+        "MetricExpr": "PM_EXEC_STALL_PTESYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "PTESYNC_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because the thread was blocked",
+        "MetricExpr": "PM_CMPL_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because it was interrupted by ANY exception",
+        "MetricExpr": "PM_CMPL_STALL_EXCEPTION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "EXCEPTION_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is stuck at finish waiting for the non-speculative finish of either a STCX instruction waiting for its result or a load waiting for non-critical sectors of data and ECC",
+        "MetricExpr": "PM_CMPL_STALL_MEM_ECC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "MEM_ECC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a STCX instruction waiting for resolution from the nest",
+        "MetricExpr": "PM_CMPL_STALL_STCX / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STCX_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a LWSYNC instruction waiting to complete",
+        "MetricExpr": "PM_CMPL_STALL_LWSYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LWSYNC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a HWSYNC instruction stuck at finish waiting for a response from the L2",
+        "MetricExpr": "PM_CMPL_STALL_HWSYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "HWSYNC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction required special handling before completion",
+        "MetricExpr": "PM_CMPL_STALL_SPECIAL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "SPECIAL_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
+        "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_FETCH_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
+        "MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_HALT_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of flushes per completed instruction",
+        "MetricExpr": "PM_FLUSH / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "FLUSH_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of flushes due to a branch mispredict per completed instruction",
+        "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "BR_MPRED_FLUSH_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of branch mispredictions per completed instruction",
+        "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL",
+        "MetricGroup": "Others",
+        "MetricName": "BRANCH_MISPREDICTION_RATE"
+    },
+    {
+        "BriefDescription": "Percentage of finished loads that missed in the L1",
+        "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
+        "MetricGroup": "Others",
+        "MetricName": "L1_LD_MISS_RATIO",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were loads that missed the L1",
+        "MetricExpr": "PM_LD_MISS_L1 / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "L1_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions when the DPTEG required for the load/store instruction in execution was missing from the TLB",
+        "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "DTLB_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions dispatched per instruction completed",
+        "MetricExpr": "PM_INST_DISP / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "DISPATCH_PER_INST_CMPL"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1 or L2",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L2_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were demand fetches that missed the L1 icache",
+        "MetricExpr": "PM_L1_ICACHE_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "L1_INST_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were demand fetches that reloaded from beyond the L3 icache",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L3_INST_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per cycle",
+        "MetricExpr": "PM_INST_CMPL / PM_CYC",
+        "MetricGroup": "General",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Average number of cycles per completed instruction group",
+        "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "CYCLES_PER_COMPLETED_INSTRUCTIONS_SET"
+    },
+    {
+        "BriefDescription": "Percentage of cycles when at least 1 instruction dispatched",
+        "MetricExpr": "PM_1PLUS_PPC_DISP / PM_RUN_CYC * 100",
+        "MetricGroup": "General",
+        "MetricName": "CYCLES_ATLEAST_ONE_INST_DISPATCHED",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of finished loads per completed instruction",
+        "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LOADS_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of finished stores per completed instruction",
+        "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "STORES_PER_INST"
+    },
+    {
+        "BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L2_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand loads that reloaded from beyond the L3 per completed instruction",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L3_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 4k page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_4K / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_4K_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 64k page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_64K / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_64K_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of run cycles per completed instruction",
+        "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "RUN_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per run cycle",
+        "MetricExpr": "PM_RUN_INST_CMPL / PM_RUN_CYC",
+        "MetricGroup": "General",
+        "MetricName": "RUN_IPC"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per instruction group",
+        "MetricExpr": "PM_RUN_INST_CMPL / PM_1PLUS_PPC_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "AVERAGE_COMPLETED_INSTRUCTION_SET_SIZE"
+    },
+    {
+        "BriefDescription": "Average number of finished instructions per completed instructions",
+        "MetricExpr": "PM_INST_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "INST_FIN_PER_CMPL"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTF instruction is completing and the finish was overlooked",
+        "MetricExpr": "PM_EXEC_STALL_UNKNOWN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "EXEC_STALL_UNKOWN_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of finished branches that were taken",
+        "MetricExpr": "PM_BR_TAKEN_CMPL / PM_BR_FIN * 100",
+        "MetricGroup": "General",
+        "MetricName": "TAKEN_BRANCHES",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1, L2, or the L3",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L3_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of finished branches per completed instruction",
+        "MetricExpr": "PM_BR_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "BRANCHES_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of instructions finished in the LSU per completed instruction",
+        "MetricExpr": "PM_LSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LSU_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of instructions finished in the VSU per completed instruction",
+        "MetricExpr": "PM_VSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "VSU_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of TLBIE instructions finished in the LSU per completed instruction",
+        "MetricExpr": "PM_TLBIE_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "TLBIE_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of STCX instructions finshed per completed instruction",
+        "MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "STXC_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of LARX instructions finshed per completed instruction",
+        "MetricExpr": "PM_LARX_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LARX_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of PTESYNC instructions finshed per completed instruction",
+        "MetricExpr": "PM_PTESYNC_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "PTESYNC_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of simple fixed-point instructions finshed in the store unit per completed instruction",
+        "MetricExpr": "PM_FX_LSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "FX_PER_INST"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded the L1 cache",
+        "MetricExpr": "PM_LD_DEMAND_MISS_L1 / PM_LD_MISS_L1 * 100",
+        "MetricGroup": "General",
+        "MetricName": "DL1_MISS_RELOADS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L2",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_DEMAND_MISS_L1 * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L2_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L3",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_DEMAND_MISS_L1 * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L3_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of cycles stalled due to the NTC instruction waiting for a load miss to resolve from a source beyond the local L2 and local L3",
+        "MetricExpr": "DMISS_L3MISS_STALL_CPI / RUN_CPI * 100",
+        "MetricGroup": "General",
+        "MetricName": "DCACHE_MISS_CPI",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 2M page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_2M / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_2M_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 16M page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_16M / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_16M_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 4K page size",
+        "MetricExpr": "PM_DERAT_MISS_4K / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_4K_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 2M page size",
+        "MetricExpr": "PM_DERAT_MISS_2M / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_2M_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 16M page size",
+        "MetricExpr": "PM_DERAT_MISS_16M / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_16M_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 64K page size",
+        "MetricExpr": "PM_DERAT_MISS_64K / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_64K_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses that resulted in TLB reloads",
+        "MetricExpr": "PM_DTLB_MISS / PM_DERAT_MISS * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_MISS_RELOAD",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of icache misses that were reloaded from beyond the local L3",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_L1_ICACHE_MISS * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "INST_FROM_L3_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of icache reloads from the beyond the L3 per completed instruction",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "INST_FROM_L3_MISS_RATE",
+        "ScaleUnit": "1%"
+    }
+]
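
These metrics are consumed by perf's metric engine: MetricExpr is evaluated over the named PMU events and ScaleUnit formats the result. As a worked example for RUN_CYCLES_RATE, with PM_RUN_CYC = 900,000 and PM_CYC = 1,000,000 the expression yields 90, and "ScaleUnit": "1%" prints that as 90.0%. Metrics are selected by MetricName or MetricGroup, e.g.:

	# a single metric
	perf stat -M CYCLES_PER_INSTRUCTION -a -- sleep 1

	# every metric in the CPI group
	perf stat -M CPI -a -- sleep 1
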
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
index 2ada86a..e91cf2c 100644
--- a/tools/perf/tests/api-io.c
+++ b/tools/perf/tests/api-io.c
@@ -289,8 +289,8 @@
 	return ret;
 }
 
-int test__api_io(struct test *test __maybe_unused,
-		int subtest __maybe_unused)
+static int test__api_io(struct test_suite *test __maybe_unused,
+			int subtest __maybe_unused)
 {
 	int ret = 0;
 
@@ -302,3 +302,5 @@
 		ret = TEST_FAIL;
 	return ret;
 }
+
+DEFINE_SUITE("Test api io", api_io);
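
DEFINE_SUITE is the new convenience wrapper for single-case suites. Its definition is not part of this hunk; given how suite__api_io is consumed by builtin-test.c later in this series, it presumably expands along these lines:

	/* assumed expansion, reconstructed from the users in this series */
	#define DEFINE_SUITE(description, _name)		\
		struct test_case tests__##_name[] = {		\
			TEST_CASE(description, _name),		\
			{ .name = NULL, }			\
		};						\
		struct test_suite suite__##_name = {		\
			.desc = description,			\
			.test_cases = tests__##_name,		\
		}

i.e. a one-case suite whose case array is terminated by a NULL name, exactly the shape num_subtests() walks.
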
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 9b40a25..0f73e30 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -178,7 +178,7 @@
 	return system(cmd) ? TEST_FAIL : TEST_OK;
 }
 
-int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct stat st;
 	char path_perf[PATH_MAX];
@@ -207,3 +207,5 @@
 
 	return TEST_SKIP;
 }
+
+DEFINE_SUITE("Setup struct perf_event_attr", attr);
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 7447a44..79a980b 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -82,7 +82,7 @@
 }
 
 
-int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
 	char pid[16], sbuf[STRERR_BUFSIZE];
@@ -167,3 +167,5 @@
 	evlist__delete(evlist);
 	return ret;
 }
+
+DEFINE_SUITE("Read backward ring buffer", backward_ring_buffer);
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 12b805e..3848563 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -40,7 +40,7 @@
 	return ret;
 }
 
-int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bitmap_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1"));
 	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5"));
@@ -51,3 +51,5 @@
 	TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40"));
 	return 0;
 }
+
+DEFINE_SUITE("Print bitmap", bitmap_print);
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index 489b5060..d1ebb55 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -19,6 +19,19 @@
 #include "../perf-sys.h"
 #include "cloexec.h"
 
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+#define BP_ACCOUNT_IS_SUPPORTED 0
+#else
+#define BP_ACCOUNT_IS_SUPPORTED 1
+#endif
+
 static volatile long the_var;
 
 static noinline int test_function(void)
@@ -173,13 +186,18 @@
  *     we create another watchpoint to ensure
  *     the slot accounting is correct
  */
-int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_accounting(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int has_ioctl = detect_ioctl();
 	int wp_cnt = detect_cnt(false);
 	int bp_cnt = detect_cnt(true);
 	int share  = detect_share(wp_cnt, bp_cnt);
 
+	if (!BP_ACCOUNT_IS_SUPPORTED) {
+		pr_debug("Test not supported on this architecture\n");
+		return TEST_SKIP;
+	}
+
 	pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n",
 		 wp_cnt, bp_cnt, has_ioctl, share);
 
@@ -189,18 +207,4 @@
 	return bp_accounting(wp_cnt, share);
 }
 
-bool test__bp_account_is_supported(void)
-{
-	/*
-	 * PowerPC and S390 do not support creation of instruction
-	 * breakpoints using the perf_event interface.
-	 *
-	 * Just disable the test for these architectures until these
-	 * issues are resolved.
-	 */
-#if defined(__powerpc__) || defined(__s390x__)
-	return false;
-#else
-	return true;
-#endif
-}
+DEFINE_SUITE("Breakpoint accounting", bp_accounting);
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index ef37353..1f2908f0 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -161,11 +161,16 @@
 	return count;
 }
 
-int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct sigaction sa;
 	long long count1, count2, count3;
 
+	if (!BP_SIGNAL_IS_SUPPORTED) {
+		pr_debug("Test not supported on this architecture\n");
+		return TEST_SKIP;
+	}
+
 	/* setup SIGIO signal handler */
 	memset(&sa, 0, sizeof(struct sigaction));
 	sa.sa_sigaction = (void *) sig_handler;
@@ -285,29 +290,4 @@
 		TEST_OK : TEST_FAIL;
 }
 
-bool test__bp_signal_is_supported(void)
-{
-	/*
-	 * PowerPC and S390 do not support creation of instruction
-	 * breakpoints using the perf_event interface.
-	 *
-	 * ARM requires explicit rounding down of the instruction
-	 * pointer in Thumb mode, and then requires the single-step
-	 * to be handled explicitly in the overflow handler to avoid
-	 * stepping into the SIGIO handler and getting stuck on the
-	 * breakpointed instruction.
-	 *
-	 * Since arm64 has the same issue with arm for the single-step
-	 * handling, this case also gets stuck on the breakpointed
-	 * instruction.
-	 *
-	 * Just disable the test for these architectures until these
-	 * issues are resolved.
-	 */
-#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \
-    defined(__aarch64__)
-	return false;
-#else
-	return true;
-#endif
-}
+DEFINE_SUITE("Breakpoint overflow signal handler", bp_signal);
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index eb4dbbd..4e897c2 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -59,13 +59,18 @@
 #define EXECUTIONS 10000
 #define THRESHOLD  100
 
-int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_event_attr pe;
 	struct sigaction sa;
 	long long count;
 	int fd, i, fails = 0;
 
+	if (!BP_SIGNAL_IS_SUPPORTED) {
+		pr_debug("Test not supported on this architecture\n");
+		return TEST_SKIP;
+	}
+
 	/* setup SIGIO signal handler */
 	memset(&sa, 0, sizeof(struct sigaction));
 	sa.sa_sigaction = (void *) sig_handler;
@@ -133,3 +138,5 @@
 
 	return fails ? TEST_FAIL : TEST_OK;
 }
+
+DEFINE_SUITE("Breakpoint overflow sampling", bp_signal_overflow);
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 2bf146e..329f77f 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -62,7 +62,6 @@
 
 static struct {
 	enum test_llvm__testcase prog_id;
-	const char *desc;
 	const char *name;
 	const char *msg_compile_fail;
 	const char *msg_load_fail;
@@ -72,7 +71,6 @@
 } bpf_testcase_table[] = {
 	{
 		.prog_id	  = LLVM_TESTCASE_BASE,
-		.desc		  = "Basic BPF filtering",
 		.name		  = "[basic_bpf_test]",
 		.msg_compile_fail = "fix 'perf test LLVM' first",
 		.msg_load_fail	  = "load bpf object failed",
@@ -81,7 +79,6 @@
 	},
 	{
 		.prog_id	  = LLVM_TESTCASE_BASE,
-		.desc		  = "BPF pinning",
 		.name		  = "[bpf_pinning]",
 		.msg_compile_fail = "fix kbuild first",
 		.msg_load_fail	  = "check your vmlinux setting?",
@@ -92,7 +89,6 @@
 #ifdef HAVE_BPF_PROLOGUE
 	{
 		.prog_id	  = LLVM_TESTCASE_BPF_PROLOGUE,
-		.desc		  = "BPF prologue generation",
 		.name		  = "[bpf_prologue_test]",
 		.msg_compile_fail = "fix kbuild first",
 		.msg_load_fail	  = "check your vmlinux setting?",
@@ -283,18 +279,6 @@
 	return ret;
 }
 
-int test__bpf_subtest_get_nr(void)
-{
-	return (int)ARRAY_SIZE(bpf_testcase_table);
-}
-
-const char *test__bpf_subtest_get_desc(int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
-		return NULL;
-	return bpf_testcase_table[i].desc;
-}
-
 static int check_env(void)
 {
 	int err;
@@ -313,7 +297,7 @@
 	}
 
 	err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
-			       sizeof(insns) / sizeof(insns[0]),
+			       ARRAY_SIZE(insns),
 			       license, kver_int, NULL, 0);
 	if (err < 0) {
 		pr_err("Missing basic BPF support, skip this test: %s\n",
@@ -325,7 +309,7 @@
 	return 0;
 }
 
-int test__bpf(struct test *test __maybe_unused, int i)
+static int test__bpf(int i)
 {
 	int err;
 
@@ -343,21 +327,60 @@
 	err = __test__bpf(i);
 	return err;
 }
+#endif
 
+static int test__basic_bpf_test(struct test_suite *test __maybe_unused,
+				int subtest __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__bpf(0);
 #else
-int test__bpf_subtest_get_nr(void)
-{
-	return 0;
-}
-
-const char *test__bpf_subtest_get_desc(int i __maybe_unused)
-{
-	return NULL;
-}
-
-int test__bpf(struct test *test __maybe_unused, int i __maybe_unused)
-{
 	pr_debug("Skip BPF test because BPF support is not compiled\n");
 	return TEST_SKIP;
-}
 #endif
+}
+
+static int test__bpf_pinning(struct test_suite *test __maybe_unused,
+			     int subtest __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__bpf(1);
+#else
+	pr_debug("Skip BPF test because BPF support is not compiled\n");
+	return TEST_SKIP;
+#endif
+}
+
+static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
+				   int subtest __maybe_unused)
+{
+#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE)
+	return test__bpf(2);
+#else
+	pr_debug("Skip BPF test because BPF support is not compiled\n");
+	return TEST_SKIP;
+#endif
+}
+
+static struct test_case bpf_tests[] = {
+#ifdef HAVE_LIBBPF_SUPPORT
+	TEST_CASE("Basic BPF filtering", basic_bpf_test),
+	TEST_CASE("BPF pinning", bpf_pinning),
+#ifdef HAVE_BPF_PROLOGUE
+	TEST_CASE("BPF prologue generation", bpf_prologue_test),
+#else
+	TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
+#endif
+#else
+	TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"),
+	TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"),
+	TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
+#endif
+	{ .name = NULL, }
+};
+
+struct test_suite suite__bpf = {
+	.desc = "BPF filter",
+	.test_cases = bpf_tests,
+};
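
TEST_CASE and TEST_CASE_REASON each fill in one struct test_case; the _REASON variant additionally records a static skip reason that builtin-test.c can print when the case returns TEST_SKIP. Their assumed definitions, matching the fields used in this file:

	#define TEST_CASE(description, _name)			\
		{						\
			.name = #_name,				\
			.desc = description,			\
			.run_case = test__##_name,		\
		}

	#define TEST_CASE_REASON(description, _name, _reason)	\
		{						\
			.name = #_name,				\
			.desc = description,			\
			.run_case = test__##_name,		\
			.skip_reason = _reason,			\
		}
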
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index da7dc5e..8cb5a1c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -30,350 +30,134 @@
 
 static bool dont_fork;
 
-struct test __weak arch_tests[] = {
-	{
-		.func = NULL,
-	},
+struct test_suite *__weak arch_tests[] = {
+	NULL,
 };
 
-static struct test generic_tests[] = {
-	{
-		.desc = "vmlinux symtab matches kallsyms",
-		.func = test__vmlinux_matches_kallsyms,
-	},
-	{
-		.desc = "Detect openat syscall event",
-		.func = test__openat_syscall_event,
-	},
-	{
-		.desc = "Detect openat syscall event on all cpus",
-		.func = test__openat_syscall_event_on_all_cpus,
-	},
-	{
-		.desc = "Read samples using the mmap interface",
-		.func = test__basic_mmap,
-	},
-	{
-		.desc = "Test data source output",
-		.func = test__mem,
-	},
-	{
-		.desc = "Parse event definition strings",
-		.func = test__parse_events,
-	},
-	{
-		.desc = "Simple expression parser",
-		.func = test__expr,
-	},
-	{
-		.desc = "PERF_RECORD_* events & perf_sample fields",
-		.func = test__PERF_RECORD,
-	},
-	{
-		.desc = "Parse perf pmu format",
-		.func = test__pmu,
-	},
-	{
-		.desc = "PMU events",
-		.func = test__pmu_events,
-		.subtest = {
-			.skip_if_fail	= false,
-			.get_nr		= test__pmu_events_subtest_get_nr,
-			.get_desc	= test__pmu_events_subtest_get_desc,
-			.skip_reason	= test__pmu_events_subtest_skip_reason,
-		},
-
-	},
-	{
-		.desc = "DSO data read",
-		.func = test__dso_data,
-	},
-	{
-		.desc = "DSO data cache",
-		.func = test__dso_data_cache,
-	},
-	{
-		.desc = "DSO data reopen",
-		.func = test__dso_data_reopen,
-	},
-	{
-		.desc = "Roundtrip evsel->name",
-		.func = test__perf_evsel__roundtrip_name_test,
-	},
-	{
-		.desc = "Parse sched tracepoints fields",
-		.func = test__perf_evsel__tp_sched_test,
-	},
-	{
-		.desc = "syscalls:sys_enter_openat event fields",
-		.func = test__syscall_openat_tp_fields,
-	},
-	{
-		.desc = "Setup struct perf_event_attr",
-		.func = test__attr,
-	},
-	{
-		.desc = "Match and link multiple hists",
-		.func = test__hists_link,
-	},
-	{
-		.desc = "'import perf' in python",
-		.func = test__python_use,
-	},
-	{
-		.desc = "Breakpoint overflow signal handler",
-		.func = test__bp_signal,
-		.is_supported = test__bp_signal_is_supported,
-	},
-	{
-		.desc = "Breakpoint overflow sampling",
-		.func = test__bp_signal_overflow,
-		.is_supported = test__bp_signal_is_supported,
-	},
-	{
-		.desc = "Breakpoint accounting",
-		.func = test__bp_accounting,
-		.is_supported = test__bp_account_is_supported,
-	},
-	{
-		.desc = "Watchpoint",
-		.func = test__wp,
-		.is_supported = test__wp_is_supported,
-		.subtest = {
-			.skip_if_fail	= false,
-			.get_nr		= test__wp_subtest_get_nr,
-			.get_desc	= test__wp_subtest_get_desc,
-			.skip_reason    = test__wp_subtest_skip_reason,
-		},
-	},
-	{
-		.desc = "Number of exit events of a simple workload",
-		.func = test__task_exit,
-	},
-	{
-		.desc = "Software clock events period values",
-		.func = test__sw_clock_freq,
-	},
-	{
-		.desc = "Object code reading",
-		.func = test__code_reading,
-	},
-	{
-		.desc = "Sample parsing",
-		.func = test__sample_parsing,
-	},
-	{
-		.desc = "Use a dummy software event to keep tracking",
-		.func = test__keep_tracking,
-	},
-	{
-		.desc = "Parse with no sample_id_all bit set",
-		.func = test__parse_no_sample_id_all,
-	},
-	{
-		.desc = "Filter hist entries",
-		.func = test__hists_filter,
-	},
-	{
-		.desc = "Lookup mmap thread",
-		.func = test__mmap_thread_lookup,
-	},
-	{
-		.desc = "Share thread maps",
-		.func = test__thread_maps_share,
-	},
-	{
-		.desc = "Sort output of hist entries",
-		.func = test__hists_output,
-	},
-	{
-		.desc = "Cumulate child hist entries",
-		.func = test__hists_cumulate,
-	},
-	{
-		.desc = "Track with sched_switch",
-		.func = test__switch_tracking,
-	},
-	{
-		.desc = "Filter fds with revents mask in a fdarray",
-		.func = test__fdarray__filter,
-	},
-	{
-		.desc = "Add fd to a fdarray, making it autogrow",
-		.func = test__fdarray__add,
-	},
-	{
-		.desc = "kmod_path__parse",
-		.func = test__kmod_path__parse,
-	},
-	{
-		.desc = "Thread map",
-		.func = test__thread_map,
-	},
-	{
-		.desc = "LLVM search and compile",
-		.func = test__llvm,
-		.subtest = {
-			.skip_if_fail	= true,
-			.get_nr		= test__llvm_subtest_get_nr,
-			.get_desc	= test__llvm_subtest_get_desc,
-		},
-	},
-	{
-		.desc = "Session topology",
-		.func = test__session_topology,
-	},
-	{
-		.desc = "BPF filter",
-		.func = test__bpf,
-		.subtest = {
-			.skip_if_fail	= true,
-			.get_nr		= test__bpf_subtest_get_nr,
-			.get_desc	= test__bpf_subtest_get_desc,
-		},
-	},
-	{
-		.desc = "Synthesize thread map",
-		.func = test__thread_map_synthesize,
-	},
-	{
-		.desc = "Remove thread map",
-		.func = test__thread_map_remove,
-	},
-	{
-		.desc = "Synthesize cpu map",
-		.func = test__cpu_map_synthesize,
-	},
-	{
-		.desc = "Synthesize stat config",
-		.func = test__synthesize_stat_config,
-	},
-	{
-		.desc = "Synthesize stat",
-		.func = test__synthesize_stat,
-	},
-	{
-		.desc = "Synthesize stat round",
-		.func = test__synthesize_stat_round,
-	},
-	{
-		.desc = "Synthesize attr update",
-		.func = test__event_update,
-	},
-	{
-		.desc = "Event times",
-		.func = test__event_times,
-	},
-	{
-		.desc = "Read backward ring buffer",
-		.func = test__backward_ring_buffer,
-	},
-	{
-		.desc = "Print cpu map",
-		.func = test__cpu_map_print,
-	},
-	{
-		.desc = "Merge cpu map",
-		.func = test__cpu_map_merge,
-	},
-
-	{
-		.desc = "Probe SDT events",
-		.func = test__sdt_event,
-	},
-	{
-		.desc = "is_printable_array",
-		.func = test__is_printable_array,
-	},
-	{
-		.desc = "Print bitmap",
-		.func = test__bitmap_print,
-	},
-	{
-		.desc = "perf hooks",
-		.func = test__perf_hooks,
-	},
-	{
-		.desc = "builtin clang support",
-		.func = test__clang,
-		.subtest = {
-			.skip_if_fail	= true,
-			.get_nr		= test__clang_subtest_get_nr,
-			.get_desc	= test__clang_subtest_get_desc,
-		}
-	},
-	{
-		.desc = "unit_number__scnprintf",
-		.func = test__unit_number__scnprint,
-	},
-	{
-		.desc = "mem2node",
-		.func = test__mem2node,
-	},
-	{
-		.desc = "time utils",
-		.func = test__time_utils,
-	},
-	{
-		.desc = "Test jit_write_elf",
-		.func = test__jit_write_elf,
-	},
-	{
-		.desc = "Test libpfm4 support",
-		.func = test__pfm,
-		.subtest = {
-			.skip_if_fail	= true,
-			.get_nr		= test__pfm_subtest_get_nr,
-			.get_desc	= test__pfm_subtest_get_desc,
-		}
-	},
-	{
-		.desc = "Test api io",
-		.func = test__api_io,
-	},
-	{
-		.desc = "maps__merge_in",
-		.func = test__maps__merge_in,
-	},
-	{
-		.desc = "Demangle Java",
-		.func = test__demangle_java,
-	},
-	{
-		.desc = "Demangle OCaml",
-		.func = test__demangle_ocaml,
-	},
-	{
-		.desc = "Parse and process metrics",
-		.func = test__parse_metric,
-	},
-	{
-		.desc = "PE file support",
-		.func = test__pe_file_parsing,
-	},
-	{
-		.desc = "Event expansion for cgroups",
-		.func = test__expand_cgroup_events,
-	},
-	{
-		.desc = "Convert perf time to TSC",
-		.func = test__perf_time_to_tsc,
-		.is_supported = test__tsc_is_supported,
-	},
-	{
-		.desc = "dlfilter C API",
-		.func = test__dlfilter,
-	},
-	{
-		.func = NULL,
-	},
+static struct test_suite *generic_tests[] = {
+	&suite__vmlinux_matches_kallsyms,
+	&suite__openat_syscall_event,
+	&suite__openat_syscall_event_on_all_cpus,
+	&suite__basic_mmap,
+	&suite__mem,
+	&suite__parse_events,
+	&suite__expr,
+	&suite__PERF_RECORD,
+	&suite__pmu,
+	&suite__pmu_events,
+	&suite__dso_data,
+	&suite__dso_data_cache,
+	&suite__dso_data_reopen,
+	&suite__perf_evsel__roundtrip_name_test,
+	&suite__perf_evsel__tp_sched_test,
+	&suite__syscall_openat_tp_fields,
+	&suite__attr,
+	&suite__hists_link,
+	&suite__python_use,
+	&suite__bp_signal,
+	&suite__bp_signal_overflow,
+	&suite__bp_accounting,
+	&suite__wp,
+	&suite__task_exit,
+	&suite__sw_clock_freq,
+	&suite__code_reading,
+	&suite__sample_parsing,
+	&suite__keep_tracking,
+	&suite__parse_no_sample_id_all,
+	&suite__hists_filter,
+	&suite__mmap_thread_lookup,
+	&suite__thread_maps_share,
+	&suite__hists_output,
+	&suite__hists_cumulate,
+	&suite__switch_tracking,
+	&suite__fdarray__filter,
+	&suite__fdarray__add,
+	&suite__kmod_path__parse,
+	&suite__thread_map,
+	&suite__llvm,
+	&suite__session_topology,
+	&suite__bpf,
+	&suite__thread_map_synthesize,
+	&suite__thread_map_remove,
+	&suite__cpu_map_synthesize,
+	&suite__synthesize_stat_config,
+	&suite__synthesize_stat,
+	&suite__synthesize_stat_round,
+	&suite__event_update,
+	&suite__event_times,
+	&suite__backward_ring_buffer,
+	&suite__cpu_map_print,
+	&suite__cpu_map_merge,
+	&suite__sdt_event,
+	&suite__is_printable_array,
+	&suite__bitmap_print,
+	&suite__perf_hooks,
+	&suite__clang,
+	&suite__unit_number__scnprint,
+	&suite__mem2node,
+	&suite__time_utils,
+	&suite__jit_write_elf,
+	&suite__pfm,
+	&suite__api_io,
+	&suite__maps__merge_in,
+	&suite__demangle_java,
+	&suite__demangle_ocaml,
+	&suite__parse_metric,
+	&suite__pe_file_parsing,
+	&suite__expand_cgroup_events,
+	&suite__perf_time_to_tsc,
+	&suite__dlfilter,
+	NULL,
 };
 
-static struct test *tests[] = {
+static struct test_suite **tests[] = {
 	generic_tests,
 	arch_tests,
 };
 
+static int num_subtests(const struct test_suite *t)
+{
+	int num;
+
+	if (!t->test_cases)
+		return 0;
+
+	num = 0;
+	while (t->test_cases[num].name)
+		num++;
+
+	return num;
+}
+
+static bool has_subtests(const struct test_suite *t)
+{
+	return num_subtests(t) > 1;
+}
+
+static const char *skip_reason(const struct test_suite *t, int subtest)
+{
+	if (t->test_cases && subtest >= 0)
+		return t->test_cases[subtest].skip_reason;
+
+	return NULL;
+}
+
+static const char *test_description(const struct test_suite *t, int subtest)
+{
+	if (t->test_cases && subtest >= 0)
+		return t->test_cases[subtest].desc;
+
+	return t->desc;
+}
+
+static test_fnptr test_function(const struct test_suite *t, int subtest)
+{
+	if (subtest <= 0)
+		return t->test_cases[0].run_case;
+
+	return t->test_cases[subtest].run_case;
+}
+
 static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
 {
 	int i;
@@ -398,7 +182,7 @@
 	return false;
 }
 
-static int run_test(struct test *test, int subtest)
+static int run_test(struct test_suite *test, int subtest)
 {
 	int status, err = -1, child = dont_fork ? 0 : fork();
 	char sbuf[STRERR_BUFSIZE];
@@ -430,7 +214,7 @@
 			}
 		}
 
-		err = test->func(test, subtest);
+		err = test_function(test, subtest)(test, subtest);
 		if (!dont_fork)
 			exit(err);
 	}
@@ -450,24 +234,19 @@
 	return err;
 }
 
-#define for_each_test(j, t)	 				\
+#define for_each_test(j, k, t)			\
 	for (j = 0; j < ARRAY_SIZE(tests); j++)	\
-		for (t = &tests[j][0]; t->func; t++)
+		for (k = 0, t = tests[j][k]; tests[j][k]; k++, t = tests[j][k])
 
-static int test_and_print(struct test *t, bool force_skip, int subtest)
+static int test_and_print(struct test_suite *t, int subtest)
 {
 	int err;
 
-	if (!force_skip) {
-		pr_debug("\n--- start ---\n");
-		err = run_test(t, subtest);
-		pr_debug("---- end ----\n");
-	} else {
-		pr_debug("\n--- force skipped ---\n");
-		err = TEST_SKIP;
-	}
+	pr_debug("\n--- start ---\n");
+	err = run_test(t, subtest);
+	pr_debug("---- end ----\n");
 
-	if (!t->subtest.get_nr)
+	if (!has_subtests(t))
 		pr_debug("%s:", t->desc);
 	else
 		pr_debug("%s subtest %d:", t->desc, subtest + 1);
@@ -477,11 +256,10 @@
 		pr_info(" Ok\n");
 		break;
 	case TEST_SKIP: {
-		const char *skip_reason = NULL;
-		if (t->subtest.skip_reason)
-			skip_reason = t->subtest.skip_reason(subtest);
-		if (skip_reason)
-			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason);
+		const char *reason = skip_reason(t, subtest);
+
+		if (reason)
+			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", reason);
 		else
 			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
 	}
@@ -580,7 +358,7 @@
 	const char *file;
 };
 
-static int shell_test__run(struct test *test, int subdir __maybe_unused)
+static int shell_test__run(struct test_suite *test, int subdir __maybe_unused)
 {
 	int err;
 	char script[PATH_MAX];
@@ -622,24 +400,34 @@
 	for_each_shell_test(entlist, n_dirs, st.dir, ent) {
 		int curr = i++;
 		char desc[256];
-		struct test test = {
-			.desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name),
-			.func = shell_test__run,
+		struct test_case test_cases[] = {
+			{
+				.desc = shell_test__description(desc,
+								sizeof(desc),
+								st.dir,
+								ent->d_name),
+				.run_case = shell_test__run,
+			},
+			{ .name = NULL, }
+		};
+		struct test_suite test_suite = {
+			.desc = test_cases[0].desc,
+			.test_cases = test_cases,
 			.priv = &st,
 		};
 
-		if (!perf_test__matches(test.desc, curr, argc, argv))
+		if (!perf_test__matches(test_suite.desc, curr, argc, argv))
 			continue;
 
 		st.file = ent->d_name;
-		pr_info("%2d: %-*s:", i, width, test.desc);
+		pr_info("%2d: %-*s:", i, width, test_suite.desc);
 
 		if (intlist__find(skiplist, i)) {
 			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
 			continue;
 		}
 
-		test_and_print(&test, false, -1);
+		test_and_print(&test_suite, 0);
 	}
 
 	for (e = 0; e < n_dirs; e++)
@@ -650,33 +438,31 @@
 
 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 {
-	struct test *t;
-	unsigned int j;
+	struct test_suite *t;
+	unsigned int j, k;
 	int i = 0;
 	int width = shell_tests__max_desc_width();
 
-	for_each_test(j, t) {
-		int len = strlen(t->desc);
+	for_each_test(j, k, t) {
+		int len = strlen(test_description(t, -1));
 
 		if (width < len)
 			width = len;
 	}
 
-	for_each_test(j, t) {
-		int curr = i++, err;
+	for_each_test(j, k, t) {
+		int curr = i++;
 		int subi;
 
-		if (!perf_test__matches(t->desc, curr, argc, argv)) {
+		if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) {
 			bool skip = true;
 			int subn;
 
-			if (!t->subtest.get_nr)
-				continue;
-
-			subn = t->subtest.get_nr();
+			subn = num_subtests(t);
 
 			for (subi = 0; subi < subn; subi++) {
-				if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+				if (perf_test__matches(test_description(t, subi),
+							curr, argc, argv))
 					skip = false;
 			}
 
@@ -684,22 +470,17 @@
 				continue;
 		}
 
-		if (t->is_supported && !t->is_supported()) {
-			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
-			continue;
-		}
-
-		pr_info("%2d: %-*s:", i, width, t->desc);
+		pr_info("%2d: %-*s:", i, width, test_description(t, -1));
 
 		if (intlist__find(skiplist, i)) {
 			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
 			continue;
 		}
 
-		if (!t->subtest.get_nr) {
-			test_and_print(t, false, -1);
+		if (!has_subtests(t)) {
+			test_and_print(t, -1);
 		} else {
-			int subn = t->subtest.get_nr();
+			int subn = num_subtests(t);
 			/*
 			 * minus 2 to align with normal testcases.
 			 * For subtest we print additional '.x' in number.
@@ -709,7 +490,6 @@
 			 * 35.1: Basic BPF llvm compiling test                          : Ok
 			 */
 			int subw = width > 2 ? width - 2 : width;
-			bool skip = false;
 
 			if (subn <= 0) {
 				color_fprintf(stderr, PERF_COLOR_YELLOW,
@@ -719,21 +499,20 @@
 			pr_info("\n");
 
 			for (subi = 0; subi < subn; subi++) {
-				int len = strlen(t->subtest.get_desc(subi));
+				int len = strlen(test_description(t, subi));
 
 				if (subw < len)
 					subw = len;
 			}
 
 			for (subi = 0; subi < subn; subi++) {
-				if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+				if (!perf_test__matches(test_description(t, subi),
+							curr, argc, argv))
 					continue;
 
 				pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
-					t->subtest.get_desc(subi));
-				err = test_and_print(t, skip, subi);
-				if (err != TEST_OK && t->subtest.skip_if_fail)
-					skip = true;
+					test_description(t, subi));
+				test_and_print(t, subi);
 			}
 		}
 	}
@@ -759,7 +538,7 @@
 	for_each_shell_test(entlist, n_dirs, path, ent) {
 		int curr = i++;
 		char bf[256];
-		struct test t = {
+		struct test_suite t = {
 			.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
 		};
 
@@ -778,26 +557,25 @@
 
 static int perf_test__list(int argc, const char **argv)
 {
-	unsigned int j;
-	struct test *t;
+	unsigned int j, k;
+	struct test_suite *t;
 	int i = 0;
 
-	for_each_test(j, t) {
+	for_each_test(j, k, t) {
 		int curr = i++;
 
-		if (!perf_test__matches(t->desc, curr, argc, argv) ||
-		    (t->is_supported && !t->is_supported()))
+		if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
 			continue;
 
-		pr_info("%2d: %s\n", i, t->desc);
+		pr_info("%2d: %s\n", i, test_description(t, -1));
 
-		if (t->subtest.get_nr) {
-			int subn = t->subtest.get_nr();
+		if (has_subtests(t)) {
+			int subn = num_subtests(t);
 			int subi;
 
 			for (subi = 0; subi < subn; subi++)
 				pr_info("%2d:%1d: %s\n", i, subi + 1,
-					t->subtest.get_desc(subi));
+					test_description(t, subi));
 		}
 	}
 
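For orientation, a suite with more than one case looks like this under the new
model (a hypothetical sketch with made-up "foo" names, not part of this patch):

static int test__foo_basic(struct test_suite *suite __maybe_unused,
			   int subtest __maybe_unused)
{
	return TEST_OK;
}

static struct test_case foo_tests[] = {
	TEST_CASE("foo basic", foo_basic),
	{ .name = NULL, },	/* sentinel: num_subtests() counts up to here */
};

struct test_suite suite__foo = {
	.desc = "foo tests",
	.test_cases = foo_tests,
};

Since has_subtests() only returns true for suites with two or more cases,
single-case suites keep printing on one line exactly as before.
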
diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c
index 2577d3e..a711100 100644
--- a/tools/perf/tests/clang.c
+++ b/tools/perf/tests/clang.c
@@ -3,44 +3,30 @@
 #include "c++/clang-c.h"
 #include <linux/kernel.h>
 
-static struct {
-	int (*func)(void);
-	const char *desc;
-} clang_testcase_table[] = {
-#ifdef HAVE_LIBCLANGLLVM_SUPPORT
-	{
-		.func = test__clang_to_IR,
-		.desc = "builtin clang compile C source to IR",
-	},
-	{
-		.func = test__clang_to_obj,
-		.desc = "builtin clang compile C source to ELF object",
-	},
-#endif
-};
-
-int test__clang_subtest_get_nr(void)
-{
-	return (int)ARRAY_SIZE(clang_testcase_table);
-}
-
-const char *test__clang_subtest_get_desc(int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
-		return NULL;
-	return clang_testcase_table[i].desc;
-}
-
 #ifndef HAVE_LIBCLANGLLVM_SUPPORT
-int test__clang(struct test *test __maybe_unused, int i __maybe_unused)
+static int test__clang_to_IR(struct test_suite *test __maybe_unused,
+			     int subtest __maybe_unused)
 {
 	return TEST_SKIP;
 }
-#else
-int test__clang(struct test *test __maybe_unused, int i)
+
+static int test__clang_to_obj(struct test_suite *test __maybe_unused,
+			      int subtest __maybe_unused)
 {
-	if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
-		return TEST_FAIL;
-	return clang_testcase_table[i].func();
+	return TEST_SKIP;
 }
 #endif
+
+static struct test_case clang_tests[] = {
+	TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR,
+			 "not compiled in"),
+	TEST_CASE_REASON("builtin clang compile C source to ELF object",
+			 clang_to_obj,
+			 "not compiled in"),
+	{ .name = NULL, }
+};
+
+struct test_suite suite__clang = {
+	.desc = "builtin clang support",
+	.test_cases = clang_tests,
+};
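
TEST_CASE_REASON() attaches a canned skip reason to a case. Assuming the macro
definitions in tools/perf/tests/tests.h (they sit outside the hunks quoted
here), they expand to something like:

#define TEST_CASE(description, _name)			\
	{						\
		.name = #_name,				\
		.desc = description,			\
		.run_case = test__##_name,		\
	}

#define TEST_CASE_REASON(description, _name, _reason)	\
	{						\
		.name = #_name,				\
		.desc = description,			\
		.run_case = test__##_name,		\
		.skip_reason = _reason,			\
	}

so the table above wires up test__clang_to_IR and test__clang_to_obj, and the
reason string is only shown when a case actually returns TEST_SKIP (see
skip_reason() in builtin-test.c).
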
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index f439bd4..5610767 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -716,7 +716,7 @@
 	return err;
 }
 
-int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__code_reading(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret;
 
@@ -743,3 +743,5 @@
 		return -1;
 	};
 }
+
+DEFINE_SUITE("Object code reading", code_reading);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 0472b11..89a1550 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -75,7 +75,7 @@
 }
 
 
-int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_cpu_map *cpus;
 
@@ -111,7 +111,7 @@
 	return !strcmp(buf, str);
 }
 
-int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1"));
 	TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5"));
@@ -123,7 +123,7 @@
 	return 0;
 }
 
-int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
 	struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
@@ -137,3 +137,7 @@
 	perf_cpu_map__put(c);
 	return 0;
 }
+
+DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
+DEFINE_SUITE("Print cpu map", cpu_map_print);
+DEFINE_SUITE("Merge cpu map", cpu_map_merge);
diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c
index 8f3b908..44d1be3 100644
--- a/tools/perf/tests/demangle-java-test.c
+++ b/tools/perf/tests/demangle-java-test.c
@@ -7,7 +7,7 @@
 #include "debug.h"
 #include "demangle-java.h"
 
-int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__demangle_java(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret = TEST_OK;
 	char *buf = NULL;
@@ -40,3 +40,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Demangle Java", demangle_java);
diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c
index 0043be8..90a4285 100644
--- a/tools/perf/tests/demangle-ocaml-test.c
+++ b/tools/perf/tests/demangle-ocaml-test.c
@@ -7,7 +7,7 @@
 #include "debug.h"
 #include "demangle-ocaml.h"
 
-int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__demangle_ocaml(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret = TEST_OK;
 	char *buf = NULL;
@@ -41,3 +41,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Demangle OCaml", demangle_ocaml);
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index bc03b5d..84352d5 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -398,7 +398,7 @@
 	}
 }
 
-int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct test_data td = {.fd = -1};
 	int pid = getpid();
@@ -414,3 +414,5 @@
 	test_data__free(&td);
 	return err;
 }
+
+DEFINE_SUITE("dlfilter C API", dlfilter);
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 43e1b01..3419a4a 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -113,7 +113,7 @@
 	return fd;
 }
 
-int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct machine machine;
 	struct dso *dso;
@@ -248,7 +248,7 @@
 	return setrlimit(RLIMIT_NOFILE, &rlim);
 }
 
-int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct machine machine;
 	long nr_end, nr = open_files_cnt();
@@ -318,7 +318,7 @@
 	return ret;
 }
 
-int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct machine machine;
 	long nr_end, nr = open_files_cnt(), lim = new_limit(3);
@@ -393,3 +393,7 @@
 	TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
 	return 0;
 }
+
+DEFINE_SUITE("DSO data read", dso_data);
+DEFINE_SUITE("DSO data cache", dso_data_cache);
+DEFINE_SUITE("DSO data reopen", dso_data_reopen);
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index c756284..2dab2d2 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -195,7 +195,8 @@
 	return ret;
 }
 
-int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dwarf_unwind(struct test_suite *test __maybe_unused,
+			      int subtest __maybe_unused)
 {
 	struct machine *machine;
 	struct thread *thread;
@@ -237,3 +238,5 @@
 	machine__delete(machine);
 	return err;
 }
+
+DEFINE_SUITE("Test dwarf unwind", dwarf_unwind);
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 04ce440..7606eb3 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -216,7 +216,7 @@
  * and checks that enabled and running times
  * match.
  */
-int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__event_times(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err, ret = 0;
 
@@ -239,3 +239,5 @@
 #undef _T
 	return ret;
 }
+
+DEFINE_SUITE("Event times", event_times);
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index 44a5052..d01532d 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -83,12 +83,11 @@
 	return 0;
 }
 
-int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__event_update(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct evsel *evsel;
 	struct event_name tmp;
 	struct evlist *evlist = evlist__new_default();
-	char *unit = strdup("KRAVA");
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
@@ -99,7 +98,8 @@
 
 	perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123);
 
-	evsel->unit = unit;
+	free((char *)evsel->unit);
+	evsel->unit = strdup("KRAVA");
 
 	TEST_ASSERT_VAL("failed to synthesize attr update unit",
 			!perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
@@ -119,7 +119,8 @@
 	TEST_ASSERT_VAL("failed to synthesize attr update cpus",
 			!perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
 
-	free(unit);
 	evlist__delete(evlist);
 	return 0;
 }
+
+DEFINE_SUITE("Synthesize attr update", event_update);
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 4e09f0a..fdbf176 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -99,7 +99,8 @@
 #define perf_evsel__name_array_test(names, distance) \
 	__perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance)
 
-int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused,
+						 int subtest __maybe_unused)
 {
 	int err = 0, ret = 0;
 
@@ -120,3 +121,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Roundtrip evsel->name", perf_evsel__roundtrip_name_test);
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index f9e34bd..cf4da3d 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -32,7 +32,8 @@
 	return ret;
 }
 
-int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unused,
+					   int subtest __maybe_unused)
 {
 	struct evsel *evsel = evsel__newtp("sched", "sched_switch");
 	int ret = 0;
@@ -87,3 +88,5 @@
 	evsel__delete(evsel);
 	return ret;
 }
+
+DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test);
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index 80cff8a..dfefe5b 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -221,8 +221,8 @@
 	return ret;
 }
 
-int test__expand_cgroup_events(struct test *test __maybe_unused,
-			       int subtest __maybe_unused)
+static int test__expand_cgroup_events(struct test_suite *test __maybe_unused,
+				      int subtest __maybe_unused)
 {
 	int ret;
 
@@ -240,3 +240,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Event expansion for cgroups", expand_cgroup_events);
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 0777832..c895de4 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -62,11 +62,11 @@
 	return 0;
 }
 
-int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	struct expr_id_data *val_ptr;
 	const char *p;
-	double val;
+	double val, num_cpus, num_cores, num_dies, num_packages;
 	int ret;
 	struct expr_parse_ctx *ctx;
 
@@ -134,6 +134,16 @@
 	TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@",
 						    (void **)&val_ptr));
 
+	expr__ctx_clear(ctx);
+	TEST_ASSERT_VAL("find ids",
+			expr__find_ids("dash\\-event1 - dash\\-event2",
+				       NULL, ctx) == 0);
+	TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2);
+	TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2",
+						    (void **)&val_ptr));
+
 	/* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */
 	expr__ctx_clear(ctx);
 	TEST_ASSERT_VAL("find ids",
@@ -151,7 +161,31 @@
 			NULL, ctx) == 0);
 	TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
 
+	/* Test topology constants appear well ordered. */
+	expr__ctx_clear(ctx);
+	TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0);
+	TEST_ASSERT_VAL("#num_cores", expr__parse(&num_cores, ctx, "#num_cores") == 0);
+	TEST_ASSERT_VAL("#num_cpus >= #num_cores", num_cpus >= num_cores);
+	TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0);
+	TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies);
+	TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0);
+	TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
+
+	/*
+	 * Source count returns the number of events aggregating in a leader
+	 * event including the leader. Check parsing yields an id.
+	 */
+	expr__ctx_clear(ctx);
+	TEST_ASSERT_VAL("source count",
+			expr__find_ids("source_count(EVENT1)",
+			NULL, ctx) == 0);
+	TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1);
+	TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1",
+							(void **)&val_ptr));
+
 	expr__ctx_free(ctx);
 
 	return 0;
 }
+
+DEFINE_SUITE("Simple expression parser", expr);
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index d9eca8e..40983c3 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -28,7 +28,7 @@
 	return printed + fdarray__fprintf(fda, fp);
 }
 
-int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__fdarray__filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int nr_fds, err = TEST_FAIL;
 	struct fdarray *fda = fdarray__new(5, 5);
@@ -89,7 +89,7 @@
 	return err;
 }
 
-int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__fdarray__add(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = TEST_FAIL;
 	struct fdarray *fda = fdarray__new(2, 2);
@@ -158,3 +158,6 @@
 out:
 	return err;
 }
+
+DEFINE_SUITE("Filter fds with revents mask in a fdarray", fdarray__filter);
+DEFINE_SUITE("Add fd to a fdarray, making it autogrow", fdarray__add);
diff --git a/tools/perf/tests/genelf.c b/tools/perf/tests/genelf.c
index f797f98..95f3be1 100644
--- a/tools/perf/tests/genelf.c
+++ b/tools/perf/tests/genelf.c
@@ -16,8 +16,8 @@
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
 
-int test__jit_write_elf(struct test *test __maybe_unused,
-			int subtest __maybe_unused)
+static int test__jit_write_elf(struct test_suite *test __maybe_unused,
+			       int subtest __maybe_unused)
 {
 #ifdef HAVE_JITDUMP
 	static unsigned char x86_code[] = {
@@ -49,3 +49,5 @@
 	return TEST_SKIP;
 #endif
 }
+
+DEFINE_SUITE("Test jit_write_elf", jit_write_elf);
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 890cb1f..17f4fcd 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -689,7 +689,7 @@
 	return err;
 }
 
-int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = TEST_FAIL;
 	struct machines machines;
@@ -736,3 +736,5 @@
 
 	return err;
 }
+
+DEFINE_SUITE("Cumulate child hist entries", hists_cumulate);
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index ca6120c..08cbeb9 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -101,7 +101,7 @@
 	return TEST_FAIL;
 }
 
-int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = TEST_FAIL;
 	struct machines machines;
@@ -325,3 +325,5 @@
 
 	return err;
 }
+
+DEFINE_SUITE("Filter hist entries", hists_filter);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index a024d3f..c575e13 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -264,7 +264,7 @@
 	return __validate_link(leader, 0) || __validate_link(other, 1);
 }
 
-int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_link(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = -1;
 	struct hists *hists, *first_hists;
@@ -339,3 +339,5 @@
 
 	return err;
 }
+
+DEFINE_SUITE("Match and link multiple hists", hists_link);
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 8973f35..0bde4a7 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -575,7 +575,7 @@
 	return err;
 }
 
-int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_output(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = TEST_FAIL;
 	struct machines machines;
@@ -623,3 +623,5 @@
 
 	return err;
 }
+
+DEFINE_SUITE("Sort output of hist entries", hists_output);
diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c
index 9c7b3ba..f72de24 100644
--- a/tools/perf/tests/is_printable_array.c
+++ b/tools/perf/tests/is_printable_array.c
@@ -5,7 +5,7 @@
 #include "debug.h"
 #include "print_binary.h"
 
-int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__is_printable_array(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 };
 	char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 };
@@ -36,3 +36,5 @@
 
 	return TEST_OK;
 }
+
+DEFINE_SUITE("is_printable_array", is_printable_array);
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index a0438b0..dd20673 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -61,7 +61,7 @@
  * when an event is disabled but a dummy software event is not disabled.  If the
  * test passes %0 is returned, otherwise %-1 is returned.
  */
-int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__keep_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct record_opts opts = {
 		.mmap_pages	     = UINT_MAX,
@@ -160,3 +160,5 @@
 
 	return err;
 }
+
+DEFINE_SUITE("Use a dummy software event to keep tracking", keep_tracking);
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index e483210..dfe1bd5 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -47,7 +47,7 @@
 #define M(path, c, e) \
 	TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
 
-int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__kmod_path__parse(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	/* path                alloc_name  kmod  comp   name   */
 	T("/xxxx/xxxx/x-x.ko", true      , true, 0    , "[x_x]");
@@ -159,3 +159,5 @@
 
 	return 0;
 }
+
+DEFINE_SUITE("kmod_path__parse", kmod_path__parse);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index 33e43cc..8ac0a3a 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -124,7 +124,7 @@
 	return ret;
 }
 
-int test__llvm(struct test *test __maybe_unused, int subtest)
+static int test__llvm(int subtest)
 {
 	int ret;
 	void *obj_buf = NULL;
@@ -148,32 +148,72 @@
 
 	return ret;
 }
+#endif //HAVE_LIBBPF_SUPPORT
 
-int test__llvm_subtest_get_nr(void)
+static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused,
+				     int subtest __maybe_unused)
 {
-	return __LLVM_TESTCASE_MAX;
-}
-
-const char *test__llvm_subtest_get_desc(int subtest)
-{
-	if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
-		return NULL;
-
-	return bpf_source_table[subtest].desc;
-}
-#else //HAVE_LIBBPF_SUPPORT
-int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused)
-{
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__llvm(LLVM_TESTCASE_BASE);
+#else
+	pr_debug("Skip LLVM test because BPF support is not compiled\n");
 	return TEST_SKIP;
+#endif
 }
 
-int test__llvm_subtest_get_nr(void)
+static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused,
+					    int subtest __maybe_unused)
 {
-	return 0;
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__llvm(LLVM_TESTCASE_KBUILD);
+#else
+	pr_debug("Skip LLVM test because BPF support is not compiled\n");
+	return TEST_SKIP;
+#endif
 }
 
-const char *test__llvm_subtest_get_desc(int subtest __maybe_unused)
+static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused,
+					      int subtest __maybe_unused)
 {
-	return NULL;
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE);
+#else
+	pr_debug("Skip LLVM test because BPF support is not compiled\n");
+	return TEST_SKIP;
+#endif
 }
-#endif // HAVE_LIBBPF_SUPPORT
+
+static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused,
+					   int subtest __maybe_unused)
+{
+#ifdef HAVE_LIBBPF_SUPPORT
+	return test__llvm(LLVM_TESTCASE_BPF_RELOCATION);
+#else
+	pr_debug("Skip LLVM test because BPF support is not compiled\n");
+	return TEST_SKIP;
+#endif
+}
+
+
+static struct test_case llvm_tests[] = {
+#ifdef HAVE_LIBBPF_SUPPORT
+	TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog),
+	TEST_CASE("kbuild searching", llvm__bpf_test_kbuild_prog),
+	TEST_CASE("Compile source for BPF prologue generation",
+		  llvm__bpf_test_prologue_prog),
+	TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation),
+#else
+	TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"),
+	TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"),
+	TEST_CASE_REASON("Compile source for BPF prologue generation",
+			llvm__bpf_test_prologue_prog, "not compiled in"),
+	TEST_CASE_REASON("Compile source for BPF relocation",
+			llvm__bpf_test_relocation, "not compiled in"),
+#endif
+	{ .name = NULL, }
+};
+
+struct test_suite suite__llvm = {
+	.desc = "LLVM search and compile",
+	.test_cases = llvm_tests,
+};
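
The llvm conversion illustrates the general pattern for features that can be
compiled out: every case keeps a definition in both configurations, so the
suite is always listed and the numbering stays stable, and only the body
differs. In the abstract (hypothetical HAVE_FEATURE names, not from this
patch):

#ifdef HAVE_FEATURE
static int test__feature_case(struct test_suite *suite __maybe_unused,
			      int subtest __maybe_unused)
{
	return run_real_feature_test();	/* hypothetical helper */
}
#else
static int test__feature_case(struct test_suite *suite __maybe_unused,
			      int subtest __maybe_unused)
{
	pr_debug("Skip: feature support is not compiled in\n");
	return TEST_SKIP;	/* listed, but reported as skipped */
}
#endif
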
diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c
index 1ac7291..e308a32 100644
--- a/tools/perf/tests/maps.c
+++ b/tools/perf/tests/maps.c
@@ -33,7 +33,7 @@
 	return TEST_OK;
 }
 
-int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	struct maps maps;
 	unsigned int i;
@@ -120,3 +120,5 @@
 	maps__exit(&maps);
 	return TEST_OK;
 }
+
+DEFINE_SUITE("maps__merge_in", maps__merge_in);
diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c
index 673a11a6..56014ec 100644
--- a/tools/perf/tests/mem.c
+++ b/tools/perf/tests/mem.c
@@ -23,7 +23,7 @@
 	return 0;
 }
 
-int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused)
+static int test__mem(struct test_suite *text __maybe_unused, int subtest __maybe_unused)
 {
 	int ret = 0;
 	union perf_mem_data_src src;
@@ -56,3 +56,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Test data source output", mem);
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index e4d0d58..b17b863 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -43,7 +43,7 @@
 	return bm && map ? bm : NULL;
 }
 
-int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__mem2node(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	struct mem2node map;
 	struct memory_node nodes[3];
@@ -77,3 +77,5 @@
 	mem2node__exit(&map);
 	return 0;
 }
+
+DEFINE_SUITE("mem2node", mem2node);
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index d38757d..90b2fed 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -29,7 +29,7 @@
  * Then it checks if the number of syscalls reported as perf events by
  * the kernel corresponds to the number of syscalls made.
  */
-int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = -1;
 	union perf_event *event;
@@ -164,3 +164,5 @@
 	perf_thread_map__put(threads);
 	return err;
 }
+
+DEFINE_SUITE("Read samples using the mmap interface", basic_mmap);
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 6f2da7a..a4301fc 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -224,7 +224,7 @@
  *
  * by using all thread objects.
  */
-int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__mmap_thread_lookup(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	/* perf_event__synthesize_threads synthesize */
 	TEST_ASSERT_VAL("failed with sythesizing all",
@@ -236,3 +236,5 @@
 
 	return 0;
 }
+
+DEFINE_SUITE("Lookup mmap thread", mmap_thread_lookup);
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index f7dd6c46..cd3dd46 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -19,7 +19,8 @@
 #include "stat.h"
 #include "util/counts.h"
 
-int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused,
+						  int subtest __maybe_unused)
 {
 	int err = -1, fd, cpu;
 	struct perf_cpu_map *cpus;
@@ -127,3 +128,5 @@
 	perf_thread_map__put(threads);
 	return err;
 }
+
+DEFINE_SUITE("Detect openat syscall event on all cpus", openat_syscall_event_on_all_cpus);
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 5e4af2f..a7b2800 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -22,7 +22,8 @@
 #define AT_FDCWD       -100
 #endif
 
-int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused,
+					  int subtest __maybe_unused)
 {
 	struct record_opts opts = {
 		.target = {
@@ -142,3 +143,5 @@
 out:
 	return err;
 }
+
+DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields);
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 85a8f0f..7f4c13c4 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -13,7 +13,8 @@
 #include "tests.h"
 #include "util/counts.h"
 
-int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__openat_syscall_event(struct test_suite *test __maybe_unused,
+				      int subtest __maybe_unused)
 {
 	int err = -1, fd;
 	struct evsel *evsel;
@@ -66,3 +67,5 @@
 	perf_thread_map__put(threads);
 	return err;
 }
+
+DEFINE_SUITE("Detect openat syscall event", openat_syscall_event);
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 6af9463..a508f1d 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2276,7 +2276,7 @@
 	return test_event(&e);
 }
 
-int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret1, ret2 = 0;
 	char *event, *alias;
@@ -2319,3 +2319,5 @@
 
 	return ret2;
 }
+
+DEFINE_SUITE("Parse event definition strings", parse_events);
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index dfc797e..574b7e4 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -369,7 +369,7 @@
 	return 0;
 }
 
-int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_metric(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
 	TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
@@ -383,3 +383,5 @@
 	}
 	return 0;
 }
+
+DEFINE_SUITE("Parse and process metrics", parse_metric);
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index 4712736..d62e315 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -67,7 +67,8 @@
  *
  * Return: %0 on success, %-1 if the test fails.
  */
-int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_no_sample_id_all(struct test_suite *test __maybe_unused,
+					int subtest __maybe_unused)
 {
 	int err;
 
@@ -103,3 +104,5 @@
 
 	return 0;
 }
+
+DEFINE_SUITE("Parse with no sample_id_all bit set", parse_no_sample_id_all);
diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c
index 58b90c4..c09a9fa 100644
--- a/tools/perf/tests/pe-file-parsing.c
+++ b/tools/perf/tests/pe-file-parsing.c
@@ -68,7 +68,7 @@
 	return TEST_OK;
 }
 
-int test__pe_file_parsing(struct test *test __maybe_unused,
+static int test__pe_file_parsing(struct test_suite *test __maybe_unused,
 			  int subtest __maybe_unused)
 {
 	struct stat st;
@@ -89,10 +89,12 @@
 
 #else
 
-int test__pe_file_parsing(struct test *test __maybe_unused,
+static int test__pe_file_parsing(struct test_suite *test __maybe_unused,
 			  int subtest __maybe_unused)
 {
 	return TEST_SKIP;
 }
 
 #endif
+
+DEFINE_SUITE("PE file support", pe_file_parsing);
diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c
index dd865e0..78cdeb8 100644
--- a/tools/perf/tests/perf-hooks.c
+++ b/tools/perf/tests/perf-hooks.c
@@ -26,7 +26,7 @@
 	raise(SIGSEGV);
 }
 
-int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_hooks(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int hook_flags = 0;
 
@@ -45,3 +45,5 @@
 		return TEST_FAIL;
 	return TEST_OK;
 }
+
+DEFINE_SUITE("perf hooks", perf_hooks);
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 0df471b..6354465 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -41,7 +41,7 @@
 	return cpu;
 }
 
-int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct record_opts opts = {
 		.target = {
@@ -332,3 +332,5 @@
 out:
 	return (err < 0 || errs > 0) ? -1 : 0;
 }
+
+DEFINE_SUITE("PERF_RECORD_* events & perf_sample fields", PERF_RECORD);
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 7c56bc1..d12d0ad 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -23,6 +23,16 @@
 #include "pmu.h"
 #include "pmu-hybrid.h"
 
+/*
+ * Except for x86_64/i386 and Arm64, other archs don't support TSC in perf.  Just
+ * enable the test for x86_64/i386 and Arm64 archs.
+ */
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
+#define TSC_IS_SUPPORTED 1
+#else
+#define TSC_IS_SUPPORTED 0
+#endif
+
 #define CHECK__(x) {				\
 	while ((x) < 0) {			\
 		pr_debug(#x " failed!\n");	\
@@ -45,7 +55,7 @@
  * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
 * supported then the test passes but " (not supported)" is printed.
  */
-int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct record_opts opts = {
 		.mmap_pages	     = UINT_MAX,
@@ -69,6 +79,11 @@
 	u64 test_time, comm1_time = 0, comm2_time = 0;
 	struct mmap *md;
 
+	if (!TSC_IS_SUPPORTED) {
+		pr_debug("Test not supported on this architecture");
+		return TEST_SKIP;
+	}
+
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	CHECK_NOT_NULL__(threads);
 
@@ -185,15 +200,4 @@
 	return err;
 }
 
-bool test__tsc_is_supported(void)
-{
-	/*
-	 * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.
-	 * Just enable the test for x86_64/i386 and Arm64 archs.
-	 */
-#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
-	return true;
-#else
-	return false;
-#endif
-}
+DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc);
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index e8fd0da..71b76de 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -12,27 +12,6 @@
 #include <linux/kernel.h>
 
 #ifdef HAVE_LIBPFM
-static int test__pfm_events(void);
-static int test__pfm_group(void);
-#endif
-
-static const struct {
-	int (*func)(void);
-	const char *desc;
-} pfm_testcase_table[] = {
-#ifdef HAVE_LIBPFM
-	{
-		.func = test__pfm_events,
-		.desc = "test of individual --pfm-events",
-	},
-	{
-		.func = test__pfm_group,
-		.desc = "test groups of --pfm-events",
-	},
-#endif
-};
-
-#ifdef HAVE_LIBPFM
 static int count_pfm_events(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
@@ -44,7 +23,8 @@
 	return count;
 }
 
-static int test__pfm_events(void)
+static int test__pfm_events(struct test_suite *test __maybe_unused,
+			    int subtest __maybe_unused)
 {
 	struct evlist *evlist;
 	struct option opt;
@@ -104,7 +84,8 @@
 	return 0;
 }
 
-static int test__pfm_group(void)
+static int test__pfm_group(struct test_suite *test __maybe_unused,
+			   int subtest __maybe_unused)
 {
 	struct evlist *evlist;
 	struct option opt;
@@ -187,27 +168,27 @@
 	}
 	return 0;
 }
-#endif
-
-const char *test__pfm_subtest_get_desc(int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
-		return NULL;
-	return pfm_testcase_table[i].desc;
-}
-
-int test__pfm_subtest_get_nr(void)
-{
-	return (int)ARRAY_SIZE(pfm_testcase_table);
-}
-
-int test__pfm(struct test *test __maybe_unused, int i __maybe_unused)
-{
-#ifdef HAVE_LIBPFM
-	if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
-		return TEST_FAIL;
-	return pfm_testcase_table[i].func();
 #else
+static int test__pfm_events(struct test_suite *test __maybe_unused,
+			    int subtest __maybe_unused)
+{
 	return TEST_SKIP;
-#endif
 }
+
+static int test__pfm_group(struct test_suite *test __maybe_unused,
+			   int subtest __maybe_unused)
+{
+	return TEST_SKIP;
+}
+#endif
+
+static struct test_case pfm_tests[] = {
+	TEST_CASE_REASON("test of individual --pfm-events", pfm_events, "not compiled in"),
+	TEST_CASE_REASON("test groups of --pfm-events", pfm_group, "not compiled in"),
+	{ .name = NULL, }
+};
+
+struct test_suite suite__pfm = {
+	.desc = "Test libpfm4 support",
+	.test_cases = pfm_tests,
+};
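
As with the clang and llvm suites, the pfm case table now lives outside the
#ifdef, so a perf built without libpfm4 still lists both cases and reports
them as skipped with "not compiled in" rather than exposing an empty table.
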
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 9ae894c..df1c9a3 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -418,7 +418,8 @@
 }
 
 /* Verify generated events from pmu-events.c are as expected */
-static int test_pmu_event_table(void)
+static int test__pmu_event_table(struct test_suite *test __maybe_unused,
+				 int subtest __maybe_unused)
 {
 	const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table();
 	const struct pmu_events_map *map = __test_pmu_get_events_map();
@@ -705,7 +706,8 @@
 };
 
 /* Test that aliases generated are as expected */
-static int test_aliases(void)
+static int test__aliases(struct test_suite *test __maybe_unused,
+			int subtest __maybe_unused)
 {
 	struct perf_pmu *pmu = NULL;
 	unsigned long i;
@@ -892,7 +894,8 @@
 
 }
 
-static int test_parsing(void)
+static int test__parsing(struct test_suite *test __maybe_unused,
+			 int subtest __maybe_unused)
 {
 	const struct pmu_events_map *cpus_map = pmu_events_map__find();
 	const struct pmu_events_map *map;
@@ -1034,7 +1037,8 @@
  * or all defined cpus via the 'fake_pmu'
  * in parse_events.
  */
-static int test_parsing_fake(void)
+static int test__parsing_fake(struct test_suite *test __maybe_unused,
+			      int subtest __maybe_unused)
 {
 	const struct pmu_events_map *map;
 	const struct pmu_event *pe;
@@ -1068,55 +1072,16 @@
 	return 0;
 }
 
-static const struct {
-	int (*func)(void);
-	const char *desc;
-} pmu_events_testcase_table[] = {
-	{
-		.func = test_pmu_event_table,
-		.desc = "PMU event table sanity",
-	},
-	{
-		.func = test_aliases,
-		.desc = "PMU event map aliases",
-	},
-	{
-		.func = test_parsing,
-		.desc = "Parsing of PMU event table metrics",
-	},
-	{
-		.func = test_parsing_fake,
-		.desc = "Parsing of PMU event table metrics with fake PMUs",
-	},
+static struct test_case pmu_events_tests[] = {
+	TEST_CASE("PMU event table sanity", pmu_event_table),
+	TEST_CASE("PMU event map aliases", aliases),
+	TEST_CASE_REASON("Parsing of PMU event table metrics", parsing,
+			 "some metrics failed"),
+	TEST_CASE("Parsing of PMU event table metrics with fake PMUs", parsing_fake),
+	{ .name = NULL, }
 };
 
-const char *test__pmu_events_subtest_get_desc(int subtest)
-{
-	if (subtest < 0 ||
-	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-		return NULL;
-	return pmu_events_testcase_table[subtest].desc;
-}
-
-const char *test__pmu_events_subtest_skip_reason(int subtest)
-{
-	if (subtest < 0 ||
-	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-		return NULL;
-	if (pmu_events_testcase_table[subtest].func != test_parsing)
-		return NULL;
-	return "some metrics failed";
-}
-
-int test__pmu_events_subtest_get_nr(void)
-{
-	return (int)ARRAY_SIZE(pmu_events_testcase_table);
-}
-
-int test__pmu_events(struct test *test __maybe_unused, int subtest)
-{
-	if (subtest < 0 ||
-	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-		return TEST_FAIL;
-	return pmu_events_testcase_table[subtest].func();
-}
+struct test_suite suite__pmu_events = {
+	.desc = "PMU events",
+	.test_cases = pmu_events_tests,
+};
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 714e683..8507bd6 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -137,7 +137,7 @@
 	return &terms;
 }
 
-int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	char *format = test_format_dir_get();
 	LIST_HEAD(formats);
@@ -177,3 +177,5 @@
 	test_format_dir_put(format);
 	return ret;
 }
+
+DEFINE_SUITE("Parse perf pmu format", pmu);
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 98c6d47..6b990ee 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -9,7 +9,7 @@
 #include "tests.h"
 #include "util/debug.h"
 
-int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__python_use(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	char *cmd;
 	int ret;
@@ -23,3 +23,5 @@
 	free(cmd);
 	return ret;
 }
+
+DEFINE_SUITE("'import perf' in python", python_use);
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index c83a115..07f2411 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -36,7 +36,7 @@
  * These are based on the input value (213) specified
  * in branch_stack variable.
  */
-#define BS_EXPECTED_BE	0xa00d000000000000
+#define BS_EXPECTED_BE	0xa000d00000000000
 #define BS_EXPECTED_LE	0xd5000000
 #define FLAG(s)	s->branch_stack->entries[i].flags
 
@@ -368,7 +368,7 @@
  * checks sample format bits separately and together.  If the test passes %0 is
  * returned, otherwise %-1 is returned.
  */
-int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
 	u64 sample_type;
@@ -426,3 +426,5 @@
 
 	return 0;
 }
+
+DEFINE_SUITE("Sample parsing", sample_parsing);
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index ed76c69..9197128 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -76,7 +76,7 @@
 	return ret;
 }
 
-int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused)
 {
 	int ret = TEST_FAIL;
 	char __tempdir[] = "./test-buildid-XXXXXX";
@@ -114,9 +114,11 @@
 	return ret;
 }
 #else
-int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused)
 {
 	pr_debug("Skip SDT event test because SDT support is not compiled\n");
 	return TEST_SKIP;
 }
 #endif
+
+DEFINE_SUITE("Probe SDT events", sdt_event);
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
index 8a168cf..49bd875 100755
--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -12,7 +12,7 @@
 
 collect_z_record() {
 	echo "Collecting compressed record file:"
-	[[ "$(uname -m)" != s390x ]] && gflag='-g'
+	[ "$(uname -m)" != s390x ] && gflag='-g'
 	$perf_tool record -o $trace_file $gflag -z -F 5000 -- \
 		dd count=500 if=/dev/urandom of=/dev/null
 }
diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh
index 2de7fd0..b30dba4 100755
--- a/tools/perf/tests/shell/stat_all_pmu.sh
+++ b/tools/perf/tests/shell/stat_all_pmu.sh
@@ -7,11 +7,11 @@
 for p in $(perf list --raw-dump pmu); do
   echo "Testing $p"
   result=$(perf stat -e "$p" true 2>&1)
-  if [[ ! "$result" =~ "$p" ]] && [[ ! "$result" =~ "<not supported>" ]]; then
+  if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
     # We failed to see the event and it is supported. Possibly the workload was
     # too small so retry with something longer.
     result=$(perf stat -e "$p" perf bench internals synthesize 2>&1)
-    if [[ ! "$result" =~ "$p" ]]; then
+    if ! echo "$result" | grep -q "$p" ; then
       echo "Event '$p' not printed in:"
       echo "$result"
       exit 1
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 2aed20d..13473ae 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -23,7 +23,7 @@
 
 # skip if --bpf-counters is not supported
 if ! perf stat --bpf-counters true > /dev/null 2>&1; then
-	if [ "$1" == "-v" ]; then
+	if [ "$1" = "-v" ]; then
 		echo "Skipping: --bpf-counters not supported"
 		perf --no-pager stat --bpf-counters true || true
 	fi
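
The shell-test tweaks in this series swap bash-only constructs for POSIX ones:
"[[ ... =~ ... ]]" becomes a grep pipeline and "==" inside "[ ]" becomes the
portable "=", so the scripts keep behaving the same when they are run by a
strictly POSIX /bin/sh such as dash.
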
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
new file mode 100755
index 0000000..e59044e
--- /dev/null
+++ b/tools/perf/tests/shell/test_arm_spe.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+# Check Arm SPE trace data recording and synthesized samples
+
+# Uses the 'perf record' to record trace data of Arm SPE events;
+# then verify if any SPE event samples are generated by SPE with
+# 'perf script' and 'perf report' commands.
+
+# SPDX-License-Identifier: GPL-2.0
+# German Gomez <german.gomez@arm.com>, 2021
+
+skip_if_no_arm_spe_event() {
+	perf list | egrep -q 'arm_spe_[0-9]+//' && return 0
+
+	# arm_spe event doesn't exist
+	return 2
+}
+
+skip_if_no_arm_spe_event || exit 2
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+glb_err=0
+
+cleanup_files()
+{
+	rm -f ${perfdata}
+	exit $glb_err
+}
+
+trap cleanup_files exit term int
+
+arm_spe_report() {
+	if [ "$2" != 0 ]; then
+		echo "$1: FAIL"
+		glb_err=$2
+	else
+		echo "$1: PASS"
+	fi
+}
+
+perf_script_samples() {
+	echo "Looking at perf.data file for dumping samples:"
+
+	# from arm-spe.c/arm_spe_synth_events()
+	events="(ld1-miss|ld1-access|llc-miss|lld-access|tlb-miss|tlb-access|branch-miss|remote-access|memory)"
+
+	# Below is an example of the samples dumping:
+	#	dd  3048 [002]          1    l1d-access:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+	#	dd  3048 [002]          1    tlb-access:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+	#	dd  3048 [002]          1        memory:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+	perf script -F,-time -i ${perfdata} 2>&1 | \
+		egrep " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1
+}
+
+perf_report_samples() {
+	echo "Looking at perf.data file for reporting samples:"
+
+	# Below is an example of the samples reporting:
+	#   73.04%    73.04%  dd    libc-2.27.so      [.] _dl_addr
+	#    7.71%     7.71%  dd    libc-2.27.so      [.] getenv
+	#    2.59%     2.59%  dd    ld-2.27.so        [.] strcmp
+	perf report --stdio -i ${perfdata} 2>&1 | \
+		egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
+}
+
+arm_spe_snapshot_test() {
+	echo "Recording trace with snapshot mode $perfdata"
+	perf record -o ${perfdata} -e arm_spe// -S \
+		-- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 &
+	PERFPID=$!
+
+	# Wait for perf program
+	sleep 1
+
+	# Send signal to snapshot trace data
+	kill -USR2 $PERFPID
+
+	# Stop perf program
+	kill $PERFPID
+	wait $PERFPID
+
+	perf_script_samples dd &&
+	perf_report_samples dd
+
+	err=$?
+	arm_spe_report "SPE snapshot testing" $err
+}
+
+arm_spe_snapshot_test
+exit $glb_err
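
The new Arm SPE test exercises snapshot mode end to end: perf record -S runs
in the background against a dd workload, SIGUSR2 triggers a snapshot, the
recording is stopped, and the resulting perf.data must yield SPE-synthesized
samples through both perf script and perf report. The event-name alternation
in perf_script_samples() mirrors the names synthesized by
arm_spe_synth_events() in util/arm-spe.c.
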
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
index c191150..2eb096b 100644
--- a/tools/perf/tests/stat.c
+++ b/tools/perf/tests/stat.c
@@ -47,7 +47,8 @@
 	return 0;
 }
 
-int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat_config(struct test_suite *test __maybe_unused,
+					int subtest __maybe_unused)
 {
 	struct perf_stat_config stat_config = {
 		.aggr_mode	= AGGR_CORE,
@@ -77,7 +78,7 @@
 	return 0;
 }
 
-int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_counts_values count;
 
@@ -103,7 +104,7 @@
 	return 0;
 }
 
-int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat_round(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	TEST_ASSERT_VAL("failed to synthesize stat_config",
 		!perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
@@ -111,3 +112,7 @@
 
 	return 0;
 }
+
+DEFINE_SUITE("Synthesize stat config", synthesize_stat_config);
+DEFINE_SUITE("Synthesize stat", synthesize_stat);
+DEFINE_SUITE("Synthesize stat round", synthesize_stat_round);
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 7498884..9cd6fec 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -133,7 +133,7 @@
 	return err;
 }
 
-int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__sw_clock_freq(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int ret;
 
@@ -143,3 +143,5 @@
 
 	return ret;
 }
+
+DEFINE_SUITE("Software clock events period values", sw_clock_freq);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 62c0ec2..0c0c232 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -321,7 +321,7 @@
  * evsel->core.system_wide and evsel->tracking flags (respectively) with other events
  * sometimes enabled or disabled.
  */
-int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	const char *sched_switch = "sched:sched_switch";
 	struct switch_tracking switch_tracking = { .tids = NULL, };
@@ -588,3 +588,5 @@
 	err = -1;
 	goto out;
 }
+
+DEFINE_SUITE("Track with sched_switch", switch_tracking);
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 4c2969d..25f075f 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -39,7 +39,7 @@
  * if the number of exit event reported by the kernel is 1 or not
  * in order to check the kernel returns correct number of event.
  */
-int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__task_exit(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	int err = -1;
 	union perf_event *event;
@@ -151,3 +151,5 @@
 	evlist__delete(evlist);
 	return err;
 }
+
+DEFINE_SUITE("Number of exit events of a simple workload", task_exit);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index fe1306f..8f65098 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -27,112 +27,146 @@
 	TEST_SKIP = -2,
 };
 
-struct test {
+struct test_suite;
+
+typedef int (*test_fnptr)(struct test_suite *, int);
+
+struct test_case {
+	const char *name;
 	const char *desc;
-	int (*func)(struct test *test, int subtest);
-	struct {
-		bool skip_if_fail;
-		int (*get_nr)(void);
-		const char *(*get_desc)(int subtest);
-		const char *(*skip_reason)(int subtest);
-	} subtest;
-	bool (*is_supported)(void);
+	const char *skip_reason;
+	test_fnptr run_case;
+};
+
+struct test_suite {
+	const char *desc;
+	struct test_case *test_cases;
 	void *priv;
 };
 
-/* Tests */
-int test__vmlinux_matches_kallsyms(struct test *test, int subtest);
-int test__openat_syscall_event(struct test *test, int subtest);
-int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest);
-int test__basic_mmap(struct test *test, int subtest);
-int test__PERF_RECORD(struct test *test, int subtest);
-int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
-int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
-int test__syscall_openat_tp_fields(struct test *test, int subtest);
-int test__pmu(struct test *test, int subtest);
-int test__pmu_events(struct test *test, int subtest);
-const char *test__pmu_events_subtest_get_desc(int subtest);
-const char *test__pmu_events_subtest_skip_reason(int subtest);
-int test__pmu_events_subtest_get_nr(void);
-int test__attr(struct test *test, int subtest);
-int test__dso_data(struct test *test, int subtest);
-int test__dso_data_cache(struct test *test, int subtest);
-int test__dso_data_reopen(struct test *test, int subtest);
-int test__parse_events(struct test *test, int subtest);
-int test__hists_link(struct test *test, int subtest);
-int test__python_use(struct test *test, int subtest);
-int test__bp_signal(struct test *test, int subtest);
-int test__bp_signal_overflow(struct test *test, int subtest);
-int test__bp_accounting(struct test *test, int subtest);
-int test__wp(struct test *test, int subtest);
-const char *test__wp_subtest_get_desc(int subtest);
-const char *test__wp_subtest_skip_reason(int subtest);
-int test__wp_subtest_get_nr(void);
-int test__task_exit(struct test *test, int subtest);
-int test__mem(struct test *test, int subtest);
-int test__sw_clock_freq(struct test *test, int subtest);
-int test__code_reading(struct test *test, int subtest);
-int test__sample_parsing(struct test *test, int subtest);
-int test__keep_tracking(struct test *test, int subtest);
-int test__parse_no_sample_id_all(struct test *test, int subtest);
-int test__dwarf_unwind(struct test *test, int subtest);
-int test__expr(struct test *test, int subtest);
-int test__hists_filter(struct test *test, int subtest);
-int test__mmap_thread_lookup(struct test *test, int subtest);
-int test__thread_maps_share(struct test *test, int subtest);
-int test__hists_output(struct test *test, int subtest);
-int test__hists_cumulate(struct test *test, int subtest);
-int test__switch_tracking(struct test *test, int subtest);
-int test__fdarray__filter(struct test *test, int subtest);
-int test__fdarray__add(struct test *test, int subtest);
-int test__kmod_path__parse(struct test *test, int subtest);
-int test__thread_map(struct test *test, int subtest);
-int test__llvm(struct test *test, int subtest);
-const char *test__llvm_subtest_get_desc(int subtest);
-int test__llvm_subtest_get_nr(void);
-int test__bpf(struct test *test, int subtest);
-const char *test__bpf_subtest_get_desc(int subtest);
-int test__bpf_subtest_get_nr(void);
-int test__session_topology(struct test *test, int subtest);
-int test__thread_map_synthesize(struct test *test, int subtest);
-int test__thread_map_remove(struct test *test, int subtest);
-int test__cpu_map_synthesize(struct test *test, int subtest);
-int test__synthesize_stat_config(struct test *test, int subtest);
-int test__synthesize_stat(struct test *test, int subtest);
-int test__synthesize_stat_round(struct test *test, int subtest);
-int test__event_update(struct test *test, int subtest);
-int test__event_times(struct test *test, int subtest);
-int test__backward_ring_buffer(struct test *test, int subtest);
-int test__cpu_map_print(struct test *test, int subtest);
-int test__cpu_map_merge(struct test *test, int subtest);
-int test__sdt_event(struct test *test, int subtest);
-int test__is_printable_array(struct test *test, int subtest);
-int test__bitmap_print(struct test *test, int subtest);
-int test__perf_hooks(struct test *test, int subtest);
-int test__clang(struct test *test, int subtest);
-const char *test__clang_subtest_get_desc(int subtest);
-int test__clang_subtest_get_nr(void);
-int test__unit_number__scnprint(struct test *test, int subtest);
-int test__mem2node(struct test *t, int subtest);
-int test__maps__merge_in(struct test *t, int subtest);
-int test__time_utils(struct test *t, int subtest);
-int test__jit_write_elf(struct test *test, int subtest);
-int test__api_io(struct test *test, int subtest);
-int test__demangle_java(struct test *test, int subtest);
-int test__demangle_ocaml(struct test *test, int subtest);
-int test__pfm(struct test *test, int subtest);
-const char *test__pfm_subtest_get_desc(int subtest);
-int test__pfm_subtest_get_nr(void);
-int test__parse_metric(struct test *test, int subtest);
-int test__pe_file_parsing(struct test *test, int subtest);
-int test__expand_cgroup_events(struct test *test, int subtest);
-int test__perf_time_to_tsc(struct test *test, int subtest);
-int test__dlfilter(struct test *test, int subtest);
+#define DECLARE_SUITE(name) \
+	extern struct test_suite suite__##name;
 
-bool test__bp_signal_is_supported(void);
-bool test__bp_account_is_supported(void);
-bool test__wp_is_supported(void);
-bool test__tsc_is_supported(void);
+#define TEST_CASE(description, _name)			\
+	{						\
+		.name = #_name,				\
+		.desc = description,			\
+		.run_case = test__##_name,		\
+	}
+
+#define TEST_CASE_REASON(description, _name, _reason)	\
+	{						\
+		.name = #_name,				\
+		.desc = description,			\
+		.run_case = test__##_name,		\
+		.skip_reason = _reason,			\
+	}
+
+#define DEFINE_SUITE(description, _name)		\
+	struct test_case tests__##_name[] = {           \
+		TEST_CASE(description, _name),		\
+		{	.name = NULL, }			\
+	};						\
+	struct test_suite suite__##_name = {		\
+		.desc = description,			\
+		.test_cases = tests__##_name,		\
+	}
+
+/* Tests */
+DECLARE_SUITE(vmlinux_matches_kallsyms);
+DECLARE_SUITE(openat_syscall_event);
+DECLARE_SUITE(openat_syscall_event_on_all_cpus);
+DECLARE_SUITE(basic_mmap);
+DECLARE_SUITE(PERF_RECORD);
+DECLARE_SUITE(perf_evsel__roundtrip_name_test);
+DECLARE_SUITE(perf_evsel__tp_sched_test);
+DECLARE_SUITE(syscall_openat_tp_fields);
+DECLARE_SUITE(pmu);
+DECLARE_SUITE(pmu_events);
+DECLARE_SUITE(attr);
+DECLARE_SUITE(dso_data);
+DECLARE_SUITE(dso_data_cache);
+DECLARE_SUITE(dso_data_reopen);
+DECLARE_SUITE(parse_events);
+DECLARE_SUITE(hists_link);
+DECLARE_SUITE(python_use);
+DECLARE_SUITE(bp_signal);
+DECLARE_SUITE(bp_signal_overflow);
+DECLARE_SUITE(bp_accounting);
+DECLARE_SUITE(wp);
+DECLARE_SUITE(task_exit);
+DECLARE_SUITE(mem);
+DECLARE_SUITE(sw_clock_freq);
+DECLARE_SUITE(code_reading);
+DECLARE_SUITE(sample_parsing);
+DECLARE_SUITE(keep_tracking);
+DECLARE_SUITE(parse_no_sample_id_all);
+DECLARE_SUITE(dwarf_unwind);
+DECLARE_SUITE(expr);
+DECLARE_SUITE(hists_filter);
+DECLARE_SUITE(mmap_thread_lookup);
+DECLARE_SUITE(thread_maps_share);
+DECLARE_SUITE(hists_output);
+DECLARE_SUITE(hists_cumulate);
+DECLARE_SUITE(switch_tracking);
+DECLARE_SUITE(fdarray__filter);
+DECLARE_SUITE(fdarray__add);
+DECLARE_SUITE(kmod_path__parse);
+DECLARE_SUITE(thread_map);
+DECLARE_SUITE(llvm);
+DECLARE_SUITE(bpf);
+DECLARE_SUITE(session_topology);
+DECLARE_SUITE(thread_map_synthesize);
+DECLARE_SUITE(thread_map_remove);
+DECLARE_SUITE(cpu_map_synthesize);
+DECLARE_SUITE(synthesize_stat_config);
+DECLARE_SUITE(synthesize_stat);
+DECLARE_SUITE(synthesize_stat_round);
+DECLARE_SUITE(event_update);
+DECLARE_SUITE(event_times);
+DECLARE_SUITE(backward_ring_buffer);
+DECLARE_SUITE(cpu_map_print);
+DECLARE_SUITE(cpu_map_merge);
+DECLARE_SUITE(sdt_event);
+DECLARE_SUITE(is_printable_array);
+DECLARE_SUITE(bitmap_print);
+DECLARE_SUITE(perf_hooks);
+DECLARE_SUITE(clang);
+DECLARE_SUITE(unit_number__scnprint);
+DECLARE_SUITE(mem2node);
+DECLARE_SUITE(maps__merge_in);
+DECLARE_SUITE(time_utils);
+DECLARE_SUITE(jit_write_elf);
+DECLARE_SUITE(api_io);
+DECLARE_SUITE(demangle_java);
+DECLARE_SUITE(demangle_ocaml);
+DECLARE_SUITE(pfm);
+DECLARE_SUITE(parse_metric);
+DECLARE_SUITE(pe_file_parsing);
+DECLARE_SUITE(expand_cgroup_events);
+DECLARE_SUITE(perf_time_to_tsc);
+DECLARE_SUITE(dlfilter);
+
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * ARM requires explicit rounding down of the instruction pointer in Thumb mode,
+ * and then requires the single-step to be handled explicitly in the overflow
+ * handler to avoid stepping into the SIGIO handler and getting stuck on the
+ * breakpointed instruction.
+ *
+ * Since arm64 has the same single-step handling issue as arm, it also
+ * gets stuck on the breakpointed instruction.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || defined(__aarch64__)
+#define BP_SIGNAL_IS_SUPPORTED 0
+#else
+#define BP_SIGNAL_IS_SUPPORTED 1
+#endif
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
@@ -142,7 +176,7 @@
 #endif
 
 #if defined(__arm__)
-int test__vectors_page(struct test *test, int subtest);
+DECLARE_SUITE(vectors_page);
 #endif
 
 #endif /* TESTS_H */
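
For reference, DEFINE_SUITE above is a one-case convenience wrapper; per the
macros, DEFINE_SUITE("Thread map", thread_map) expands to roughly:

	struct test_case tests__thread_map[] = {
		{
			.name = "thread_map",
			.desc = "Thread map",
			.run_case = test__thread_map,
		},
		{ .name = NULL, }		/* sentinel ending the case list */
	};
	struct test_suite suite__thread_map = {
		.desc = "Thread map",
		.test_cases = tests__thread_map,
	};

Multi-case suites (see the watchpoint conversion in tests/wp.c below) build
the test_case array by hand instead.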
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index d1e208b..e413c13 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -19,7 +19,7 @@
 #define NAME	(const char *) "perf"
 #define NAMEUL	(unsigned long) NAME
 
-int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_thread_map *map;
 
@@ -86,7 +86,7 @@
 	return 0;
 }
 
-int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_thread_map *threads;
 
@@ -106,7 +106,7 @@
 	return 0;
 }
 
-int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map_remove(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_thread_map *threads;
 	char *str;
@@ -145,3 +145,7 @@
 	perf_thread_map__put(threads);
 	return 0;
 }
+
+DEFINE_SUITE("Thread map", thread_map);
+DEFINE_SUITE("Synthesize thread map", thread_map_synthesize);
+DEFINE_SUITE("Remove thread map", thread_map_remove);
diff --git a/tools/perf/tests/thread-maps-share.c b/tools/perf/tests/thread-maps-share.c
index 9371484..84edd82 100644
--- a/tools/perf/tests/thread-maps-share.c
+++ b/tools/perf/tests/thread-maps-share.c
@@ -4,7 +4,7 @@
 #include "thread.h"
 #include "debug.h"
 
-int test__thread_maps_share(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_maps_share(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct machines machines;
 	struct machine *machine;
@@ -96,3 +96,5 @@
 	machines__exit(&machines);
 	return 0;
 }
+
+DEFINE_SUITE("Share thread maps", thread_maps_share);
diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c
index fe57ca3..38df103 100644
--- a/tools/perf/tests/time-utils-test.c
+++ b/tools/perf/tests/time-utils-test.c
@@ -131,7 +131,7 @@
 	return pass;
 }
 
-int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__time_utils(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	bool pass = true;
 
@@ -249,3 +249,5 @@
 
 	return pass ? 0 : TEST_FAIL;
 }
+
+DEFINE_SUITE("time utils", time_utils);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 4574c46..8699861 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -175,7 +175,7 @@
 	return 0;
 }
 
-int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__session_topology(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	char path[PATH_MAX];
 	struct perf_cpu_map *map;
@@ -201,3 +201,5 @@
 	unlink(path);
 	return ret;
 }
+
+DEFINE_SUITE("Session topology", session_topology);
diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c
index 3721757..88bcada1 100644
--- a/tools/perf/tests/unit_number__scnprintf.c
+++ b/tools/perf/tests/unit_number__scnprintf.c
@@ -7,7 +7,7 @@
 #include "units.h"
 #include "debug.h"
 
-int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__unit_number__scnprint(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
 	struct {
 		u64		 n;
@@ -38,3 +38,5 @@
 
 	return TEST_OK;
 }
+
+DEFINE_SUITE("unit_number__scnprintf", unit_number__scnprint);
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 4f884aa..e80df13 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -111,7 +111,8 @@
 	return false;
 }
 
-int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused,
+					int subtest __maybe_unused)
 {
 	int err = -1;
 	struct rb_node *nd;
@@ -352,3 +353,5 @@
 	machine__exit(&vmlinux);
 	return err;
 }
+
+DEFINE_SUITE("vmlinux symtab matches kallsyms", vmlinux_matches_kallsyms);
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index 9387fa7..9d4c451 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -21,6 +21,7 @@
 volatile u64 data1;
 volatile u8 data2[3];
 
+#ifndef __s390x__
 static int wp_read(int fd, long long *count, int size)
 {
 	int ret = read(fd, count, size);
@@ -61,9 +62,14 @@
 
 	return fd;
 }
+#endif
 
-static int wp_ro_test(void)
+static int test__wp_ro(struct test_suite *test __maybe_unused,
+		       int subtest __maybe_unused)
 {
+#if defined(__s390x__) || defined(__x86_64__) || defined(__i386__)
+	return TEST_SKIP;
+#else
 	int fd;
 	unsigned long tmp, tmp1 = rand();
 
@@ -79,10 +85,15 @@
 
 	close(fd);
 	return 0;
+#endif
 }
 
-static int wp_wo_test(void)
+static int test__wp_wo(struct test_suite *test __maybe_unused,
+		       int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+	return TEST_SKIP;
+#else
 	int fd;
 	unsigned long tmp, tmp1 = rand();
 
@@ -98,10 +109,15 @@
 
 	close(fd);
 	return 0;
+#endif
 }
 
-static int wp_rw_test(void)
+static int test__wp_rw(struct test_suite *test __maybe_unused,
+		       int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+	return TEST_SKIP;
+#else
 	int fd;
 	unsigned long tmp, tmp1 = rand();
 
@@ -118,10 +134,15 @@
 
 	close(fd);
 	return 0;
+#endif
 }
 
-static int wp_modify_test(void)
+static int test__wp_modify(struct test_suite *test __maybe_unused,
+			   int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+	return TEST_SKIP;
+#else
 	int fd, ret;
 	unsigned long tmp = rand();
 	struct perf_event_attr new_attr;
@@ -163,93 +184,18 @@
 
 	close(fd);
 	return 0;
-}
-
-static bool wp_ro_supported(void)
-{
-#if defined (__x86_64__) || defined (__i386__)
-	return false;
-#else
-	return true;
 #endif
 }
 
-static const char *wp_ro_skip_msg(void)
-{
-#if defined (__x86_64__) || defined (__i386__)
-	return "missing hardware support";
-#else
-	return NULL;
-#endif
-}
-
-static struct {
-	const char *desc;
-	int (*target_func)(void);
-	bool (*is_supported)(void);
-	const char *(*skip_msg)(void);
-} wp_testcase_table[] = {
-	{
-		.desc = "Read Only Watchpoint",
-		.target_func = &wp_ro_test,
-		.is_supported = &wp_ro_supported,
-		.skip_msg = &wp_ro_skip_msg,
-	},
-	{
-		.desc = "Write Only Watchpoint",
-		.target_func = &wp_wo_test,
-	},
-	{
-		.desc = "Read / Write Watchpoint",
-		.target_func = &wp_rw_test,
-	},
-	{
-		.desc = "Modify Watchpoint",
-		.target_func = &wp_modify_test,
-	},
+static struct test_case wp_tests[] = {
+	TEST_CASE_REASON("Read Only Watchpoint", wp_ro, "missing hardware support"),
+	TEST_CASE_REASON("Write Only Watchpoint", wp_wo, "missing hardware support"),
+	TEST_CASE_REASON("Read / Write Watchpoint", wp_rw, "missing hardware support"),
+	TEST_CASE_REASON("Modify Watchpoint", wp_modify, "missing hardware support"),
+	{ .name = NULL, }
 };
 
-int test__wp_subtest_get_nr(void)
-{
-	return (int)ARRAY_SIZE(wp_testcase_table);
-}
-
-const char *test__wp_subtest_get_desc(int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-		return NULL;
-	return wp_testcase_table[i].desc;
-}
-
-const char *test__wp_subtest_skip_reason(int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-		return NULL;
-	if (!wp_testcase_table[i].skip_msg)
-		return NULL;
-	return wp_testcase_table[i].skip_msg();
-}
-
-int test__wp(struct test *test __maybe_unused, int i)
-{
-	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-		return TEST_FAIL;
-
-	if (wp_testcase_table[i].is_supported &&
-	    !wp_testcase_table[i].is_supported())
-		return TEST_SKIP;
-
-	return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
-}
-
-/* The s390 so far does not have support for
- * instruction breakpoint using the perf_event_open() system call.
- */
-bool test__wp_is_supported(void)
-{
-#if defined(__s390x__)
-	return false;
-#else
-	return true;
-#endif
-}
+struct test_suite suite__wp = {
+	.desc = "Watchpoint",
+	.test_cases = wp_tests,
+};
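
The NULL .name entry is what terminates a suite's case list. A hypothetical
runner loop (illustration only; the real iteration lives in the perf test
harness) would consume the converted suite like this:

	static int run_suite_cases(struct test_suite *suite)
	{
		int failed = 0;

		for (struct test_case *tc = suite->test_cases; tc->name; tc++) {
			int ret = tc->run_case(suite, tc - suite->test_cases);

			if (ret == TEST_SKIP)
				pr_debug("%s: skip (%s)\n", tc->desc,
					 tc->skip_reason ?: "unknown");
			else if (ret != TEST_OK)
				failed++;
		}
		return failed ? TEST_FAIL : TEST_OK;
	}

so a per-case TEST_SKIP return plus the static skip_reason string replaces
the old is_supported()/skip_msg() callback pair.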
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index d6dfe68..f527a46 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -62,6 +62,8 @@
 
 extern struct strarray strarray__socket_families;
 
+extern struct strarray strarray__socket_level;
+
 /**
  * augmented_arg: extra payload for syscall pointer arguments
  
@@ -230,6 +232,9 @@
 size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
 
+size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SK_LEVEL syscall_arg__scnprintf_socket_level
+
 size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
 
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index cd11063..2e0e867 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -7,7 +7,7 @@
 #include <sys/un.h>
 #include <arpa/inet.h>
 
-#include "trace/beauty/generated/socket_arrays.c"
+#include "trace/beauty/generated/sockaddr.c"
 DEFINE_STRARRAY(socket_families, "PF_");
 
 static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
diff --git a/tools/perf/trace/beauty/sockaddr.sh b/tools/perf/trace/beauty/sockaddr.sh
new file mode 100755
index 0000000..3820e5c
--- /dev/null
+++ b/tools/perf/trace/beauty/sockaddr.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# This one uses a copy of the kernel source headers kept in a place used
+# just for these tools/perf/trace/beauty/ scripts; we shouldn't put it in
+# tools/include/linux, otherwise it would be used in the normal compiler
+# build process and would drag needless stuff from the kernel.
+
+# When what these scripts need is already in tools/include/, use it;
+# otherwise grab and check the copy from the kernel sources just for these
+# string-table-building scripts.
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+
+printf "static const char *socket_families[] = {\n"
+# #define AF_LOCAL	1	/* POSIX name for AF_UNIX	*/
+regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+
+egrep $regex ${header_dir}/socket.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[%s] = \"%s\",\n" | \
+	egrep -v "\"(UNIX|MAX)\""
+printf "};\n"
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
index f23a3dda..b0870c7 100644
--- a/tools/perf/trace/beauty/socket.c
+++ b/tools/perf/trace/beauty/socket.c
@@ -9,9 +9,10 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 
+#include "trace/beauty/generated/socket.c"
+
 static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size, bool show_prefix)
 {
-#include "trace/beauty/generated/socket_ipproto_array.c"
 	static DEFINE_STRARRAY(socket_ipproto, "IPPROTO_");
 
 	return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", show_prefix, protocol);
@@ -26,3 +27,21 @@
 
 	return syscall_arg__scnprintf_int(bf, size, arg);
 }
+
+static size_t socket__scnprintf_level(int level, char *bf, size_t size, bool show_prefix)
+{
+#if defined(__alpha__) || defined(__hppa__) || defined(__mips__) || defined(__sparc__)
+	const int sol_socket = 0xffff;
+#else
+	const int sol_socket = 1;
+#endif
+	if (level == sol_socket)
+		return scnprintf(bf, size, "%sSOCKET", show_prefix ? "SOL_" : "");
+
+	return strarray__scnprintf(&strarray__socket_level, bf, size, "%d", show_prefix, level);
+}
+
+size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg)
+{
+	return socket__scnprintf_level(arg->val, bf, size, arg->show_string_prefix);
+}
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
index 3820e5c..76330ac 100755
--- a/tools/perf/trace/beauty/socket.sh
+++ b/tools/perf/trace/beauty/socket.sh
@@ -1,24 +1,28 @@
 #!/bin/sh
 # SPDX-License-Identifier: LGPL-2.1
 
-# This one uses a copy from the kernel sources headers that is in a
-# place used just for these tools/perf/beauty/ usage, we shouldn't not
-# put it in tools/include/linux otherwise they would be used in the
-# normal compiler building process and would drag needless stuff from the
-# kernel.
+if [ $# -gt 0 ] ; then
+	uapi_header_dir=$1
+	beauty_header_dir=$2
+else
+	uapi_header_dir=tools/include/uapi/linux/
+	beauty_header_dir=tools/perf/trace/beauty/include/linux/
+fi
 
-# When what these scripts need is already in tools/include/ then use it,
-# otherwise grab and check the copy from the kernel sources just for these
-# string table building scripts.
+printf "static const char *socket_ipproto[] = {\n"
+ipproto_regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
 
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+egrep $ipproto_regex ${uapi_header_dir}/in.h | \
+	sed -r "s/$ipproto_regex/\2 \1/g"	| \
+	sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
 
-printf "static const char *socket_families[] = {\n"
-# #define AF_LOCAL	1	/* POSIX name for AF_UNIX	*/
-regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+printf "static const char *socket_level[] = {\n"
+socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?'
 
-egrep $regex ${header_dir}/socket.h | \
-	sed -r "s/$regex/\2 \1/g"	| \
-	xargs printf "\t[%s] = \"%s\",\n" | \
-	egrep -v "\"(UNIX|MAX)\""
-printf "};\n"
+egrep $socket_level_regex ${beauty_header_dir}/socket.h | \
+	sed -r "s/$socket_level_regex/\2 \1/g"	| \
+	sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
+
+printf 'DEFINE_STRARRAY(socket_level, "SOL_");\n'
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
deleted file mode 100755
index de0f2f2..0000000
--- a/tools/perf/trace/beauty/socket_ipproto.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: LGPL-2.1
-
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
-
-printf "static const char *socket_ipproto[] = {\n"
-regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
-
-egrep $regex ${header_dir}/in.h | \
-	sed -r "s/$regex/\2 \1/g"	| \
-	sort | xargs printf "\t[%s] = \"%s\",\n"
-printf "};\n"
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index c1f24d0..5075ece 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -535,6 +535,18 @@
 #undef __HPP_SORT_ACC_FN
 #undef __HPP_SORT_RAW_FN
 
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	/*
+	 * At this point fmt should be completely
+	 * unhooked, if not it's a bug.
+	 */
+	BUG_ON(!list_empty(&fmt->list));
+	BUG_ON(!list_empty(&fmt->sort_list));
+
+	if (fmt->free)
+		fmt->free(fmt);
+}
 
 void perf_hpp__init(void)
 {
@@ -598,9 +610,10 @@
 	list_add(&format->sort_list, &list->sorts);
 }
 
-void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+static void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
 {
 	list_del_init(&format->list);
+	fmt_free(format);
 }
 
 void perf_hpp__cancel_cumulate(void)
@@ -672,19 +685,6 @@
 }
 
 
-static void fmt_free(struct perf_hpp_fmt *fmt)
-{
-	/*
-	 * At this point fmt should be completely
-	 * unhooked, if not it's a bug.
-	 */
-	BUG_ON(!list_empty(&fmt->list));
-	BUG_ON(!list_empty(&fmt->sort_list));
-
-	if (fmt->free)
-		fmt->free(fmt);
-}
-
 void perf_hpp__reset_output_field(struct perf_hpp_list *list)
 {
 	struct perf_hpp_fmt *fmt, *tmp;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 8511af5..0190068 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1255,6 +1255,17 @@
 	return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
 }
 
+void annotation__init(struct annotation *notes)
+{
+	pthread_mutex_init(&notes->lock, NULL);
+}
+
+void annotation__exit(struct annotation *notes)
+{
+	annotated_source__delete(notes->src);
+	pthread_mutex_destroy(&notes->lock);
+}
+
 static void annotation_line__add(struct annotation_line *al, struct list_head *head)
 {
 	list_add_tail(&al->node, head);
@@ -3132,7 +3143,7 @@
 	notes->nr_events = nr_pcnt;
 
 	annotation__update_column_widths(notes);
-	sym->annotate2 = true;
+	sym->annotate2 = 1;
 
 	return 0;
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 3757416..986f2bb 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -299,6 +299,9 @@
 	struct annotated_source *src;
 };
 
+void annotation__init(struct annotation *notes);
+void annotation__exit(struct annotation *notes);
+
 static inline int annotation__cycles_width(struct annotation *notes)
 {
 	if (notes->have_cycles && notes->options->show_minmax_cycle)
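
The intended pairing for the two new helpers, as a sketch (the actual call
sites are outside this hunk):

	struct annotation notes;

	annotation__init(&notes);	/* initializes notes.lock */
	/* ... attach notes.src, take notes.lock as needed ... */
	annotation__exit(&notes);	/* frees notes.src, destroys the mutex */
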
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 32fe4183..3fc528c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -151,6 +151,7 @@
 	u64 payload, ip;
 
 	memset(&decoder->record, 0x0, sizeof(decoder->record));
+	decoder->record.context_id = (u64)-1;
 
 	while (1) {
 		err = arm_spe_get_next_packet(decoder);
@@ -180,6 +181,7 @@
 		case ARM_SPE_COUNTER:
 			break;
 		case ARM_SPE_CONTEXT:
+			decoder->record.context_id = payload;
 			break;
 		case ARM_SPE_OP_TYPE:
 			if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 59bdb73..46a8556 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -38,6 +38,7 @@
 	u64 timestamp;
 	u64 virt_addr;
 	u64 phys_addr;
+	u64 context_id;
 };
 
 struct arm_spe_insn;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 58b7069..fccac06 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -51,6 +51,7 @@
 	u8				timeless_decoding;
 	u8				data_queued;
 
+	u64				sample_type;
 	u8				sample_flc;
 	u8				sample_llc;
 	u8				sample_tlb;
@@ -71,6 +72,7 @@
 	u64				kernel_start;
 
 	unsigned long			num_events;
+	u8				use_ctx_pkt_for_pid;
 };
 
 struct arm_spe_queue {
@@ -100,7 +102,7 @@
 	const char *color = PERF_COLOR_BLUE;
 
 	color_fprintf(stdout, color,
-		      ". ... ARM SPE data: size %zu bytes\n",
+		      ". ... ARM SPE data: size %#zx bytes\n",
 		      len);
 
 	while (len) {
@@ -226,6 +228,44 @@
 		PERF_RECORD_MISC_USER;
 }
 
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+				    struct auxtrace_queue *queue)
+{
+	struct arm_spe_queue *speq = queue->priv;
+	pid_t tid;
+
+	tid = machine__get_current_tid(spe->machine, speq->cpu);
+	if (tid != -1) {
+		speq->tid = tid;
+		thread__zput(speq->thread);
+	} else
+		speq->tid = queue->tid;
+
+	if ((!speq->thread) && (speq->tid != -1)) {
+		speq->thread = machine__find_thread(spe->machine, -1,
+						    speq->tid);
+	}
+
+	if (speq->thread) {
+		speq->pid = speq->thread->pid_;
+		if (queue->cpu == -1)
+			speq->cpu = speq->thread->cpu;
+	}
+}
+
+static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
+{
+	struct arm_spe *spe = speq->spe;
+	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
+
+	if (err)
+		return err;
+
+	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
+
+	return 0;
+}
+
 static void arm_spe_prep_sample(struct arm_spe *spe,
 				struct arm_spe_queue *speq,
 				union perf_event *event,
@@ -248,6 +288,12 @@
 	event->sample.header.size = sizeof(struct perf_event_header);
 }
 
+static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
+{
+	event->header.size = perf_event__sample_event_size(sample, type, 0);
+	return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
 static inline int
 arm_spe_deliver_synth_event(struct arm_spe *spe,
 			    struct arm_spe_queue *speq __maybe_unused,
@@ -256,6 +302,12 @@
 {
 	int ret;
 
+	if (spe->synth_opts.inject) {
+		ret = arm_spe__inject_event(event, sample, spe->sample_type);
+		if (ret)
+			return ret;
+	}
+
 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
 	if (ret)
 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
@@ -460,6 +512,19 @@
 		 * can correlate samples between Arm SPE trace data and other
 		 * perf events with correct time ordering.
 		 */
+
+		/*
+		 * Update pid/tid info.
+		 */
+		record = &speq->decoder->record;
+		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
+			ret = arm_spe_set_tid(speq, record->context_id);
+			if (ret)
+				return ret;
+
+			spe->use_ctx_pkt_for_pid = true;
+		}
+
 		ret = arm_spe_sample(speq);
 		if (ret)
 			return ret;
@@ -586,31 +651,6 @@
 	return timeless_decoding;
 }
 
-static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
-				    struct auxtrace_queue *queue)
-{
-	struct arm_spe_queue *speq = queue->priv;
-	pid_t tid;
-
-	tid = machine__get_current_tid(spe->machine, speq->cpu);
-	if (tid != -1) {
-		speq->tid = tid;
-		thread__zput(speq->thread);
-	} else
-		speq->tid = queue->tid;
-
-	if ((!speq->thread) && (speq->tid != -1)) {
-		speq->thread = machine__find_thread(spe->machine, -1,
-						    speq->tid);
-	}
-
-	if (speq->thread) {
-		speq->pid = speq->thread->pid_;
-		if (queue->cpu == -1)
-			speq->cpu = speq->thread->cpu;
-	}
-}
-
 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
 {
 	unsigned int queue_nr;
@@ -641,7 +681,12 @@
 			ts = timestamp;
 		}
 
-		arm_spe_set_pid_tid_cpu(spe, queue);
+		/*
+		 * A previous context-switch event has set pid/tid in the machine's context, so
+		 * here we need to update the pid/tid in the thread and SPE queue.
+		 */
+		if (!spe->use_ctx_pkt_for_pid)
+			arm_spe_set_pid_tid_cpu(spe, queue);
 
 		ret = arm_spe_run_decoder(speq, &ts);
 		if (ret < 0) {
@@ -681,6 +726,25 @@
 	return 0;
 }
 
+static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
+				  struct perf_sample *sample)
+{
+	pid_t pid, tid;
+	int cpu;
+
+	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
+		return 0;
+
+	pid = event->context_switch.next_prev_pid;
+	tid = event->context_switch.next_prev_tid;
+	cpu = sample->cpu;
+
+	if (tid == -1)
+		pr_warning("context_switch event has no tid\n");
+
+	return machine__set_current_tid(spe->machine, cpu, pid, tid);
+}
+
 static int arm_spe_process_event(struct perf_session *session,
 				 union perf_event *event,
 				 struct perf_sample *sample,
@@ -718,6 +782,13 @@
 		}
 	} else if (timestamp) {
 		err = arm_spe_process_queues(spe, timestamp);
+		if (err)
+			return err;
+
+		if (!spe->use_ctx_pkt_for_pid &&
+		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
+		    event->header.type == PERF_RECORD_SWITCH))
+			err = arm_spe_context_switch(spe, event, sample);
 	}
 
 	return err;
@@ -783,7 +854,15 @@
 		return arm_spe_process_timeless_queues(spe, -1,
 				MAX_TIMESTAMP - 1);
 
-	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
+	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
+	if (ret)
+		return ret;
+
+	if (!spe->use_ctx_pkt_for_pid)
+		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
+			    "Matching of TIDs to SPE events could be inaccurate.\n");
+
+	return 0;
 }
 
 static void arm_spe_free_queue(void *priv)
@@ -920,6 +999,8 @@
 	else
 		attr.sample_type |= PERF_SAMPLE_TIME;
 
+	spe->sample_type = attr.sample_type;
+
 	attr.exclude_user = evsel->core.attr.exclude_user;
 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
 	attr.exclude_hv = evsel->core.attr.exclude_hv;
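
Condensed, the decode path now has two PID/TID sources, with the SPE CONTEXT
packet taking precedence; an illustration (not literal patch code):

	/* per record, inside the decode loop */
	if (!spe->timeless_decoding && record->context_id != (u64)-1) {
		/* hardware-recorded TID from an SPE CONTEXT packet */
		ret = arm_spe_set_tid(speq, record->context_id);
		spe->use_ctx_pkt_for_pid = true;
	}

	/* per queue, before running the decoder */
	if (!spe->use_ctx_pkt_for_pid)
		arm_spe_set_pid_tid_cpu(spe, queue);	/* fed by PERF_RECORD_SWITCH* */

If no CONTEXT packet is ever seen, the end-of-session warning above flags
that the switch-event fallback may mis-attribute TIDs.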
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 4d3b4cd..a517eaa 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -33,6 +33,33 @@
        return err ? ERR_PTR(err) : btf;
 }
 
+struct bpf_program * __weak
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_program__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+
+struct bpf_map * __weak
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return bpf_map__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+
+const void * __weak
+btf__raw_data(const struct btf *btf_ro, __u32 *size)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+	return btf__get_raw_data(btf_ro, size);
+#pragma GCC diagnostic pop
+}
+
 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
 {
 	int ret = 0;
@@ -119,7 +146,11 @@
 	node->data_size = data_size;
 	memcpy(node->data, data, data_size);
 
-	perf_env__insert_btf(env, node);
+	if (!perf_env__insert_btf(env, node)) {
+		/* Insertion failed because of a duplicate. */
+		free(node);
+		return -1;
+	}
 	return 0;
 }
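
A note on the three __weak wrappers added above: when perf is linked against
a libbpf new enough to export bpf_object__next_program(),
bpf_object__next_map() and btf__raw_data(), the strong library definitions
override them; otherwise the fallbacks forward to the older deprecated
names, with the pragmas suppressing -Wdeprecated-declarations only around
those calls:

	#pragma GCC diagnostic push
	#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
	return bpf_program__next(prev, obj);	/* old name, same semantics */
	#pragma GCC diagnostic pop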
 
diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h
index 2df8a45..d3731a8 100644
--- a/tools/perf/util/c++/clang-c.h
+++ b/tools/perf/util/c++/clang-c.h
@@ -12,8 +12,9 @@
 extern void perf_clang__init(void);
 extern void perf_clang__cleanup(void);
 
-extern int test__clang_to_IR(void);
-extern int test__clang_to_obj(void);
+struct test_suite;
+extern int test__clang_to_IR(struct test_suite *test, int subtest);
+extern int test__clang_to_obj(struct test_suite *test, int subtest);
 
 extern int perf_clang__compile_bpf(const char *filename,
 				   void **p_obj_buf,
@@ -26,9 +27,6 @@
 static inline void perf_clang__init(void) { }
 static inline void perf_clang__cleanup(void) { }
 
-static inline int test__clang_to_IR(void) { return -1; }
-static inline int test__clang_to_obj(void) { return -1;}
-
 static inline int
 perf_clang__compile_bpf(const char *filename __maybe_unused,
 			void **p_obj_buf __maybe_unused,
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
index 21b2360..a4683ca 100644
--- a/tools/perf/util/c++/clang-test.cpp
+++ b/tools/perf/util/c++/clang-test.cpp
@@ -35,7 +35,8 @@
 }
 
 extern "C" {
-int test__clang_to_IR(void)
+int test__clang_to_IR(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
 {
 	perf_clang_scope _scope;
 
@@ -48,7 +49,8 @@
 	return -1;
 }
 
-int test__clang_to_obj(void)
+int test__clang_to_obj(struct test_suite *test __maybe_unused,
+                       int subtest __maybe_unused)
 {
 	perf_clang_scope _scope;
 
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index ec77e2a..51b429c 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -14,14 +14,16 @@
 #include "env.h"
 #include "pmu-hybrid.h"
 
-#define CORE_SIB_FMT \
+#define PACKAGE_CPUS_FMT \
+	"%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
+#define PACKAGE_CPUS_FMT_OLD \
 	"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
-#define DIE_SIB_FMT \
+#define DIE_CPUS_FMT \
 	"%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
-#define THRD_SIB_FMT \
-	"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
-#define THRD_SIB_FMT_NEW \
+#define CORE_CPUS_FMT \
 	"%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
+#define CORE_CPUS_FMT_OLD \
+	"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
 #define NODE_ONLINE_FMT \
 	"%s/devices/system/node/online"
 #define NODE_MEMINFO_FMT \
@@ -39,8 +41,12 @@
 	u32 i = 0;
 	int ret = -1;
 
-	scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT,
+	scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT,
 		  sysfs__mountpoint(), cpu);
+	if (access(filename, F_OK) == -1) {
+		scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD,
+			sysfs__mountpoint(), cpu);
+	}
 	fp = fopen(filename, "r");
 	if (!fp)
 		goto try_dies;
@@ -54,23 +60,23 @@
 	if (p)
 		*p = '\0';
 
-	for (i = 0; i < tp->core_sib; i++) {
-		if (!strcmp(buf, tp->core_siblings[i]))
+	for (i = 0; i < tp->package_cpus_lists; i++) {
+		if (!strcmp(buf, tp->package_cpus_list[i]))
 			break;
 	}
-	if (i == tp->core_sib) {
-		tp->core_siblings[i] = buf;
-		tp->core_sib++;
+	if (i == tp->package_cpus_lists) {
+		tp->package_cpus_list[i] = buf;
+		tp->package_cpus_lists++;
 		buf = NULL;
 		len = 0;
 	}
 	ret = 0;
 
 try_dies:
-	if (!tp->die_siblings)
+	if (!tp->die_cpus_list)
 		goto try_threads;
 
-	scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+	scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
 		  sysfs__mountpoint(), cpu);
 	fp = fopen(filename, "r");
 	if (!fp)
@@ -85,23 +91,23 @@
 	if (p)
 		*p = '\0';
 
-	for (i = 0; i < tp->die_sib; i++) {
-		if (!strcmp(buf, tp->die_siblings[i]))
+	for (i = 0; i < tp->die_cpus_lists; i++) {
+		if (!strcmp(buf, tp->die_cpus_list[i]))
 			break;
 	}
-	if (i == tp->die_sib) {
-		tp->die_siblings[i] = buf;
-		tp->die_sib++;
+	if (i == tp->die_cpus_lists) {
+		tp->die_cpus_list[i] = buf;
+		tp->die_cpus_lists++;
 		buf = NULL;
 		len = 0;
 	}
 	ret = 0;
 
 try_threads:
-	scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
+	scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT,
 		  sysfs__mountpoint(), cpu);
 	if (access(filename, F_OK) == -1) {
-		scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+		scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD,
 			  sysfs__mountpoint(), cpu);
 	}
 	fp = fopen(filename, "r");
@@ -115,13 +121,13 @@
 	if (p)
 		*p = '\0';
 
-	for (i = 0; i < tp->thread_sib; i++) {
-		if (!strcmp(buf, tp->thread_siblings[i]))
+	for (i = 0; i < tp->core_cpus_lists; i++) {
+		if (!strcmp(buf, tp->core_cpus_list[i]))
 			break;
 	}
-	if (i == tp->thread_sib) {
-		tp->thread_siblings[i] = buf;
-		tp->thread_sib++;
+	if (i == tp->core_cpus_lists) {
+		tp->core_cpus_list[i] = buf;
+		tp->core_cpus_lists++;
 		buf = NULL;
 	}
 	ret = 0;
@@ -139,16 +145,14 @@
 	if (!tp)
 		return;
 
-	for (i = 0 ; i < tp->core_sib; i++)
-		zfree(&tp->core_siblings[i]);
+	for (i = 0 ; i < tp->package_cpus_lists; i++)
+		zfree(&tp->package_cpus_list[i]);
 
-	if (tp->die_sib) {
-		for (i = 0 ; i < tp->die_sib; i++)
-			zfree(&tp->die_siblings[i]);
-	}
+	for (i = 0 ; i < tp->die_cpus_lists; i++)
+		zfree(&tp->die_cpus_list[i]);
 
-	for (i = 0 ; i < tp->thread_sib; i++)
-		zfree(&tp->thread_siblings[i]);
+	for (i = 0 ; i < tp->core_cpus_lists; i++)
+		zfree(&tp->core_cpus_list[i]);
 
 	free(tp);
 }
@@ -164,7 +168,7 @@
 	if (strncmp(uts.machine, "x86_64", 6))
 		return false;
 
-	scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+	scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
 		  sysfs__mountpoint(), 0);
 	if (access(filename, F_OK) == -1)
 		return false;
@@ -205,13 +209,13 @@
 
 	tp = addr;
 	addr += sizeof(*tp);
-	tp->core_siblings = addr;
+	tp->package_cpus_list = addr;
 	addr += sz;
 	if (has_die) {
-		tp->die_siblings = addr;
+		tp->die_cpus_list = addr;
 		addr += sz;
 	}
-	tp->thread_siblings = addr;
+	tp->core_cpus_list = addr;
 
 	for (i = 0; i < nr; i++) {
 		if (!cpu_map__has(map, i))
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index d9af971..854e18f 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -5,12 +5,33 @@
 #include <linux/types.h>
 
 struct cpu_topology {
-	u32	  core_sib;
-	u32	  die_sib;
-	u32	  thread_sib;
-	char	**core_siblings;
-	char	**die_siblings;
-	char	**thread_siblings;
+	/* The number of unique strings in package_cpus_list below. */
+	u32	  package_cpus_lists;
+	/* The number of unique strings in die_cpus_list below. */
+	u32	  die_cpus_lists;
+	/* The number of unique strings in core_cpus_list below. */
+	u32	  core_cpus_lists;
+	/*
+	 * An array of strings where each string is unique and read from
+	 * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs sharing the same
+	 * physical_package_id. The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **package_cpus_list;
+	/*
+	 * An array of strings where each string is unique and read from
+	 * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs within the same die.
+	 * The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **die_cpus_list;
+	/*
+	 * An array of strings where each string is unique and read from
+	 * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI
+	 * each of these is a human-readable list of CPUs within the same
+	 * core. The format is like 0-3, 8-11, 14,17.
+	 */
+	const char **core_cpus_list;
 };
 
 struct numa_topology_node {
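
An assumed example of how the deduplicated sysfs strings turn into counts
(values are illustrative, not from a real machine):

	/*
	 * Hypothetical 32-CPU box: 2 packages, 2 dies per package, SMT-2.
	 *   package_cpus_list = { "0-15", "16-31" }                 -> package_cpus_lists = 2
	 *   die_cpus_list     = { "0-7", "8-15", "16-23", "24-31" } -> die_cpus_lists     = 4
	 *   core_cpus_list    = { "0-1", "2-3", ..., "30-31" }      -> core_cpus_lists    = 16
	 */

These counts are what back the #num_packages/#num_dies/#num_cores literals
added to util/expr.c later in this series.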
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index f323adb..4f672f7 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -537,7 +537,7 @@
 
 	fprintf(stdout, "\n");
 	color_fprintf(stdout, color,
-		     ". ... CoreSight %s Trace data: size %zu bytes\n",
+		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
 		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
 
 	do {
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 17f1dd0..b990489 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -75,12 +75,13 @@
 	return node;
 }
 
-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
 {
 	struct rb_node *parent = NULL;
 	__u32 btf_id = btf_node->id;
 	struct btf_node *node;
 	struct rb_node **p;
+	bool ret = true;
 
 	down_write(&env->bpf_progs.lock);
 	p = &env->bpf_progs.btfs.rb_node;
@@ -94,6 +95,7 @@
 			p = &(*p)->rb_right;
 		} else {
 			pr_debug("duplicated btf %u\n", btf_id);
+			ret = false;
 			goto out;
 		}
 	}
@@ -103,6 +105,7 @@
 	env->bpf_progs.btfs_cnt++;
 out:
 	up_write(&env->bpf_progs.lock);
+	return ret;
 }
 
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1383876..163e5ec 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -167,7 +167,7 @@
 				    struct bpf_prog_info_node *info_node);
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
 							__u32 prog_id);
-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 
 int perf_env__numa_node(struct perf_env *env, int cpu);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ec967fb..ac0127b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -241,7 +241,7 @@
 {
 	perf_evsel__init(&evsel->core, attr, idx);
 	evsel->tracking	   = !idx;
-	evsel->unit	   = "";
+	evsel->unit	   = strdup("");
 	evsel->scale	   = 1.0;
 	evsel->max_events  = ULONG_MAX;
 	evsel->evlist	   = NULL;
@@ -276,13 +276,8 @@
 	}
 
 	if (evsel__is_clock(evsel)) {
-		/*
-		 * The evsel->unit points to static alias->unit
-		 * so it's ok to use static string in here.
-		 */
-		static const char *unit = "msec";
-
-		evsel->unit = unit;
+		free((char *)evsel->unit);
+		evsel->unit = strdup("msec");
 		evsel->scale = 1e-6;
 	}
 
@@ -420,7 +415,11 @@
 
 	evsel->max_events = orig->max_events;
 	evsel->tool_event = orig->tool_event;
-	evsel->unit = orig->unit;
+	free((char *)evsel->unit);
+	evsel->unit = strdup(orig->unit);
+	if (evsel->unit == NULL)
+		goto out_err;
+
 	evsel->scale = orig->scale;
 	evsel->snapshot = orig->snapshot;
 	evsel->per_pkg = orig->per_pkg;
@@ -1441,6 +1440,7 @@
 	zfree(&evsel->group_name);
 	zfree(&evsel->name);
 	zfree(&evsel->pmu_name);
+	zfree(&evsel->unit);
 	zfree(&evsel->metric_id);
 	evsel__zero_per_pkg(evsel);
 	hashmap__free(evsel->per_pkg_mask);
@@ -3037,3 +3037,15 @@
 {
 	evsel->core.leader = &leader->core;
 }
+
+int evsel__source_count(const struct evsel *evsel)
+{
+	struct evsel *pos;
+	int count = 0;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		if (pos->metric_leader == evsel)
+			count++;
+	}
+	return count;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3ea6871..29d49a8 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -489,6 +489,7 @@
 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
 bool evsel__is_leader(struct evsel *evsel);
 void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
+int evsel__source_count(const struct evsel *evsel);
 
 /*
  * Macro to swap the bit-field postition and size.
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 77c6ad8..1d532b9 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -5,13 +5,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include "metricgroup.h"
+#include "cpumap.h"
+#include "cputopo.h"
 #include "debug.h"
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
+#include "smt.h"
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <ctype.h>
+#include <math.h>
 
 #ifdef PARSER_DEBUG
 extern int expr_debug;
@@ -19,7 +23,10 @@
 
 struct expr_id_data {
 	union {
-		double val;
+		struct {
+			double val;
+			int source_count;
+		} val;
 		struct {
 			double val;
 			const char *metric_name;
@@ -137,6 +144,13 @@
 /* Caller must make sure id is allocated */
 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
 {
+	return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+				  double val, int source_count)
+{
 	struct expr_id_data *data_ptr = NULL, *old_data = NULL;
 	char *old_key = NULL;
 	int ret;
@@ -144,7 +158,8 @@
 	data_ptr = malloc(sizeof(*data_ptr));
 	if (!data_ptr)
 		return -ENOMEM;
-	data_ptr->val = val;
+	data_ptr->val.val = val;
+	data_ptr->val.source_count = source_count;
 	data_ptr->kind = EXPR_ID_DATA__VALUE;
 
 	ret = hashmap__set(ctx->ids, id, data_ptr,
@@ -240,7 +255,7 @@
 
 	switch (data->kind) {
 	case EXPR_ID_DATA__VALUE:
-		pr_debug2("lookup(%s): val %f\n", id, data->val);
+		pr_debug2("lookup(%s): val %f\n", id, data->val.val);
 		break;
 	case EXPR_ID_DATA__REF:
 		pr_debug2("lookup(%s): ref metric name %s\n", id,
@@ -251,7 +266,7 @@
 			pr_debug("%s failed to count\n", id);
 			return -1;
 		}
-		pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+		pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val);
 		break;
 	case EXPR_ID_DATA__REF_VALUE:
 		pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
@@ -366,7 +381,47 @@
 double expr_id_data__value(const struct expr_id_data *data)
 {
 	if (data->kind == EXPR_ID_DATA__VALUE)
-		return data->val;
+		return data->val.val;
 	assert(data->kind == EXPR_ID_DATA__REF_VALUE);
 	return data->ref.val;
 }
+
+double expr_id_data__source_count(const struct expr_id_data *data)
+{
+	assert(data->kind == EXPR_ID_DATA__VALUE);
+	return data->val.source_count;
+}
+
+double expr__get_literal(const char *literal)
+{
+	static struct cpu_topology *topology;
+
+	if (!strcmp("#smt_on", literal))
+		return smt_on() > 0 ? 1.0 : 0.0;
+
+	if (!strcmp("#num_cpus", literal))
+		return cpu__max_present_cpu();
+
+	/*
+	 * Assume that topology strings are consistent, e.g. CPUs "0-1"
+	 * wouldn't also be listed as "0,1"; after deduplication the number
+	 * of unique strings indicates the number of packages, dies, etc.
+	 */
+	if (!topology) {
+		topology = cpu_topology__new();
+		if (!topology) {
+			pr_err("Error creating CPU topology");
+			return NAN;
+		}
+	}
+	if (!strcmp("#num_packages", literal))
+		return topology->package_cpus_lists;
+	if (!strcmp("#num_dies", literal))
+		return topology->die_cpus_lists;
+	if (!strcmp("#num_cores", literal))
+		return topology->core_cpus_lists;
+
+	pr_err("Unrecognized literal '%s'", literal);
+	return NAN;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index cf81f91..bd21169 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -40,6 +40,8 @@
 void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
 int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+				double val, int source_count);
 int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
 int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
 		 struct expr_id_data **data);
@@ -55,5 +57,7 @@
 		   struct expr_parse_ctx *ids);
 
 double expr_id_data__value(const struct expr_id_data *data);
+double expr_id_data__source_count(const struct expr_id_data *data);
+double expr__get_literal(const char *literal);
 
 #endif
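
A minimal sketch of driving the new literal resolver directly (assumes the
perf build tree; it mirrors what the expr.l rule below does per token):

	#include <math.h>
	#include <stdio.h>
	#include "util/expr.h"

	static int dump_topology_literals(void)
	{
		static const char * const lits[] = {
			"#num_cpus", "#num_packages", "#num_dies", "#num_cores",
		};

		for (size_t i = 0; i < sizeof(lits) / sizeof(lits[0]); i++) {
			double v = expr__get_literal(lits[i]);

			if (isnan(v))	/* unknown literal or topology read failure */
				return -1;
			printf("%s = %.0f\n", lits[i], v);
		}
		return 0;
	}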
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index bd20f33..0a13eb2 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -6,6 +6,7 @@
 #include <linux/compiler.h>
 #include "expr.h"
 #include "expr-bison.h"
+#include <math.h>
 
 char *expr_get_text(yyscan_t yyscanner);
 YYSTYPE *expr_get_lval(yyscan_t yyscanner);
@@ -77,6 +78,17 @@
 	yylval->str = normalize(yylval->str, runtime);
 	return token;
 }
+
+static int literal(yyscan_t scanner)
+{
+	YYSTYPE *yylval = expr_get_lval(scanner);
+
+	yylval->num = expr__get_literal(expr_get_text(scanner));
+	if (isnan(yylval->num))
+		return EXPR_ERROR;
+
+	return LITERAL;
+}
 %}
 
 number		([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
@@ -85,6 +97,7 @@
 spec		\\{sch}
 sym		[0-9a-zA-Z_\.:@?]+
 symbol		({spec}|{sym})+
+literal		#[0-9a-zA-Z_\.\-]+
 
 %%
 	struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
@@ -94,7 +107,8 @@
 min		{ return MIN; }
 if		{ return IF; }
 else		{ return ELSE; }
-#smt_on		{ return SMT_ON; }
+source_count	{ return SOURCE_COUNT; }
+{literal}	{ return literal(yyscanner); }
 {number}	{ return value(yyscanner); }
 {symbol}	{ return str(yyscanner, ID, sctx->runtime); }
 "|"		{ return '|'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index f969dfa..a30b825 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -3,8 +3,8 @@
 #define YYDEBUG 1
 #include <assert.h>
 #include <math.h>
+#include <stdlib.h>
 #include "util/debug.h"
-#include "smt.h"
 #define IN_EXPR_Y 1
 #include "expr.h"
 %}
@@ -37,7 +37,7 @@
 	} ids;
 }
 
-%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR
 %left MIN MAX IF
 %left '|'
 %left '^'
@@ -46,7 +46,7 @@
 %left '-' '+'
 %left '*' '/' '%'
 %left NEG NOT
-%type <num> NUMBER
+%type <num> NUMBER LITERAL
 %type <str> ID
 %destructor { free ($$); } <str>
 %type <ids> expr if_expr
@@ -83,6 +83,41 @@
 	return result;
 }
 
+static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
+			    bool compute_ids, bool source_count)
+{
+	struct ids result;
+
+	if (!compute_ids) {
+		/*
+		 * Compute the event's value from ID. If the ID isn't known then
+		 * it isn't used to compute the formula so set to NAN.
+		 */
+		struct expr_id_data *data;
+
+		result.val = NAN;
+		if (expr__resolve_id(ctx, id, &data) == 0) {
+			result.val = source_count
+				? expr_id_data__source_count(data)
+				: expr_id_data__value(data);
+		}
+		result.ids = NULL;
+		free(id);
+	} else {
+		/*
+		 * Set the value to BOTTOM to show that any value is possible
+		 * when the event is computed. Create a set of just the ID.
+		 */
+		result.val = BOTTOM;
+		result.ids = ids__new();
+		if (!result.ids || ids__insert(result.ids, id)) {
+			pr_err("Error creating IDs for '%s'", id);
+			free(id);
+		}
+	}
+	return result;
+}
+
 /*
  * If we're not computing ids or $1 and $3 are constants, compute the new
  * constant value using OP. Its invariant that there are no ids.  If computing
@@ -168,32 +203,8 @@
 	$$.val = $1;
 	$$.ids = NULL;
 }
-| ID
-{
-	if (!compute_ids) {
-		/*
-		 * Compute the event's value from ID. If the ID isn't known then
-		 * it isn't used to compute the formula so set to NAN.
-		 */
-		struct expr_id_data *data;
-
-		$$.val = NAN;
-		if (expr__resolve_id(ctx, $1, &data) == 0)
-			$$.val = expr_id_data__value(data);
-
-		$$.ids = NULL;
-		free($1);
-	} else {
-		/*
-		 * Set the value to BOTTOM to show that any value is possible
-		 * when the event is computed. Create a set of just the ID.
-		 */
-		$$.val = BOTTOM;
-		$$.ids = ids__new();
-		if (!$$.ids || ids__insert($$.ids, $1))
-			YYABORT;
-	}
-}
+| ID				{ $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
+| SOURCE_COUNT '(' ID ')'	{ $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
 | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); }
 | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); }
 | expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); }
@@ -280,9 +291,9 @@
 		$$ = union_expr($3, $5);
 	}
 }
-| SMT_ON
+| LITERAL
 {
-	$$.val = smt_on() > 0 ? 1.0 : 0.0;
+	$$.val = $1;
 	$$.ids = NULL;
 }
 ;
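
With the grammar in place, metric expressions can combine the generalized
literals with the new source_count() function; hypothetical examples
("event_a" is a placeholder event name):

	#smt_on
	#num_cores / #num_packages
	event_a / source_count(event_a)

The first still parses as before (now through the generic LITERAL token),
the second yields cores per package from the topology counts, and the third
divides an aggregated count by the number of evsels sharing event_a as their
metric leader (see evsel__source_count() above).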
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 56511db..79cce21 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -583,21 +583,21 @@
 	if (!tp)
 		return -1;
 
-	ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib));
+	ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists));
 	if (ret < 0)
 		goto done;
 
-	for (i = 0; i < tp->core_sib; i++) {
-		ret = do_write_string(ff, tp->core_siblings[i]);
+	for (i = 0; i < tp->package_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->package_cpus_list[i]);
 		if (ret < 0)
 			goto done;
 	}
-	ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib));
+	ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists));
 	if (ret < 0)
 		goto done;
 
-	for (i = 0; i < tp->thread_sib; i++) {
-		ret = do_write_string(ff, tp->thread_siblings[i]);
+	for (i = 0; i < tp->core_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->core_cpus_list[i]);
 		if (ret < 0)
 			break;
 	}
@@ -617,15 +617,15 @@
 			return ret;
 	}
 
-	if (!tp->die_sib)
+	if (!tp->die_cpus_lists)
 		goto done;
 
-	ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib));
+	ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists));
 	if (ret < 0)
 		goto done;
 
-	for (i = 0; i < tp->die_sib; i++) {
-		ret = do_write_string(ff, tp->die_siblings[i]);
+	for (i = 0; i < tp->die_cpus_lists; i++) {
+		ret = do_write_string(ff, tp->die_cpus_list[i]);
 		if (ret < 0)
 			goto done;
 	}
@@ -4257,9 +4257,11 @@
 
 	switch (ev->type) {
 	case PERF_EVENT_UPDATE__UNIT:
+		free((char *)evsel->unit);
 		evsel->unit = strdup(ev->data);
 		break;
 	case PERF_EVENT_UPDATE__NAME:
+		free(evsel->name);
 		evsel->name = strdup(ev->data);
 		break;
 	case PERF_EVENT_UPDATE__SCALE:
@@ -4268,11 +4270,11 @@
 		break;
 	case PERF_EVENT_UPDATE__CPUS:
 		ev_cpus = (struct perf_record_event_update_cpus *)ev->data;
-
 		map = cpu_map__new_data(&ev_cpus->cpus);
-		if (map)
+		if (map) {
+			perf_cpu_map__put(evsel->core.own_cpus);
 			evsel->core.own_cpus = map;
-		else
+		} else
 			pr_err("failed to get event_update cpus\n");
 	default:
 		break;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 65fe65b..b776465 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -289,15 +289,10 @@
 	return htime;
 }
 
-static void he_stat__add_period(struct he_stat *he_stat, u64 period,
-				u64 weight, u64 ins_lat, u64 p_stage_cyc)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period)
 {
-
 	he_stat->period		+= period;
-	he_stat->weight		+= weight;
 	he_stat->nr_events	+= 1;
-	he_stat->ins_lat	+= ins_lat;
-	he_stat->p_stage_cyc	+= p_stage_cyc;
 }
 
 static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -308,9 +303,6 @@
 	dest->period_guest_sys	+= src->period_guest_sys;
 	dest->period_guest_us	+= src->period_guest_us;
 	dest->nr_events		+= src->nr_events;
-	dest->weight		+= src->weight;
-	dest->ins_lat		+= src->ins_lat;
-	dest->p_stage_cyc		+= src->p_stage_cyc;
 }
 
 static void he_stat__decay(struct he_stat *he_stat)
@@ -598,9 +590,6 @@
 	struct hist_entry *he;
 	int64_t cmp;
 	u64 period = entry->stat.period;
-	u64 weight = entry->stat.weight;
-	u64 ins_lat = entry->stat.ins_lat;
-	u64 p_stage_cyc = entry->stat.p_stage_cyc;
 	bool leftmost = true;
 
 	p = &hists->entries_in->rb_root.rb_node;
@@ -619,11 +608,11 @@
 
 		if (!cmp) {
 			if (sample_self) {
-				he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc);
+				he_stat__add_period(&he->stat, period);
 				hist_entry__add_callchain_period(he, period);
 			}
 			if (symbol_conf.cumulate_callchain)
-				he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc);
+				he_stat__add_period(he->stat_acc, period);
 
 			/*
 			 * This mem info was allocated from sample__resolve_mem
@@ -733,9 +722,6 @@
 		.stat = {
 			.nr_events = 1,
 			.period	= sample->period,
-			.weight = sample->weight,
-			.ins_lat = sample->ins_lat,
-			.p_stage_cyc = sample->p_stage_cyc,
 		},
 		.parent = sym_parent,
 		.filtered = symbol__parent_filter(sym_parent) | al->filtered,
@@ -748,6 +734,9 @@
 		.raw_size = sample->raw_size,
 		.ops = ops,
 		.time = hist_time(sample->time),
+		.weight = sample->weight,
+		.ins_lat = sample->ins_lat,
+		.p_stage_cyc = sample->p_stage_cyc,
 	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
 	if (!hists->has_callchains && he && he->callchain_size != 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 5343b62..621f35a 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -369,7 +369,6 @@
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
 void perf_hpp__cancel_cumulate(void);
 void perf_hpp__setup_output_field(struct perf_hpp_list *list);
 void perf_hpp__reset_output_field(struct perf_hpp_list *list);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5bfb6f8..ba74fdf 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -402,8 +402,10 @@
 	if (!evsel)
 		return -ENOMEM;
 	evsel->tool_event = tool_event;
-	if (tool_event == PERF_TOOL_DURATION_TIME)
-		evsel->unit = "ns";
+	if (tool_event == PERF_TOOL_DURATION_TIME) {
+		free((char *)evsel->unit);
+		evsel->unit = strdup("ns");
+	}
 	return 0;
 }
 
@@ -1630,7 +1632,8 @@
 	if (parse_state->fake_pmu)
 		return 0;
 
-	evsel->unit = info.unit;
+	free((char *)evsel->unit);
+	evsel->unit = strdup(info.unit);
 	evsel->scale = info.scale;
 	evsel->per_pkg = info.per_pkg;
 	evsel->snapshot = info.snapshot;
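
With evsel->unit now uniformly heap-allocated, every assignment must be a
strdup() and every overwrite must free() the prior value; the old
string-literal assignment would have made the shared free() path undefined
behavior. A small sketch of the convention (set_unit is illustrative, not a
perf function):

	#include <stdlib.h>
	#include <string.h>

	/* Replace a heap-owned string; never store a literal in the slot. */
	static int set_unit(const char **slot, const char *val)
	{
		char *copy = strdup(val);

		if (!copy)
			return -1;
		free((char *)*slot);	/* safe: slot only ever holds heap memory */
		*slot = copy;
		return 0;
	}

	int main(void)
	{
		const char *unit = NULL;

		set_unit(&unit, "ns");		/* first assignment */
		set_unit(&unit, "msec");	/* overwrite frees the old copy */
		free((char *)unit);
		return 0;
	}
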
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 568a88c..a111065b 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1325,88 +1325,68 @@
 	.se_width_idx	= HISTC_MISPREDICT,
 };
 
-static u64 he_weight(struct hist_entry *he)
-{
-	return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
-}
-
 static int64_t
-sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__weight_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return he_weight(left) - he_weight(right);
+	return left->weight - right->weight;
 }
 
 static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
 				    size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
+	return repsep_snprintf(bf, size, "%-*llu", width, he->weight);
 }
 
 struct sort_entry sort_local_weight = {
 	.se_header	= "Local Weight",
-	.se_cmp		= sort__local_weight_cmp,
+	.se_cmp		= sort__weight_cmp,
 	.se_snprintf	= hist_entry__local_weight_snprintf,
 	.se_width_idx	= HISTC_LOCAL_WEIGHT,
 };
 
-static int64_t
-sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-	return left->stat.weight - right->stat.weight;
-}
-
 static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
 					      size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
+	return repsep_snprintf(bf, size, "%-*llu", width,
+			       he->weight * he->stat.nr_events);
 }
 
 struct sort_entry sort_global_weight = {
 	.se_header	= "Weight",
-	.se_cmp		= sort__global_weight_cmp,
+	.se_cmp		= sort__weight_cmp,
 	.se_snprintf	= hist_entry__global_weight_snprintf,
 	.se_width_idx	= HISTC_GLOBAL_WEIGHT,
 };
 
-static u64 he_ins_lat(struct hist_entry *he)
-{
-		return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0;
-}
-
 static int64_t
-sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
+sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-		return he_ins_lat(left) - he_ins_lat(right);
+	return left->ins_lat - right->ins_lat;
 }
 
 static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf,
 					      size_t size, unsigned int width)
 {
-		return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he));
+	return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat);
 }
 
 struct sort_entry sort_local_ins_lat = {
 	.se_header	= "Local INSTR Latency",
-	.se_cmp		= sort__local_ins_lat_cmp,
+	.se_cmp		= sort__ins_lat_cmp,
 	.se_snprintf	= hist_entry__local_ins_lat_snprintf,
 	.se_width_idx	= HISTC_LOCAL_INS_LAT,
 };
 
-static int64_t
-sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-		return left->stat.ins_lat - right->stat.ins_lat;
-}
-
 static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf,
 					       size_t size, unsigned int width)
 {
-		return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat);
+	return repsep_snprintf(bf, size, "%-*u", width,
+			       he->ins_lat * he->stat.nr_events);
 }
 
 struct sort_entry sort_global_ins_lat = {
 	.se_header	= "INSTR Latency",
-	.se_cmp		= sort__global_ins_lat_cmp,
+	.se_cmp		= sort__ins_lat_cmp,
 	.se_snprintf	= hist_entry__global_ins_lat_snprintf,
 	.se_width_idx	= HISTC_GLOBAL_INS_LAT,
 };
@@ -1414,13 +1394,13 @@
 static int64_t
 sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return left->stat.p_stage_cyc - right->stat.p_stage_cyc;
+	return left->p_stage_cyc - right->p_stage_cyc;
 }
 
 static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
 					size_t size, unsigned int width)
 {
-	return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc);
+	return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
 }
 
 struct sort_entry sort_p_stage_cyc = {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b67c469..7b71455 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -49,9 +49,6 @@
 	u64			period_us;
 	u64			period_guest_sys;
 	u64			period_guest_us;
-	u64			weight;
-	u64			ins_lat;
-	u64			p_stage_cyc;
 	u32			nr_events;
 };
 
@@ -109,6 +106,9 @@
 	s32			socket;
 	s32			cpu;
 	u64			code_page_size;
+	u64			weight;
+	u64			ins_lat;
+	u64			p_stage_cyc;
 	u8			cpumode;
 	u8			depth;
 
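
Net effect of the sort.c and sort.h hunks: weight, ins_lat and p_stage_cyc
are now per-entry values on hist_entry rather than running sums in he_stat.
For example, an entry that aggregated 4 samples, each with weight 10, prints
Local Weight 10 (he->weight) and global Weight 10 * 4 = 40
(he->weight * he->stat.nr_events), instead of accumulating 40 and dividing
back out for the local view; one se_cmp also serves both variants, since
both sort on the same per-entry field.
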
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index e4fb02b..5c7308e 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -829,10 +829,12 @@
 		struct saved_value *v;
 		struct stats *stats;
 		u64 metric_total = 0;
+		int source_count;
 
 		if (!strcmp(metric_events[i]->name, "duration_time")) {
 			stats = &walltime_nsecs_stats;
 			scale = 1e-9;
+			source_count = 1;
 		} else {
 			v = saved_value_lookup(metric_events[i], cpu, false,
 					       STAT_NONE, 0, st,
@@ -841,6 +843,7 @@
 				break;
 			stats = &v->stats;
 			scale = 1.0;
+			source_count = evsel__source_count(metric_events[i]);
 
 			if (v->metric_other)
 				metric_total = v->metric_total;
@@ -849,7 +852,9 @@
 		if (!n)
 			return -ENOMEM;
 
-		expr__add_id_val(pctx, n, metric_total ? : avg_stats(stats) * scale);
+		expr__add_id_val_source_count(pctx, n,
+					metric_total ? : avg_stats(stats) * scale,
+					source_count);
 	}
 
 	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
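
The source_count plumbing lets metric expressions tell a value counted once
from one aggregated across several sources: duration_time is pinned to a
source count of 1, everything else asks evsel__source_count(), and
expr__add_id_val_source_count() records the count alongside the value so an
expression can divide by it (e.g. EVENT / source_count(EVENT) for a
per-source average).
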
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index aa1b7c1..b2ed314 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -274,7 +274,7 @@
 	if (symbol_conf.priv_size) {
 		if (symbol_conf.init_annotation) {
 			struct annotation *notes = (void *)sym;
-			pthread_mutex_init(&notes->lock, NULL);
+			annotation__init(notes);
 		}
 		sym = ((void *)sym) + symbol_conf.priv_size;
 	}
@@ -294,6 +294,13 @@
 
 void symbol__delete(struct symbol *sym)
 {
+	if (symbol_conf.priv_size) {
+		if (symbol_conf.init_annotation) {
+			struct annotation *notes = symbol__annotation(sym);
+
+			annotation__exit(notes);
+		}
+	}
 	free(((void *)sym) - symbol_conf.priv_size);
 }
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1661966..fbf866d 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -40,22 +40,33 @@
 			     GElf_Shdr *shp, const char *name, size_t *idx);
 #endif
 
-/** struct symbol - symtab entry
- *
- * @ignore - resolvable but tools ignore it (e.g. idle routines)
+/**
+ * A symtab entry. When allocated this may be preceded by an annotation (see
+ * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node
+ * to sort by name (see struct symbol_name_rb_node).
  */
 struct symbol {
 	struct rb_node	rb_node;
+	/** Range of symbol [start, end). */
 	u64		start;
 	u64		end;
+	/** Length of the string name. */
 	u16		namelen;
+	/** ELF symbol type as defined for st_info. E.g. STT_OBJECT or STT_FUNC. */
 	u8		type:4;
+	/** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
 	u8		binding:4;
+	/** Set true for kernel symbols of idle routines. */
 	u8		idle:1;
+	/** Resolvable but tools ignore it (e.g. idle routines). */
 	u8		ignore:1;
+	/** Symbol for an inlined function. */
 	u8		inlined:1;
+	/** Has symbol__annotate2 been performed. */
+	u8		annotate2:1;
+	/** Architecture specific. Unused except on PPC where it holds st_other. */
 	u8		arch_sym;
-	bool		annotate2;
+	/** The name of length namelen associated with the symbol. */
 	char		name[];
 };
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 37a9492..df3c467 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -379,32 +379,32 @@
 	return 0;
 }
 
-const char *perf_tip(const char *dirpath)
+int perf_tip(char **strp, const char *dirpath)
 {
 	struct strlist *tips;
 	struct str_node *node;
-	char *tip = NULL;
 	struct strlist_config conf = {
 		.dirname = dirpath,
 		.file_only = true,
 	};
+	int ret = 0;
 
+	*strp = NULL;
 	tips = strlist__new("tips.txt", &conf);
 	if (tips == NULL)
-		return errno == ENOENT ? NULL :
-			"Tip: check path of tips.txt or get more memory! ;-p";
+		return -errno;
 
 	if (strlist__nr_entries(tips) == 0)
 		goto out;
 
 	node = strlist__entry(tips, random() % strlist__nr_entries(tips));
-	if (asprintf(&tip, "Tip: %s", node->s) < 0)
-		tip = (char *)"Tip: get more memory! ;-)";
+	if (asprintf(strp, "Tip: %s", node->s) < 0)
+		ret = -ENOMEM;
 
 out:
 	strlist__delete(tips);
 
-	return tip;
+	return ret;
 }
 
 char *perf_exe(char *buf, int len)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ad73705..9f0d36b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -39,7 +39,7 @@
 #define KVER_FMT	"%d.%d.%d"
 #define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
 
-const char *perf_tip(const char *dirpath);
+int perf_tip(char **strp, const char *dirpath);
 
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
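
For reference, a hedged sketch of the new perf_tip() calling convention: on
success the tip is heap-allocated and owned by the caller, and a missing
tips.txt now surfaces as -ENOENT instead of a sentinel string. The directory
path below is purely illustrative:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include "util.h"	/* perf_tip(), inside the perf source tree */

	int main(void)
	{
		char *tip = NULL;
		int ret = perf_tip(&tip, "/usr/share/doc/perf-tip");

		if (!ret && tip) {
			printf("%s\n", tip);
			free(tip);	/* caller owns the asprintf() buffer */
		} else if (ret && ret != -ENOENT) {
			fprintf(stderr, "reading tips failed: %d\n", ret);
		}
		return 0;
	}
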
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
index 565fccd..016cff4 100644
--- a/tools/testing/radix-tree/linux/lockdep.h
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -1,5 +1,8 @@
 #ifndef _LINUX_LOCKDEP_H
 #define _LINUX_LOCKDEP_H
+
+#include <linux/spinlock.h>
+
 struct lock_class_key {
 	unsigned int a;
 };
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 54b0a41..62fafbe 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -187,7 +187,7 @@
 $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	       \
 		    OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF)     \
-		    BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/		       \
+		    BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/		       \
 		    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf			       \
 		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	       \
 		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
new file mode 100644
index 0000000..e1de5f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_helper_restricted.skel.h"
+
+void test_helper_restricted(void)
+{
+	int prog_i = 0, prog_cnt;
+	int duration = 0;
+
+	do {
+		struct test_helper_restricted *test;
+		int maybeOK;
+
+		test = test_helper_restricted__open();
+		if (!ASSERT_OK_PTR(test, "open"))
+			return;
+
+		prog_cnt = test->skeleton->prog_cnt;
+
+		for (int j = 0; j < prog_cnt; ++j) {
+			struct bpf_program *prog = *test->skeleton->progs[j].prog;
+
+			maybeOK = bpf_program__set_autoload(prog, prog_i == j);
+			ASSERT_OK(maybeOK, "set autoload");
+		}
+
+		maybeOK = test_helper_restricted__load(test);
+		CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
+
+		test_helper_restricted__destroy(test);
+	} while (++prog_i < prog_cnt);
+}
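
The loop relies on libbpf's open/load split: autoload can only be toggled
between skeleton open and load, which is what lets the test feed the
verifier exactly one program per iteration and expect every load to fail.
The same pattern against a hypothetical generated skeleton foo.skel.h:

	#include <stdio.h>
	#include "foo.skel.h"	/* hypothetical skeleton header */

	static void load_only_bar(void)
	{
		struct foo *skel = foo__open();	/* parsed, nothing loaded yet */

		if (!skel)
			return;

		/* Skip "other" at load time; only "bar" reaches the verifier. */
		bpf_program__set_autoload(skel->progs.other, false);

		if (foo__load(skel))	/* non-zero: the verifier said no */
			fprintf(stderr, "load rejected, as this test expects\n");

		foo__destroy(skel);
	}
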
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
new file mode 100644
index 0000000..68d64c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <time.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct timer {
+	struct bpf_timer t;
+};
+
+struct lock {
+	struct bpf_spin_lock l;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct timer);
+} timers SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct lock);
+} locks SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct timer *timer)
+{
+	return 0;
+}
+
+static void timer_work(void)
+{
+	struct timer *timer;
+	const int key = 0;
+
+	timer = bpf_map_lookup_elem(&timers, &key);
+	if (timer) {
+		bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC);
+		bpf_timer_set_callback(&timer->t, timer_cb);
+		bpf_timer_start(&timer->t, 10E9, 0);
+		bpf_timer_cancel(&timer->t);
+	}
+}
+
+static void spin_lock_work(void)
+{
+	const int key = 0;
+	struct lock *lock;
+
+	lock = bpf_map_lookup_elem(&locks, &key);
+	if (lock) {
+		bpf_spin_lock(&lock->l);
+		bpf_spin_unlock(&lock->l);
+	}
+}
+
+SEC("raw_tp/sys_enter")
+int raw_tp_timer(void *ctx)
+{
+	timer_work();
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int tp_timer(void *ctx)
+{
+	timer_work();
+
+	return 0;
+}
+
+SEC("kprobe/sys_nanosleep")
+int kprobe_timer(void *ctx)
+{
+	timer_work();
+
+	return 0;
+}
+
+SEC("perf_event")
+int perf_event_timer(void *ctx)
+{
+	timer_work();
+
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int raw_tp_spin_lock(void *ctx)
+{
+	spin_lock_work();
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int tp_spin_lock(void *ctx)
+{
+	spin_lock_work();
+
+	return 0;
+}
+
+SEC("kprobe/sys_nanosleep")
+int kprobe_spin_lock(void *ctx)
+{
+	spin_lock_work();
+
+	return 0;
+}
+
+SEC("perf_event")
+int perf_event_spin_lock(void *ctx)
+{
+	spin_lock_work();
+
+	return 0;
+}
+
+const char LICENSE[] SEC("license") = "GPL";
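
Each restricted helper is exercised once per tracing attach type (raw_tp,
tp, kprobe, perf_event). None of these programs is meant to run: the
prog_tests driver above loads them one at a time and expects the verifier to
reject every load, with the error strings spelled out in
verifier/helper_restricted.c below.
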
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 25afe42..465ef3f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -92,6 +92,7 @@
 	int fixup_map_event_output[MAX_FIXUPS];
 	int fixup_map_reuseport_array[MAX_FIXUPS];
 	int fixup_map_ringbuf[MAX_FIXUPS];
+	int fixup_map_timer[MAX_FIXUPS];
 	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
 	 * Can be a tab-separated sequence of expected strings. An empty string
 	 * means no log verification.
@@ -604,8 +605,15 @@
  *   int cnt;
  *   struct bpf_spin_lock l;
  * };
+ * struct bpf_timer {
+ *   __u64 :64;
+ *   __u64 :64;
+ * } __attribute__((aligned(8)));
+ * struct timer {
+ *   struct bpf_timer t;
+ * };
  */
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
 static __u32 btf_raw_types[] = {
 	/* int */
 	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
@@ -616,6 +624,11 @@
 	BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
 	BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
 	BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+	/* struct bpf_timer */                          /* [4] */
+	BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16),
+	/* struct timer */                              /* [5] */
+	BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
+	BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
 };
 
 static int load_btf(void)
@@ -696,6 +709,29 @@
 	return fd;
 }
 
+static int create_map_timer(void)
+{
+	struct bpf_create_map_attr attr = {
+		.name = "test_map",
+		.map_type = BPF_MAP_TYPE_ARRAY,
+		.key_size = 4,
+		.value_size = 16,
+		.max_entries = 1,
+		.btf_key_type_id = 1,
+		.btf_value_type_id = 5,
+	};
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+	attr.btf_fd = btf_fd;
+	fd = bpf_create_map_xattr(&attr);
+	if (fd < 0)
+		printf("Failed to create map with timer\n");
+	return fd;
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -722,6 +758,7 @@
 	int *fixup_map_event_output = test->fixup_map_event_output;
 	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
+	int *fixup_map_timer = test->fixup_map_timer;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -907,6 +944,13 @@
 			fixup_map_ringbuf++;
 		} while (*fixup_map_ringbuf);
 	}
+	if (*fixup_map_timer) {
+		map_fds[21] = create_map_timer();
+		do {
+			prog[*fixup_map_timer].imm = map_fds[21];
+			fixup_map_timer++;
+		} while (*fixup_map_timer);
+	}
 }
 
 struct libcap {
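
A sizing note on create_map_timer(): value_size is 16 because struct timer
wraps struct bpf_timer, which the BTF above declares as a 16-byte struct
(two anonymous __u64 fields), and btf_value_type_id = 5 points at that
struct timer entry. The kernel validates the declared value size against the
BTF type, so a mismatch would fail BPF_MAP_CREATE before any program loads.
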
diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c
new file mode 100644
index 0000000..a067b70
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/helper_restricted.c
@@ -0,0 +1,196 @@
+{
+	"bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE",
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "unknown func bpf_ktime_get_coarse_ns",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+	"bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+	.insns = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "unknown func bpf_ktime_get_coarse_ns",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.errstr = "unknown func bpf_ktime_get_coarse_ns",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+	"bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.errstr = "unknown func bpf_ktime_get_coarse_ns",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
+{
+	"bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_EMIT_CALL(BPF_FUNC_timer_init),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_timer = { 3, 8 },
+	.errstr = "tracing progs cannot use bpf_timer yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+	"bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_EMIT_CALL(BPF_FUNC_timer_init),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_timer = { 3, 8 },
+	.errstr = "tracing progs cannot use bpf_timer yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+	"bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_EMIT_CALL(BPF_FUNC_timer_init),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_timer = { 3, 8 },
+	.errstr = "tracing progs cannot use bpf_timer yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_EMIT_CALL(BPF_FUNC_timer_init),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_timer = { 3, 8 },
+	.errstr = "tracing progs cannot use bpf_timer yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
+{
+	"bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.errstr = "tracing progs cannot use bpf_spin_lock yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+	"bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.errstr = "tracing progs cannot use bpf_spin_lock yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.errstr = "tracing progs cannot use bpf_spin_lock yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+	"bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_spin_lock = { 3 },
+	.errstr = "tracing progs cannot use bpf_spin_lock yet",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c
index 2798927..128a348 100644
--- a/tools/testing/selftests/bpf/verifier/map_in_map.c
+++ b/tools/testing/selftests/bpf/verifier/map_in_map.c
@@ -19,6 +19,40 @@
 	.result = ACCEPT,
 },
 {
+	"map in map state pruning",
+	.insns = {
+	BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 11),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_in_map = { 4, 14 },
+	.flags = BPF_F_TEST_STATE_FREQ,
+	.result = VERBOSE_ACCEPT,
+	.errstr = "processed 25 insns",
+	.prog_type = BPF_PROG_TYPE_XDP,
+},
+{
 	"invalid inner map pointer",
 	.insns = {
 	BPF_ST_MEM(0, BPF_REG_10, -4, 0),
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 39f2bbe..d7b312b 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -3,5 +3,6 @@
 TEST_PROGS := gpio-mockup.sh
 TEST_FILES := gpio-mockup-sysfs.sh
 TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/
 
 include ../lib.mk
diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
index e83eac7..d1640f4 100644
--- a/tools/testing/selftests/gpio/gpio-mockup-cdev.c
+++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
@@ -117,7 +117,7 @@
 {
 	char *chip;
 	int opt, ret, cfd, lfd;
-	unsigned int offset, val, abiv;
+	unsigned int offset, val = 0, abiv;
 	uint32_t flags_v1;
 	uint64_t flags_v2;
 
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d4a8301..3763105 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -23,6 +23,7 @@
 /x86_64/platform_info_test
 /x86_64/set_boot_cpu_id
 /x86_64/set_sregs_test
+/x86_64/sev_migrate_tests
 /x86_64/smm_test
 /x86_64/state_test
 /x86_64/svm_vmcall_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c23e89d..c4e3471 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -73,7 +73,8 @@
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
 TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 5d95113..d890903 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -47,7 +47,7 @@
 #include "guest_modes.h"
 
 /* Global variable used to synchronize all of the vCPU threads. */
-static int iteration = -1;
+static int iteration;
 
 /* Defines what vCPU threads should do during a given iteration. */
 static enum {
@@ -215,12 +215,11 @@
 	return true;
 }
 
-static void *vcpu_thread_main(void *arg)
+static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
 {
-	struct perf_test_vcpu_args *vcpu_args = arg;
 	struct kvm_vm *vm = perf_test_args.vm;
 	int vcpu_id = vcpu_args->vcpu_id;
-	int current_iteration = -1;
+	int current_iteration = 0;
 
 	while (spin_wait_for_next_iteration(&current_iteration)) {
 		switch (READ_ONCE(iteration_work)) {
@@ -235,8 +234,6 @@
 
 		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
 	}
-
-	return NULL;
 }
 
 static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
@@ -277,8 +274,7 @@
 static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
 			  const char *description)
 {
-	perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
-	sync_global_to_guest(vm, perf_test_args);
+	perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
 	iteration_work = ITERATION_ACCESS_MEMORY;
 	run_iteration(vm, vcpus, description);
 }
@@ -296,48 +292,16 @@
 	run_iteration(vm, vcpus, "Mark memory idle");
 }
 
-static pthread_t *create_vcpu_threads(int vcpus)
-{
-	pthread_t *vcpu_threads;
-	int i;
-
-	vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
-	TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
-
-	for (i = 0; i < vcpus; i++) {
-		vcpu_last_completed_iteration[i] = iteration;
-		pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
-			       &perf_test_args.vcpu_args[i]);
-	}
-
-	return vcpu_threads;
-}
-
-static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
-{
-	int i;
-
-	/* Set done to signal the vCPU threads to exit */
-	done = true;
-
-	for (i = 0; i < vcpus; i++)
-		pthread_join(vcpu_threads[i], NULL);
-}
-
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *params = arg;
 	struct kvm_vm *vm;
-	pthread_t *vcpu_threads;
 	int vcpus = params->vcpus;
 
 	vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
-				 params->backing_src);
+				 params->backing_src, !overlap_memory_access);
 
-	perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
-			      !overlap_memory_access);
-
-	vcpu_threads = create_vcpu_threads(vcpus);
+	perf_test_start_vcpu_threads(vcpus, vcpu_thread_main);
 
 	pr_info("\n");
 	access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
@@ -352,8 +316,10 @@
 	mark_memory_idle(vm, vcpus);
 	access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
 
-	terminate_vcpu_threads(vcpu_threads, vcpus);
-	free(vcpu_threads);
+	/* Set done to signal the vCPU threads to exit */
+	done = true;
+
+	perf_test_join_vcpu_threads(vcpus);
 	perf_test_destroy_vm(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 1510b21..6a719d0 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -42,10 +42,9 @@
 static size_t demand_paging_size;
 static char *guest_data_prototype;
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
 	int ret;
-	struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
@@ -68,8 +67,6 @@
 	ts_diff = timespec_elapsed(start);
 	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
 		       ts_diff.tv_sec, ts_diff.tv_nsec);
-
-	return NULL;
 }
 
 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
@@ -282,7 +279,6 @@
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
-	pthread_t *vcpu_threads;
 	pthread_t *uffd_handler_threads = NULL;
 	struct uffd_handler_args *uffd_args = NULL;
 	struct timespec start;
@@ -293,9 +289,7 @@
 	int r;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
-				 p->src_type);
-
-	perf_test_args.wr_fract = 1;
+				 p->src_type, p->partition_vcpu_memory_access);
 
 	demand_paging_size = get_backing_src_pagesz(p->src_type);
 
@@ -304,12 +298,6 @@
 		    "Failed to allocate buffer for guest data pattern");
 	memset(guest_data_prototype, 0xAB, demand_paging_size);
 
-	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-			      p->partition_vcpu_memory_access);
-
 	if (p->uffd_mode) {
 		uffd_handler_threads =
 			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
@@ -322,26 +310,15 @@
 		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
 
 		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-			vm_paddr_t vcpu_gpa;
+			struct perf_test_vcpu_args *vcpu_args;
 			void *vcpu_hva;
 			void *vcpu_alias;
-			uint64_t vcpu_mem_size;
 
-
-			if (p->partition_vcpu_memory_access) {
-				vcpu_gpa = guest_test_phys_mem +
-					   (vcpu_id * guest_percpu_mem_size);
-				vcpu_mem_size = guest_percpu_mem_size;
-			} else {
-				vcpu_gpa = guest_test_phys_mem;
-				vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
-			}
-			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-				       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
+			vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
 
 			/* Cache the host addresses of the region */
-			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
-			vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
+			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
+			vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
 
 			/*
 			 * Set up user fault fd to handle demand paging
@@ -355,32 +332,18 @@
 					    pipefds[vcpu_id * 2], p->uffd_mode,
 					    p->uffd_delay, &uffd_args[vcpu_id],
 					    vcpu_hva, vcpu_alias,
-					    vcpu_mem_size);
+					    vcpu_args->pages * perf_test_args.guest_page_size);
 		}
 	}
 
-	/* Export the shared variables to the guest */
-	sync_global_to_guest(vm, perf_test_args);
-
 	pr_info("Finished creating vCPUs and starting uffd threads\n");
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
-
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-			       &perf_test_args.vcpu_args[vcpu_id]);
-	}
-
+	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 	pr_info("Started all vCPUs\n");
 
-	/* Wait for the vcpu threads to quit */
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-		pthread_join(vcpu_threads[vcpu_id], NULL);
-		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
-	}
-
+	perf_test_join_vcpu_threads(nr_vcpus);
 	ts_diff = timespec_elapsed(start);
-
 	pr_info("All vCPU threads joined\n");
 
 	if (p->uffd_mode) {
@@ -404,7 +367,6 @@
 	perf_test_destroy_vm(vm);
 
 	free(guest_data_prototype);
-	free(vcpu_threads);
 	if (p->uffd_mode) {
 		free(uffd_handler_threads);
 		free(uffd_args);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 7ffab5b..1954b96 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -31,7 +31,7 @@
 static int iteration;
 static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
 	int ret;
 	struct kvm_vm *vm = perf_test_args.vm;
@@ -41,7 +41,6 @@
 	struct timespec ts_diff;
 	struct timespec total = (struct timespec){0};
 	struct timespec avg;
-	struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 
 	run = vcpu_state(vm, vcpu_id);
@@ -83,8 +82,6 @@
 	pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
 		vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
 		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-
-	return NULL;
 }
 
 struct test_params {
@@ -170,7 +167,6 @@
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
-	pthread_t *vcpu_threads;
 	struct kvm_vm *vm;
 	unsigned long **bitmaps;
 	uint64_t guest_num_pages;
@@ -186,9 +182,10 @@
 	struct timespec clear_dirty_log_total = (struct timespec){0};
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
-				 p->slots, p->backing_src);
+				 p->slots, p->backing_src,
+				 p->partition_vcpu_memory_access);
 
-	perf_test_args.wr_fract = p->wr_fract;
+	perf_test_set_wr_fract(vm, p->wr_fract);
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -203,25 +200,15 @@
 		vm_enable_cap(vm, &cap);
 	}
 
-	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-			      p->partition_vcpu_memory_access);
-
-	sync_global_to_guest(vm, perf_test_args);
-
 	/* Start the iterations */
 	iteration = 0;
 	host_quit = false;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
 		vcpu_last_completed_iteration[vcpu_id] = -1;
 
-		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-			       &perf_test_args.vcpu_args[vcpu_id]);
-	}
+	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	/* Allow the vCPUs to populate memory */
 	pr_debug("Starting iteration %d - Populating\n", iteration);
@@ -290,8 +277,7 @@
 
 	/* Tell the vcpu thread to quit */
 	host_quit = true;
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-		pthread_join(vcpu_threads[vcpu_id], NULL);
+	perf_test_join_vcpu_threads(nr_vcpus);
 
 	avg = timespec_div(get_dirty_log_total, p->iterations);
 	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
@@ -306,7 +292,6 @@
 	}
 
 	free_bitmaps(bitmaps, p->slots);
-	free(vcpu_threads);
 	perf_test_destroy_vm(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 792c60e1..3fcd89e 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -115,7 +115,7 @@
 			addr = guest_test_virt_mem;
 			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
 				* guest_page_size;
-			addr &= ~(host_page_size - 1);
+			addr = align_down(addr, host_page_size);
 			*(uint64_t *)addr = READ_ONCE(iteration);
 		}
 
@@ -737,14 +737,14 @@
 	if (!p->phys_offset) {
 		guest_test_phys_mem = (vm_get_max_gfn(vm) -
 				       guest_num_pages) * guest_page_size;
-		guest_test_phys_mem &= ~(host_page_size - 1);
+		guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
 	} else {
 		guest_test_phys_mem = p->phys_offset;
 	}
 
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
-	guest_test_phys_mem &= ~((1 << 20) - 1);
+	guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
 #endif
 
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index f6b3794..6a1a37f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -82,6 +82,7 @@
 };
 extern const struct vm_guest_mode_params vm_guest_mode_params[];
 
+int open_path_or_exit(const char *path, int flags);
 int open_kvm_dev_path_or_exit(void);
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index df9f1a3..a86f953 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -8,6 +8,8 @@
 #ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
 #define SELFTEST_KVM_PERF_TEST_UTIL_H
 
+#include <pthread.h>
+
 #include "kvm_util.h"
 
 /* Default guest test virtual memory offset */
@@ -18,6 +20,7 @@
 #define PERF_TEST_MEM_SLOT_INDEX	1
 
 struct perf_test_vcpu_args {
+	uint64_t gpa;
 	uint64_t gva;
 	uint64_t pages;
 
@@ -27,7 +30,7 @@
 
 struct perf_test_args {
 	struct kvm_vm *vm;
-	uint64_t host_page_size;
+	uint64_t gpa;
 	uint64_t guest_page_size;
 	int wr_fract;
 
@@ -36,19 +39,15 @@
 
 extern struct perf_test_args perf_test_args;
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-extern uint64_t guest_test_phys_mem;
-
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
-				   enum vm_mem_backing_src_type backing_src);
+				   enum vm_mem_backing_src_type backing_src,
+				   bool partition_vcpu_memory_access);
 void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
-			   uint64_t vcpu_memory_bytes,
-			   bool partition_vcpu_memory_access);
+
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
+void perf_test_join_vcpu_threads(int vcpus);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index f8fddc8..99e0dcd 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -104,6 +104,7 @@
 size_t get_def_hugetlb_pagesz(void);
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
 size_t get_backing_src_pagesz(uint32_t i);
+bool is_backing_src_hugetlb(uint32_t i);
 void backing_src_help(const char *flag);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 long get_run_delay(void);
@@ -117,4 +118,29 @@
 	return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
 }
 
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static inline uint64_t align_up(uint64_t x, uint64_t size)
+{
+	uint64_t mask = size - 1;
+
+	TEST_ASSERT(size != 0 && !(size & (size - 1)),
+		    "size not a power of 2: %lu", size);
+	return ((x + mask) & ~mask);
+}
+
+static inline uint64_t align_down(uint64_t x, uint64_t size)
+{
+	uint64_t x_aligned_up = align_up(x, size);
+
+	if (x == x_aligned_up)
+		return x;
+	else
+		return x_aligned_up - size;
+}
+
+static inline void *align_ptr_up(void *x, size_t size)
+{
+	return (void *)align_up((unsigned long)x, size);
+}
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
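
A standalone sanity check of the helpers' arithmetic, using local
power-of-two-only copies (the in-tree align_up() additionally TEST_ASSERTs
on non-power-of-two sizes, and align_down() reduces to the same mask form):

	#include <assert.h>
	#include <stdint.h>

	static uint64_t my_align_up(uint64_t x, uint64_t size)
	{
		return (x + size - 1) & ~(size - 1);	/* size: power of 2 */
	}

	static uint64_t my_align_down(uint64_t x, uint64_t size)
	{
		return x & ~(size - 1);
	}

	int main(void)
	{
		assert(my_align_up(0x1234, 0x1000) == 0x2000);
		assert(my_align_up(0x2000, 0x1000) == 0x2000);	/* already aligned */
		assert(my_align_down(0x1234, 0x1000) == 0x1000);
		assert(my_align_down(0x123456, 1 << 20) == 0x100000); /* s390x segment case */
		return 0;
	}
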
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index b7531c8..587fbe4 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -46,4 +46,6 @@
 	return ecx & CPUID_SVM;
 }
 
+int open_sev_dev_path_or_exit(void);
+
 #endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index f968dfd..aed9dc3 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/resource.h>
 
 #include "test_util.h"
 
@@ -40,11 +41,40 @@
 {
 	int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
 	int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	/*
+	 * Number of file descriptors required: KVM_CAP_MAX_VCPUS for vCPU fds +
+	 * an arbitrary number for everything else.
+	 */
+	int nr_fds_wanted = kvm_max_vcpus + 100;
+	struct rlimit rl;
 
 	pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
 	pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
 
 	/*
+	 * Check that we're allowed to open nr_fds_wanted file descriptors and
+	 * try raising the limits if needed.
+	 */
+	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+	if (rl.rlim_cur < nr_fds_wanted) {
+		rl.rlim_cur = nr_fds_wanted;
+		if (rl.rlim_max < nr_fds_wanted) {
+			int old_rlim_max = rl.rlim_max;
+			rl.rlim_max = nr_fds_wanted;
+
+			int r = setrlimit(RLIMIT_NOFILE, &rl);
+			if (r < 0) {
+				printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+				       old_rlim_max, nr_fds_wanted);
+				exit(KSFT_SKIP);
+			}
+		} else {
+			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+		}
+	}
+
+	/*
 	 * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
 	 * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
 	 * in this case.
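
The two-step logic above mirrors how RLIMIT_NOFILE works: a process may
raise its soft limit (rlim_cur) up to the hard limit without privilege, but
raising the hard limit (rlim_max) requires CAP_SYS_RESOURCE, which is why
only that branch downgrades to a test skip instead of an assertion failure.
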
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 36407cb..ba1fdc3 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -280,7 +280,7 @@
 #ifdef __s390x__
 	alignment = max(0x100000, alignment);
 #endif
-	guest_test_phys_mem &= ~(alignment - 1);
+	guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
 
 	/* Set up the shared data structure test_args */
 	test_args.vm = vm;
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index eac44f5..13e8e3d 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -157,8 +157,7 @@
 			"memsize of 0,\n"
 			"  phdr index: %u p_memsz: 0x%" PRIx64,
 			n1, (uint64_t) phdr.p_memsz);
-		vm_vaddr_t seg_vstart = phdr.p_vaddr;
-		seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+		vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
 		vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
 		seg_vend |= vm->page_size - 1;
 		size_t seg_size = seg_vend - seg_vstart + 1;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 041004c..8f2e0bb 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -22,13 +22,17 @@
 
 static int vcpu_mmap_sz(void);
 
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
+int open_path_or_exit(const char *path, int flags)
 {
-	size_t mask = size - 1;
-	TEST_ASSERT(size != 0 && !(size & (size - 1)),
-		    "size not a power of 2: %lu", size);
-	return (void *) (((size_t) x + mask) & ~mask);
+	int fd;
+
+	fd = open(path, flags);
+	if (fd < 0) {
+		print_skip("%s not available (errno: %d)", path, errno);
+		exit(KSFT_SKIP);
+	}
+
+	return fd;
 }
 
 /*
@@ -42,16 +46,7 @@
  */
 static int _open_kvm_dev_path_or_exit(int flags)
 {
-	int fd;
-
-	fd = open(KVM_DEV_PATH, flags);
-	if (fd < 0) {
-		print_skip("%s not available, is KVM loaded? (errno: %d)",
-			   KVM_DEV_PATH, errno);
-		exit(KSFT_SKIP);
-	}
-
-	return fd;
+	return open_path_or_exit(KVM_DEV_PATH, flags);
 }
 
 int open_kvm_dev_path_or_exit(void)
@@ -187,15 +182,15 @@
 }
 
 const struct vm_guest_mode_params vm_guest_mode_params[] = {
-	{ 52, 48,  0x1000, 12 },
-	{ 52, 48, 0x10000, 16 },
-	{ 48, 48,  0x1000, 12 },
-	{ 48, 48, 0x10000, 16 },
-	{ 40, 48,  0x1000, 12 },
-	{ 40, 48, 0x10000, 16 },
-	{  0,  0,  0x1000, 12 },
-	{ 47, 64,  0x1000, 12 },
-	{ 44, 64,  0x1000, 12 },
+	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
+	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
+	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
+	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
+	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
+	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
+	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
+	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
+	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
 };
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
@@ -875,9 +870,17 @@
 	alignment = 1;
 #endif
 
+	/*
+	 * When using THP, mmap is not guaranteed to return a hugepage aligned
+	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
+	 * because mmap will always return an address aligned to the HugeTLB
+	 * page size.
+	 */
 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
 		alignment = max(backing_src_pagesz, alignment);
 
+	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+
 	/* Add enough memory to align up if necessary */
 	if (alignment > 1)
 		region->mmap_size += alignment;
@@ -910,8 +913,13 @@
 		    "test_malloc failed, mmap_start: %p errno: %i",
 		    region->mmap_start, errno);
 
+	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+		    region->mmap_start, backing_src_pagesz);
+
 	/* Align host address */
-	region->host_mem = align(region->mmap_start, alignment);
+	region->host_mem = align_ptr_up(region->mmap_start, alignment);
 
 	/* As needed perform madvise */
 	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
@@ -954,7 +962,7 @@
 			    "mmap of alias failed, errno: %i", errno);
 
 		/* Align host alias address */
-		region->host_alias = align(region->mmap_alias, alignment);
+		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
 	}
 }
 
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 0ef80db..722df3a 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -10,21 +10,40 @@
 
 struct perf_test_args perf_test_args;
 
-uint64_t guest_test_phys_mem;
-
 /*
  * Guest virtual memory offset of the testing memory slot.
  * Must not conflict with identity mapped test code.
  */
 static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
 
+struct vcpu_thread {
+	/* The id of the vCPU. */
+	int vcpu_id;
+
+	/* The pthread backing the vCPU. */
+	pthread_t thread;
+
+	/* Set to true once the vCPU thread is up and running. */
+	bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
 /*
  * Continuously write to the first 8 bytes of each page in the
  * specified region.
  */
 static void guest_code(uint32_t vcpu_id)
 {
-	struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+	struct perf_test_args *pta = &perf_test_args;
+	struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
 	uint64_t gva;
 	uint64_t pages;
 	int i;
@@ -37,9 +56,9 @@
 
 	while (true) {
 		for (i = 0; i < pages; i++) {
-			uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+			uint64_t addr = gva + (i * pta->guest_page_size);
 
-			if (i % perf_test_args.wr_fract == 0)
+			if (i % pta->wr_fract == 0)
 				*(uint64_t *)addr = 0x0123456789ABCDEF;
 			else
 				READ_ONCE(*(uint64_t *)addr);
@@ -49,35 +68,81 @@
 	}
 }
 
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+			   uint64_t vcpu_memory_bytes,
+			   bool partition_vcpu_memory_access)
+{
+	struct perf_test_args *pta = &perf_test_args;
+	struct perf_test_vcpu_args *vcpu_args;
+	int vcpu_id;
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		vcpu_args = &pta->vcpu_args[vcpu_id];
+
+		vcpu_args->vcpu_id = vcpu_id;
+		if (partition_vcpu_memory_access) {
+			vcpu_args->gva = guest_test_virt_mem +
+					 (vcpu_id * vcpu_memory_bytes);
+			vcpu_args->pages = vcpu_memory_bytes /
+					   pta->guest_page_size;
+			vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes);
+		} else {
+			vcpu_args->gva = guest_test_virt_mem;
+			vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+					   pta->guest_page_size;
+			vcpu_args->gpa = pta->gpa;
+		}
+
+		vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+			 vcpu_id, vcpu_args->gpa, vcpu_args->gpa +
+			 (vcpu_args->pages * pta->guest_page_size));
+	}
+}
+
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
-				   enum vm_mem_backing_src_type backing_src)
+				   enum vm_mem_backing_src_type backing_src,
+				   bool partition_vcpu_memory_access)
 {
+	struct perf_test_args *pta = &perf_test_args;
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages;
+	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
 	int i;
 
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	perf_test_args.host_page_size = getpagesize();
-	perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
+	/* By default vCPUs will write to memory. */
+	pta->wr_fract = 1;
+
+	/*
+	 * Snapshot the non-huge page size.  This is used by the guest code to
+	 * access/dirty pages at the logging granularity.
+	 */
+	pta->guest_page_size = vm_guest_mode_params[mode].page_size;
 
 	guest_num_pages = vm_adjust_num_guest_pages(mode,
-				(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+				(vcpus * vcpu_memory_bytes) / pta->guest_page_size);
 
-	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+	TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
 		    "Guest memory size is not host page size aligned.");
-	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+	TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0,
 		    "Guest memory size is not guest page size aligned.");
 	TEST_ASSERT(guest_num_pages % slots == 0,
 		    "Guest memory cannot be evenly divided into %d slots.",
 		    slots);
 
+	/*
+	 * Pass guest_num_pages to populate the page tables for test memory.
+	 * The memory is also added to memslot 0, but that's a benign side
+	 * effect as KVM allows aliasing HVAs in memslots.
+	 */
 	vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
-				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
-				  0, guest_code, NULL);
+				  guest_num_pages, 0, guest_code, NULL);
 
-	perf_test_args.vm = vm;
+	pta->vm = vm;
 
 	/*
 	 * If there should be more memory in the guest test region than there
@@ -90,20 +155,18 @@
 		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
 		    vcpu_memory_bytes);
 
-	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-			      perf_test_args.guest_page_size;
-	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+	pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+	pta->gpa = align_down(pta->gpa, backing_src_pagesz);
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
-	guest_test_phys_mem &= ~((1 << 20) - 1);
+	pta->gpa = align_down(pta->gpa, 1 << 20);
 #endif
-	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+	pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
 
 	/* Add extra memory slots for testing */
 	for (i = 0; i < slots; i++) {
 		uint64_t region_pages = guest_num_pages / slots;
-		vm_paddr_t region_start = guest_test_phys_mem +
-			region_pages * perf_test_args.guest_page_size * i;
+		vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i;
 
 		vm_userspace_mem_region_add(vm, backing_src, region_start,
 					    PERF_TEST_MEM_SLOT_INDEX + i,
@@ -111,10 +174,15 @@
 	}
 
 	/* Do mapping for the demand paging memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
+	virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);
+
+	perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
 
 	ucall_init(vm, NULL);
 
+	/* Export the shared variables to the guest. */
+	sync_global_to_guest(vm, perf_test_args);
+
 	return vm;
 }
 
@@ -124,36 +192,60 @@
 	kvm_vm_free(vm);
 }
 
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
-			   uint64_t vcpu_memory_bytes,
-			   bool partition_vcpu_memory_access)
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
 {
-	vm_paddr_t vcpu_gpa;
-	struct perf_test_vcpu_args *vcpu_args;
+	perf_test_args.wr_fract = wr_fract;
+	sync_global_to_guest(vm, perf_test_args);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+	struct vcpu_thread *vcpu = data;
+
+	WRITE_ONCE(vcpu->running, true);
+
+	/*
+	 * Wait for all vCPU threads to be up and running before calling the test-
+	 * provided vCPU thread function. This prevents thread creation (which
+	 * requires taking the mmap_sem in write mode) from interfering with the
+	 * guest faulting in its memory.
+	 */
+	while (!READ_ONCE(all_vcpu_threads_running))
+		;
+
+	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]);
+
+	return NULL;
+}
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *))
+{
 	int vcpu_id;
 
+	vcpu_thread_fn = vcpu_fn;
+	WRITE_ONCE(all_vcpu_threads_running, false);
+
 	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+		struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id];
 
-		vcpu_args->vcpu_id = vcpu_id;
-		if (partition_vcpu_memory_access) {
-			vcpu_args->gva = guest_test_virt_mem +
-					 (vcpu_id * vcpu_memory_bytes);
-			vcpu_args->pages = vcpu_memory_bytes /
-					   perf_test_args.guest_page_size;
-			vcpu_gpa = guest_test_phys_mem +
-				   (vcpu_id * vcpu_memory_bytes);
-		} else {
-			vcpu_args->gva = guest_test_virt_mem;
-			vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
-					   perf_test_args.guest_page_size;
-			vcpu_gpa = guest_test_phys_mem;
-		}
+		vcpu->vcpu_id = vcpu_id;
+		WRITE_ONCE(vcpu->running, false);
 
-		vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
-
-		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-			 vcpu_id, vcpu_gpa, vcpu_gpa +
-			 (vcpu_args->pages * perf_test_args.guest_page_size));
+		pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
 	}
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		while (!READ_ONCE(vcpu_threads[vcpu_id].running))
+			;
+	}
+
+	WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void perf_test_join_vcpu_threads(int vcpus)
+{
+	int vcpu_id;
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+		pthread_join(vcpu_threads[vcpu_id].thread, NULL);
 }
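
The rendezvous above (each thread sets its own `running` flag, then busy-waits on `all_vcpu_threads_running`) keeps the test-provided vCPU function from running while later threads are still being created and taking the mmap_sem for writing. A minimal standalone sketch of the same pattern, using C11 atomics in place of the kernel's READ_ONCE/WRITE_ONCE; the names are illustrative, not part of the selftest API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_THREADS 4

static atomic_bool running[NR_THREADS];
static atomic_bool all_running;

static void *worker(void *arg)
{
	int id = *(int *)arg;

	atomic_store(&running[id], true);	/* announce this thread is up */
	while (!atomic_load(&all_running))	/* spin until everyone is */
		;
	printf("thread %d starts real work\n", id);
	return NULL;
}

int main(void)
{
	pthread_t t[NR_THREADS];
	int id[NR_THREADS], i;

	for (i = 0; i < NR_THREADS; i++) {
		id[i] = i;
		pthread_create(&t[i], NULL, worker, &id[i]);
	}
	for (i = 0; i < NR_THREADS; i++)	/* wait for each announcement */
		while (!atomic_load(&running[i]))
			;
	atomic_store(&all_running, true);	/* release all threads at once */
	for (i = 0; i < NR_THREADS; i++)
		pthread_join(t[i], NULL);
	return 0;
}
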
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index b724291..6d23878 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -283,6 +283,11 @@
 	}
 }
 
+bool is_backing_src_hugetlb(uint32_t i)
+{
+	return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
 static void print_available_backing_src_types(const char *prefix)
 {
 	int i;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 161eba7..0ebc03c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -13,6 +13,8 @@
 #include "processor.h"
 #include "svm_util.h"
 
+#define SEV_DEV_PATH "/dev/sev"
+
 struct gpr64_regs guest_regs;
 u64 rflags;
 
@@ -172,3 +174,14 @@
 		exit(KSFT_SKIP);
 	}
 }
+
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ *   The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
+{
+	return open_path_or_exit(SEV_DEV_PATH, 0);
+}
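
open_path_or_exit() is defined elsewhere in the selftest library; as a sketch of the behaviour the comment describes (a missing /dev/sev becomes a kselftest skip rather than a failure, since hosts without SEV support are common), it would look something like the following. The helper name here is ours, not the library's:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define KSFT_SKIP 4	/* kselftest exit code for "skipped" */

/* Hypothetical stand-in for the library's open_path_or_exit(). */
static int open_path_or_skip(const char *path, int flags)
{
	int fd = open(path, flags);

	if (fd < 0) {
		fprintf(stderr, "SKIP: %s not available (errno %d)\n",
			path, errno);
		exit(KSFT_SKIP);
	}
	return fd;
}

int main(void)
{
	int sev_fd = open_path_or_skip("/dev/sev", 0);

	/* ... hand sev_fd to KVM_MEMORY_ENCRYPT_OP commands ... */
	close(sev_fd);
	return 0;
}
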
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 4cfcafe..1410d0a 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -36,11 +36,9 @@
 
 static bool run_vcpus = true;
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
 	int ret;
-	struct perf_test_vcpu_args *vcpu_args =
-		(struct perf_test_vcpu_args *)data;
 	int vcpu_id = vcpu_args->vcpu_id;
 	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
@@ -59,8 +57,6 @@
 			    "Invalid guest sync status: exit_reason=%s\n",
 			    exit_reason_str(run->exit_reason));
 	}
-
-	return NULL;
 }
 
 struct memslot_antagonist_args {
@@ -80,7 +76,7 @@
 	 * Add the dummy memslot just below the perf_test_util memslot, which is
 	 * at the top of the guest physical address space.
 	 */
-	gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+	gpa = perf_test_args.gpa - pages * vm_get_page_size(vm);
 
 	for (i = 0; i < nr_modifications; i++) {
 		usleep(delay);
@@ -100,29 +96,15 @@
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
-	pthread_t *vcpu_threads;
 	struct kvm_vm *vm;
-	int vcpu_id;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
-				 VM_MEM_SRC_ANONYMOUS);
-
-	perf_test_args.wr_fract = 1;
-
-	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-			      p->partition_vcpu_memory_access);
-
-	/* Export the shared variables to the guest */
-	sync_global_to_guest(vm, perf_test_args);
+				 VM_MEM_SRC_ANONYMOUS,
+				 p->partition_vcpu_memory_access);
 
 	pr_info("Finished creating vCPUs\n");
 
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-			       &perf_test_args.vcpu_args[vcpu_id]);
+	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	pr_info("Started all vCPUs\n");
 
@@ -131,16 +113,10 @@
 
 	run_vcpus = false;
 
-	/* Wait for the vcpu threads to quit */
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-		pthread_join(vcpu_threads[vcpu_id], NULL);
-
+	perf_test_join_vcpu_threads(nr_vcpus);
 	pr_info("All vCPU threads joined\n");
 
-	ucall_uninit(vm);
-	kvm_vm_free(vm);
-
-	free(vcpu_threads);
+	perf_test_destroy_vm(vm);
 }
 
 static void help(char *name)
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 91d88aa..672915c 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -165,10 +165,10 @@
 	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
 }
 
-static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
-				   struct kvm_cpuid2 *best)
+static void guest_test_msrs_access(void)
 {
 	struct kvm_run *run;
+	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0, r;
 	struct kvm_cpuid_entry2 feat = {
@@ -180,11 +180,34 @@
 	struct kvm_cpuid_entry2 dbg = {
 		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 	};
-	struct kvm_enable_cap cap = {0};
-
-	run = vcpu_state(vm, VCPU_ID);
+	struct kvm_cpuid2 *best;
+	vm_vaddr_t msr_gva;
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+		.args = {1}
+	};
+	struct msr_data *msr;
 
 	while (true) {
+		vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+		msr_gva = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+		msr = addr_gva2hva(vm, msr_gva);
+
+		vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+		vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+		vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+		best = kvm_get_supported_hv_cpuid();
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vm, VCPU_ID);
+		vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+		run = vcpu_state(vm, VCPU_ID);
+
 		switch (stage) {
 		case 0:
 			/*
@@ -315,6 +338,7 @@
 			 * capability enabled and guest visible CPUID bit unset.
 			 */
 			cap.cap = KVM_CAP_HYPERV_SYNIC2;
+			cap.args[0] = 0;
 			vcpu_enable_cap(vm, VCPU_ID, &cap);
 			break;
 		case 22:
@@ -461,9 +485,9 @@
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_SYNC:
-			TEST_ASSERT(uc.args[1] == stage,
-				    "Unexpected stage: %ld (%d expected)\n",
-				    uc.args[1], stage);
+			TEST_ASSERT(uc.args[1] == 0,
+				    "Unexpected stage: %ld (0 expected)\n",
+				    uc.args[1]);
 			break;
 		case UCALL_ABORT:
 			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -474,13 +498,14 @@
 		}
 
 		stage++;
+		kvm_vm_free(vm);
 	}
 }
 
-static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
-				     void *input, void *output, struct kvm_cpuid2 *best)
+static void guest_test_hcalls_access(void)
 {
 	struct kvm_run *run;
+	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0, r;
 	struct kvm_cpuid_entry2 feat = {
@@ -493,10 +518,38 @@
 	struct kvm_cpuid_entry2 dbg = {
 		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 	};
-
-	run = vcpu_state(vm, VCPU_ID);
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+		.args = {1}
+	};
+	vm_vaddr_t hcall_page, hcall_params;
+	struct hcall_data *hcall;
+	struct kvm_cpuid2 *best;
 
 	while (true) {
+		vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+		vm_init_descriptor_tables(vm);
+		vcpu_init_descriptor_tables(vm, VCPU_ID);
+		vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+		/* Hypercall input/output */
+		hcall_page = vm_vaddr_alloc_pages(vm, 2);
+		hcall = addr_gva2hva(vm, hcall_page);
+		memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+		hcall_params = vm_vaddr_alloc_page(vm);
+		memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+		vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+		vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+		vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+		best = kvm_get_supported_hv_cpuid();
+
+		run = vcpu_state(vm, VCPU_ID);
+
 		switch (stage) {
 		case 0:
 			hcall->control = 0xdeadbeef;
@@ -606,9 +659,9 @@
 
 		switch (get_ucall(vm, VCPU_ID, &uc)) {
 		case UCALL_SYNC:
-			TEST_ASSERT(uc.args[1] == stage,
-				    "Unexpected stage: %ld (%d expected)\n",
-				    uc.args[1], stage);
+			TEST_ASSERT(uc.args[1] == 0,
+				    "Unexpected stage: %ld (0 expected)\n",
+				    uc.args[1]);
 			break;
 		case UCALL_ABORT:
 			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -619,66 +672,15 @@
 		}
 
 		stage++;
+		kvm_vm_free(vm);
 	}
 }
 
 int main(void)
 {
-	struct kvm_cpuid2 *best;
-	struct kvm_vm *vm;
-	vm_vaddr_t msr_gva, hcall_page, hcall_params;
-	struct kvm_enable_cap cap = {
-		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
-		.args = {1}
-	};
-
-	/* Test MSRs */
-	vm = vm_create_default(VCPU_ID, 0, guest_msr);
-
-	msr_gva = vm_vaddr_alloc_page(vm);
-	memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
-	vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
-	vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-	vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-	best = kvm_get_supported_hv_cpuid();
-
-	vm_init_descriptor_tables(vm);
-	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
 	pr_info("Testing access to Hyper-V specific MSRs\n");
-	guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
-			       best);
-	kvm_vm_free(vm);
-
-	/* Test hypercalls */
-	vm = vm_create_default(VCPU_ID, 0, guest_hcall);
-
-	vm_init_descriptor_tables(vm);
-	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
-	/* Hypercall input/output */
-	hcall_page = vm_vaddr_alloc_pages(vm, 2);
-	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-	hcall_params = vm_vaddr_alloc_page(vm);
-	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-
-	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-	vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-	vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-	best = kvm_get_supported_hv_cpuid();
+	guest_test_msrs_access();
 
 	pr_info("Testing access to Hyper-V hypercalls\n");
-	guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
-				 addr_gva2hva(vm, hcall_page),
-				 addr_gva2hva(vm, hcall_page) + getpagesize(),
-				 best);
-
-	kvm_vm_free(vm);
+	guest_test_hcalls_access();
 }
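
With the loop recreating the VM on every iteration, each stage starts from a pristine vCPU, which is why the ucall check now expects stage 0 from the guest rather than the host-side stage counter. One piece worth isolating is the per-vCPU capability every iteration enables; stripped of the selftest wrappers it is a single ioctl. A sketch, with error handling elided and `vcpu_fd` assumed to come from KVM_CREATE_VCPU:

#include <linux/kvm.h>
#include <sys/ioctl.h>

int enable_hv_enforce_cpuid(int vcpu_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
		.args = { 1 },	/* 1 = enforce CPUID, 0 = legacy behaviour */
	};

	/* KVM then denies Hyper-V MSRs/hypercalls not advertised in CPUID. */
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
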
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
new file mode 100644
index 0000000..29b18d5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+#include "../lib/kvm_util_internal.h"
+
+#define SEV_POLICY_ES 0b100
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+	struct kvm_sev_cmd cmd = {
+		.id = cmd_id,
+		.data = (uint64_t)data,
+		.sev_fd = open_sev_dev_path_or_exit(),
+	};
+	int ret;
+
+	ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+	TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
+		    "%d failed: return code: %d, errno: %d, fw error: %d",
+		    cmd_id, ret, errno, cmd.error);
+}
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+	struct kvm_vm *vm;
+	struct kvm_sev_launch_start start = { 0 };
+	int i;
+
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
+	for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+		vm_vcpu_add(vm, i);
+	if (es)
+		start.policy |= SEV_POLICY_ES;
+	sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
+	if (es)
+		sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+	return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+	struct kvm_vm *vm;
+	int i;
+
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	if (!with_vcpus)
+		return vm;
+
+	for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+		vm_vcpu_add(vm, i);
+
+	return vm;
+}
+
+static int __sev_migrate_from(int dst_fd, int src_fd)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
+		.args = { src_fd }
+	};
+
+	return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+
+static void sev_migrate_from(int dst_fd, int src_fd)
+{
+	int ret;
+
+	ret = __sev_migrate_from(dst_fd, src_fd);
+	TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+	struct kvm_vm *src_vm;
+	struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+	int i, ret;
+
+	src_vm = sev_vm_create(es);
+	for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+		dst_vms[i] = aux_vm_create(true);
+
+	/* Initial migration from the src to the first dst. */
+	sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
+
+	for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+		sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
+
+	/* Migrate the guest back to the original VM. */
+	ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+	TEST_ASSERT(ret == -1 && errno == EIO,
+		    "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+		    errno);
+
+	kvm_vm_free(src_vm);
+	for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+		kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+	struct kvm_vm *vm;
+	int source_fds[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+	int i, j;
+	struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+	for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+		j = i % NR_LOCK_TESTING_THREADS;
+		__sev_migrate_from(input->vm->fd, input->source_fds[j]);
+	}
+
+	return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+	struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+	pthread_t pt[NR_LOCK_TESTING_THREADS];
+	int i;
+
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+		input[i].vm = sev_vm_create(/* es= */ false);
+		input[0].source_fds[i] = input[i].vm->fd;
+	}
+	for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+		memcpy(input[i].source_fds, input[0].source_fds,
+		       sizeof(input[i].source_fds));
+
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		pthread_join(pt[i], NULL);
+	for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+		kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+	struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+		*sev_es_vm_no_vmsa;
+	int ret;
+
+	sev_vm = sev_vm_create(/* es= */ false);
+	sev_es_vm = sev_vm_create(/* es= */ true);
+	vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	vm_no_sev = aux_vm_create(true);
+	sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
+	vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+	ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(sev_es_vm);
+	kvm_vm_free(sev_es_vm_no_vmsa);
+	kvm_vm_free(vm_no_vcpu);
+	kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(int dst_fd, int src_fd)
+{
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,
+		.args = { src_fd }
+	};
+
+	return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+
+static void sev_mirror_create(int dst_fd, int src_fd)
+{
+	int ret;
+
+	ret = __sev_mirror_create(dst_fd, src_fd);
+	TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void test_sev_mirror(bool es)
+{
+	struct kvm_vm *src_vm, *dst_vm;
+	struct kvm_sev_launch_start start = {
+		.policy = es ? SEV_POLICY_ES : 0
+	};
+	int i;
+
+	src_vm = sev_vm_create(es);
+	dst_vm = aux_vm_create(false);
+
+	sev_mirror_create(dst_vm->fd, src_vm->fd);
+
+	/* Check that we can complete creation of the mirror VM.  */
+	for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+		vm_vcpu_add(dst_vm, i);
+	sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start);
+	if (es)
+		sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+	kvm_vm_free(src_vm);
+	kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+	struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+	int ret;
+
+	sev_vm = sev_vm_create(/* es= */ false);
+	sev_es_vm = sev_vm_create(/* es= */ true);
+	vm_with_vcpu = aux_vm_create(true);
+	vm_no_vcpu = aux_vm_create(false);
+
+	ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to self. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
+		ret, errno);
+
+	ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+		    errno);
+
+	ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
+	TEST_ASSERT(
+		ret == -1 && errno == EINVAL,
+		"SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n",
+		ret, errno);
+
+	kvm_vm_free(sev_vm);
+	kvm_vm_free(sev_es_vm);
+	kvm_vm_free(vm_with_vcpu);
+	kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+	struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm;
+	int ret;
+
+	sev_vm = sev_vm_create(/* es= */ false);
+	dst_vm = aux_vm_create(true);
+	mirror_vm = aux_vm_create(false);
+	dst_mirror_vm = aux_vm_create(false);
+
+	sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+	ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+	TEST_ASSERT(ret == -1 && errno == EBUSY,
+		    "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+		    errno);
+
+	/* The mirror itself can be migrated.  */
+	sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
+	ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+	TEST_ASSERT(ret == -1 && errno == EBUSY,
+		    "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+		    errno);
+
+	/*
+	 * mirror_vm is not a mirror anymore, dst_mirror_vm is.  Thus,
+	 * the owner can be copied as soon as dst_mirror_vm is gone.
+	 */
+	kvm_vm_free(dst_mirror_vm);
+	sev_migrate_from(dst_vm->fd, sev_vm->fd);
+
+	kvm_vm_free(mirror_vm);
+	kvm_vm_free(dst_vm);
+	kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+	if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+		test_sev_migrate_from(/* es= */ false);
+		test_sev_migrate_from(/* es= */ true);
+		test_sev_migrate_locking();
+		test_sev_migrate_parameters();
+		if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+			test_sev_move_copy();
+	}
+	if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+		test_sev_mirror(/* es= */ false);
+		test_sev_mirror(/* es= */ true);
+		test_sev_mirror_parameters();
+	}
+	return 0;
+}
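
Everything in this test drives the two new primitives through KVM_ENABLE_CAP on the destination VM fd, passing the source VM fd in args[0]. Below is a reduced sketch of probing for and performing the encrypted-context move outside the harness; it assumes uapi headers new enough to define the capability, and keeps error handling minimal:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* kvm_fd: open("/dev/kvm"); dst/src: VM fds from KVM_CREATE_VM. */
int sev_move_enc_context(int kvm_fd, int dst_vm_fd, int src_vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
		.args = { src_vm_fd },
	};

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION,
		  KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM) <= 0)
		return -1;	/* kernel predates the capability */

	return ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);
}
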
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index eda0d2a..a0699f0 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -24,8 +24,12 @@
 
 #define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
 #define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
 
 #define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
+
+#define EVTCHN_VECTOR	0x10
 
 static struct kvm_vm *vm;
 
@@ -56,15 +60,44 @@
     uint64_t time[4];
 };
 
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+        uint8_t evtchn_upcall_pending;
+        uint8_t evtchn_upcall_mask;
+        unsigned long evtchn_pending_sel;
+        struct arch_vcpu_info arch;
+        struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
 #define RUNSTATE_running  0
 #define RUNSTATE_runnable 1
 #define RUNSTATE_blocked  2
 #define RUNSTATE_offline  3
 
+static void evtchn_handler(struct ex_regs *regs)
+{
+	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+	vi->evtchn_upcall_pending = 0;
+
+	GUEST_SYNC(0x20);
+}
+
 static void guest_code(void)
 {
 	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
 
+	__asm__ __volatile__(
+		"sti\n"
+		"nop\n"
+	);
+
+	/* Trigger an interrupt injection */
+	GUEST_SYNC(0);
+
 	/* Test having the host set runstates manually */
 	GUEST_SYNC(RUNSTATE_runnable);
 	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
@@ -153,7 +186,7 @@
 
 	struct kvm_xen_vcpu_attr vi = {
 		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
-		.u.gpa = SHINFO_REGION_GPA + 0x40,
+		.u.gpa = VCPU_INFO_ADDR,
 	};
 	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
 
@@ -163,6 +196,16 @@
 	};
 	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
 
+	struct kvm_xen_hvm_attr vec = {
+		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+		.u.vector = EVTCHN_VECTOR,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
 	if (do_runstate_tests) {
 		struct kvm_xen_vcpu_attr st = {
 			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
@@ -171,9 +214,14 @@
 		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
 	}
 
+	struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+	vinfo->evtchn_upcall_pending = 0;
+
 	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
 	rs->state = 0x5a;
 
+	bool evtchn_irq_expected = false;
+
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
 		struct ucall uc;
@@ -193,16 +241,21 @@
 			struct kvm_xen_vcpu_attr rst;
 			long rundelay;
 
-			/* If no runstate support, bail out early */
-			if (!do_runstate_tests)
-				goto done;
-
-			TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-				    rs->time[1] + rs->time[2] + rs->time[3],
-				    "runstate times don't add up");
+			if (do_runstate_tests)
+				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+					    rs->time[1] + rs->time[2] + rs->time[3],
+					    "runstate times don't add up");
 
 			switch (uc.args[1]) {
-			case RUNSTATE_running...RUNSTATE_offline:
+			case 0:
+				evtchn_irq_expected = true;
+				vinfo->evtchn_upcall_pending = 1;
+				break;
+
+			case RUNSTATE_runnable...RUNSTATE_offline:
+				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+				if (!do_runstate_tests)
+					goto done;
 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
 				rst.u.runstate.state = uc.args[1];
 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
@@ -236,6 +289,10 @@
 					sched_yield();
 				} while (get_run_delay() < rundelay);
 				break;
+			case 0x20:
+				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+				evtchn_irq_expected = false;
+				break;
 			}
 			break;
 		}
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7615f29..9897fa9a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -34,6 +34,7 @@
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b5a69ad..d444ee6 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -629,6 +629,66 @@
 	log_test $? 0 "Nexthops removed on admin down"
 }
 
+ipv6_grp_refs()
+{
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP link set dev veth1 up"
+	run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+	run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+	run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+	run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+	run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+	run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+	run_cmd "$IP nexthop add id 102 group 100"
+	run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+	# create per-cpu dsts through nh 100
+	run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+	# remove nh 100 from the group to delete the route potentially leaving
+	# a stale per-cpu dst which holds a reference to the nexthop's net
+	# device and to the IPv6 route
+	run_cmd "$IP nexthop replace id 102 group 101"
+	run_cmd "$IP route del 2001:db8:101::1/128"
+
+	# add both nexthops to the group so a reference is taken on them
+	run_cmd "$IP nexthop replace id 102 group 100/101"
+
+	# if the bug described in commit "net: nexthop: release IPv6 per-cpu
+	# dsts when replacing a nexthop group" exists at this point we have
+	# an unlinked IPv6 route (but not freed due to stale dst) with a
+	# reference over the group so we delete the group which will again
+	# only unlink it due to the route reference
+	run_cmd "$IP nexthop del id 102"
+
+	# delete the nexthop with stale dst, since we have an unlinked
+	# group with a ref to it and an unlinked IPv6 route with ref to the
+	# group, the nh will only be unlinked and not freed so the stale dst
+	# remains forever and we get a net device refcount imbalance
+	run_cmd "$IP nexthop del id 100"
+
+	# if a reference was lost this command will hang because the net device
+	# cannot be removed
+	timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+
+	# we can't cleanup if the command is hung trying to delete the netdev
+	if [ $? -eq 137 ]; then
+		return 1
+	fi
+
+	# cleanup
+	run_cmd "$IP link del veth1.20"
+	run_cmd "$IP nexthop flush"
+
+	return 0
+}
+
 ipv6_grp_fcnal()
 {
 	local rc
@@ -734,6 +794,9 @@
 
 	run_cmd "$IP nexthop add id 108 group 31/24"
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+	ipv6_grp_refs
+	log_test $? 0 "Nexthop group replace refcounts"
 }
 
 ipv6_res_grp_fcnal()
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index a4bd1b0..697994a 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,6 +6,7 @@
 CONFIG_NET_VRF=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
+CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_MPLS=m
 CONFIG_NET_ACT_VLAN=m
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index d9eca22..de19eb6 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-	gact_trap_test"
+	gact_trap_test mirred_egress_to_ingress_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -13,10 +13,12 @@
 h1_create()
 {
 	simple_if_init $h1 192.0.2.1/24
+	tc qdisc add dev $h1 clsact
 }
 
 h1_destroy()
 {
+	tc qdisc del dev $h1 clsact
 	simple_if_fini $h1 192.0.2.1/24
 }
 
@@ -153,6 +155,49 @@
 	log_test "trap ($tcflags)"
 }
 
+mirred_egress_to_ingress_test()
+{
+	RET=0
+
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \
+			ct commit nat src addr 192.0.2.2 pipe \
+			ct clear pipe \
+			ct commit nat dst addr 192.0.2.1 pipe \
+			mirred ingress redirect dev $h1
+
+	tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop
+	tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t icmp "ping,id=42,seq=10" -q
+
+	tc_check_packets "dev $h1 egress" 100 1
+	check_err $? "didn't mirror first packet"
+
+	tc_check_packets "dev $swp1 ingress" 111 1
+	check_fail $? "didn't redirect first packet"
+	tc_check_packets "dev $swp1 ingress" 112 1
+	check_err $? "didn't receive reply to first packet"
+
+	ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1
+
+	tc_check_packets "dev $h1 egress" 100 2
+	check_err $? "didn't mirror second packet"
+	tc_check_packets "dev $swp1 ingress" 111 1
+	check_fail $? "didn't redirect second packet"
+	tc_check_packets "dev $swp1 ingress" 112 2
+	check_err $? "didn't receive reply to second packet"
+
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+	tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower
+	tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower
+
+	log_test "mirred_egress_to_ingress ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
index fdeb44d..3224651 100755
--- a/tools/testing/selftests/net/gre_gso.sh
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -118,16 +118,18 @@
 	local addr=$2
 	local proto=$3
 
-	$NS_EXEC nc $proto -kl $port >/dev/null &
+	[ "$proto" == 6 ] && addr="[$addr]"
+
+	$NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null &
 	PID=$!
 	while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
 
-	cat $TMPFILE | timeout 1 nc $proto -N $addr $port
+	cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
 	log_test $? 0 "$name - copy file w/ TSO"
 
 	ethtool -K veth0 tso off
 
-	cat $TMPFILE | timeout 1 nc $proto -N $addr $port
+	cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
 	log_test $? 0 "$name - copy file w/ GSO"
 
 	ethtool -K veth0 tso on
@@ -155,8 +157,8 @@
 
 	sleep 2
 
-	gre_gst_test_checks GREv6/v4 172.16.2.2
-	gre_gst_test_checks GREv6/v6 2001:db8:1::2 -6
+	gre_gst_test_checks GREv6/v4 172.16.2.2 4
+	gre_gst_test_checks GREv6/v6 2001:db8:1::2 6
 
 	cleanup
 }
@@ -212,8 +214,8 @@
 	exit $ksft_skip
 fi
 
-if [ ! -x "$(command -v nc)" ]; then
-	echo "SKIP: Could not run test without nc tool"
+if [ ! -x "$(command -v socat)" ]; then
+	echo "SKIP: Could not run test without socat tool"
 	exit $ksft_skip
 fi
 
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index e61fc4c..8a22db0 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -78,26 +78,21 @@
 		*byte++ = rand();
 }
 
-FIXTURE(tls_basic)
-{
-	int fd, cfd;
-	bool notls;
-};
-
-FIXTURE_SETUP(tls_basic)
+static void ulp_sock_pair(struct __test_metadata *_metadata,
+			  int *fd, int *cfd, bool *notls)
 {
 	struct sockaddr_in addr;
 	socklen_t len;
 	int sfd, ret;
 
-	self->notls = false;
+	*notls = false;
 	len = sizeof(addr);
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
 	addr.sin_port = 0;
 
-	self->fd = socket(AF_INET, SOCK_STREAM, 0);
+	*fd = socket(AF_INET, SOCK_STREAM, 0);
 	sfd = socket(AF_INET, SOCK_STREAM, 0);
 
 	ret = bind(sfd, &addr, sizeof(addr));
@@ -108,26 +103,96 @@
 	ret = getsockname(sfd, &addr, &len);
 	ASSERT_EQ(ret, 0);
 
-	ret = connect(self->fd, &addr, sizeof(addr));
+	ret = connect(*fd, &addr, sizeof(addr));
 	ASSERT_EQ(ret, 0);
 
-	self->cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(self->cfd, 0);
+	*cfd = accept(sfd, &addr, &len);
+	ASSERT_GE(*cfd, 0);
 
 	close(sfd);
 
-	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
 	if (ret != 0) {
 		ASSERT_EQ(errno, ENOENT);
-		self->notls = true;
+		*notls = true;
 		printf("Failure setting TCP_ULP, testing without tls\n");
 		return;
 	}
 
-	ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
 	ASSERT_EQ(ret, 0);
 }
 
+/* Produce a basic cmsg */
+static int tls_send_cmsg(int fd, unsigned char record_type,
+			 void *data, size_t len, int flags)
+{
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	int cmsg_len = sizeof(char);
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	struct iovec vec;
+
+	vec.iov_base = data;
+	vec.iov_len = len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_TLS;
+	/* Pass the (non-data) record type via a control message. */
+	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+	*CMSG_DATA(cmsg) = record_type;
+	msg.msg_controllen = cmsg->cmsg_len;
+
+	return sendmsg(fd, &msg, flags);
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+			 int fd, unsigned char record_type,
+			 void *data, size_t len, int flags)
+{
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	struct cmsghdr *cmsg;
+	unsigned char ctype;
+	struct msghdr msg;
+	struct iovec vec;
+	int n;
+
+	vec.iov_base = data;
+	vec.iov_len = len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	n = recvmsg(fd, &msg, flags);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	EXPECT_NE(cmsg, NULL);
+	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+	ctype = *((unsigned char *)CMSG_DATA(cmsg));
+	EXPECT_EQ(ctype, record_type);
+
+	return n;
+}
+
+FIXTURE(tls_basic)
+{
+	int fd, cfd;
+	bool notls;
+};
+
+FIXTURE_SETUP(tls_basic)
+{
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+}
+
 FIXTURE_TEARDOWN(tls_basic)
 {
 	close(self->fd);
@@ -199,60 +264,21 @@
 FIXTURE_SETUP(tls)
 {
 	struct tls_crypto_info_keys tls12;
-	struct sockaddr_in addr;
-	socklen_t len;
-	int sfd, ret;
-
-	self->notls = false;
-	len = sizeof(addr);
+	int ret;
 
 	tls_crypto_info_init(variant->tls_version, variant->cipher_type,
 			     &tls12);
 
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
 
-	self->fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (self->notls)
+		return;
 
-	ret = bind(sfd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-	ret = listen(sfd, 10);
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
 	ASSERT_EQ(ret, 0);
 
-	ret = getsockname(sfd, &addr, &len);
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
 	ASSERT_EQ(ret, 0);
-
-	ret = connect(self->fd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-
-	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		self->notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
-	}
-
-	if (!self->notls) {
-		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-				 tls12.len);
-		ASSERT_EQ(ret, 0);
-	}
-
-	self->cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(self->cfd, 0);
-
-	if (!self->notls) {
-		ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		ASSERT_EQ(ret, 0);
-
-		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-				 tls12.len);
-		ASSERT_EQ(ret, 0);
-	}
-
-	close(sfd);
 }
 
 FIXTURE_TEARDOWN(tls)
@@ -613,6 +639,95 @@
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
+TEST_F(tls, splice_cmsg_to_pipe)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+	EXPECT_EQ(errno, EINVAL);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(errno, EIO);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, splice_dec_cmsg_to_pipe)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(errno, EIO);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+	EXPECT_EQ(errno, EINVAL);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int half = send_len / 2;
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+	/* Recv half of the record, splice the other half */
+	EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
+		  half);
+	EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, peek_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int chunk = TLS_PAYLOAD_MAX_LEN / 4;
+	int n, i, p[2];
+
+	memrnd(mem_send, sizeof(mem_send));
+
+	ASSERT_GE(pipe(p), 0);
+	for (i = 0; i < 4; i++)
+		EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
+			  chunk);
+
+	EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
+		       MSG_WAITALL | MSG_PEEK),
+		  chunk * 5 / 2);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
+
+	n = 0;
+	while (n < send_len) {
+		i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
+		EXPECT_GT(i, 0);
+		n += i;
+	}
+	EXPECT_EQ(n, send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
 TEST_F(tls, recvmsg_single)
 {
 	char const *test_str = "test_recvmsg_single";
@@ -1193,60 +1308,30 @@
 
 TEST_F(tls, control_msg)
 {
-	if (self->notls)
-		return;
-
-	char cbuf[CMSG_SPACE(sizeof(char))];
-	char const *test_str = "test_read";
-	int cmsg_len = sizeof(char);
+	char *test_str = "test_read";
 	char record_type = 100;
-	struct cmsghdr *cmsg;
-	struct msghdr msg;
 	int send_len = 10;
-	struct iovec vec;
 	char buf[10];
 
-	vec.iov_base = (char *)test_str;
-	vec.iov_len = 10;
-	memset(&msg, 0, sizeof(struct msghdr));
-	msg.msg_iov = &vec;
-	msg.msg_iovlen = 1;
-	msg.msg_control = cbuf;
-	msg.msg_controllen = sizeof(cbuf);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_TLS;
-	/* test sending non-record types. */
-	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
-	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
-	*CMSG_DATA(cmsg) = record_type;
-	msg.msg_controllen = cmsg->cmsg_len;
+	if (self->notls)
+		SKIP(return, "no TLS support");
 
-	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
+		  send_len);
 	/* Should fail because we didn't provide a control message */
 	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
 
-	vec.iov_base = buf;
-	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	EXPECT_NE(cmsg, NULL);
-	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-	record_type = *((unsigned char *)CMSG_DATA(cmsg));
-	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
+		  send_len);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 
 	/* Recv the message again without MSG_PEEK */
-	record_type = 0;
 	memset(buf, 0, sizeof(buf));
 
-	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	EXPECT_NE(cmsg, NULL);
-	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-	record_type = *((unsigned char *)CMSG_DATA(cmsg));
-	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
@@ -1301,6 +1386,160 @@
 	EXPECT_EQ(errno, EISCONN);
 }
 
+FIXTURE(tls_err)
+{
+	int fd, cfd;
+	int fd2, cfd2;
+	bool notls;
+};
+
+FIXTURE_VARIANT(tls_err)
+{
+	uint16_t tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
+{
+	.tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
+{
+	.tls_version = TLS_1_3_VERSION,
+};
+
+FIXTURE_SETUP(tls_err)
+{
+	struct tls_crypto_info_keys tls12;
+	int ret;
+
+	tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
+			     &tls12);
+
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+	ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
+	if (self->notls)
+		return;
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(tls_err)
+{
+	close(self->fd);
+	close(self->cfd);
+	close(self->fd2);
+	close(self->cfd2);
+}
+
+TEST_F(tls_err, bad_rec)
+{
+	char buf[64];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	memset(buf, 0x55, sizeof(buf));
+	EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EMSGSIZE);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EAGAIN);
+}
+
+TEST_F(tls_err, bad_auth)
+{
+	char buf[128];
+	int n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	memrnd(buf, sizeof(buf) / 2);
+	EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
+	n = recv(self->cfd, buf, sizeof(buf), 0);
+	EXPECT_GT(n, sizeof(buf) / 2);
+
+	buf[n - 1]++;
+
+	EXPECT_EQ(send(self->fd2, buf, n, 0), n);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_in_large_read)
+{
+	char txt[3][64];
+	char cip[3][128];
+	char buf[3 * 128];
+	int i, n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	/* Put 3 records in the sockets */
+	for (i = 0; i < 3; i++) {
+		memrnd(txt[i], sizeof(txt[i]));
+		EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
+			  sizeof(txt[i]));
+		n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
+		EXPECT_GT(n, sizeof(txt[i]));
+		/* Break the third message */
+		if (i == 2)
+			cip[2][n - 1]++;
+		EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
+	}
+
+	/* We should be able to receive the first two messages */
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
+	EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
+	EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
+	/* Third message is bad */
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_cmsg)
+{
+	char *test_str = "test_read";
+	int send_len = 10;
+	char cip[128];
+	char buf[128];
+	char txt[64];
+	int n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	/* Queue up one data record */
+	memrnd(txt, sizeof(txt));
+	EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
+	n = recv(self->cfd, cip, sizeof(cip), 0);
+	EXPECT_GT(n, sizeof(txt));
+	EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	n = recv(self->cfd, cip, sizeof(cip), 0);
+	cip[n - 1]++; /* Break it */
+	EXPECT_GT(n, send_len);
+	EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
+	EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
 TEST(non_established) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
 	struct sockaddr_in addr;
@@ -1355,64 +1594,82 @@
 
 TEST(keysizes) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
-	struct sockaddr_in addr;
-	int sfd, ret, fd, cfd;
-	socklen_t len;
+	int ret, fd, cfd;
 	bool notls;
 
-	notls = false;
-	len = sizeof(addr);
-
 	memset(&tls12, 0, sizeof(tls12));
 	tls12.info.version = TLS_1_2_VERSION;
 	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
 
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
-
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
-
-	ret = bind(sfd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-	ret = listen(sfd, 10);
-	ASSERT_EQ(ret, 0);
-
-	ret = getsockname(sfd, &addr, &len);
-	ASSERT_EQ(ret, 0);
-
-	ret = connect(fd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-
-	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
-	}
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
 
 	if (!notls) {
 		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
 				 sizeof(tls12));
 		EXPECT_EQ(ret, 0);
-	}
-
-	cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(cfd, 0);
-
-	if (!notls) {
-		ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		EXPECT_EQ(ret, 0);
 
 		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
 				 sizeof(tls12));
 		EXPECT_EQ(ret, 0);
 	}
 
-	close(sfd);
 	close(fd);
 	close(cfd);
 }
 
+TEST(tls_v6ops) {
+	struct tls_crypto_info_keys tls12;
+	struct sockaddr_in6 addr, addr2;
+	int sfd, ret, fd;
+	socklen_t len, len2;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_addr = in6addr_any;
+	addr.sin6_port = 0;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	sfd = socket(AF_INET6, SOCK_STREAM, 0);
+
+	ret = bind(sfd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+	ret = listen(sfd, 10);
+	ASSERT_EQ(ret, 0);
+
+	len = sizeof(addr);
+	ret = getsockname(sfd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
+	ret = connect(fd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+
+	len = sizeof(addr);
+	ret = getsockname(fd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret) {
+		ASSERT_EQ(errno, ENOENT);
+		SKIP(return, "no TLS support");
+	}
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	len2 = sizeof(addr2);
+	ret = getsockname(fd, &addr2, &len2);
+	ASSERT_EQ(ret, 0);
+
+	EXPECT_EQ(len2, len);
+	EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
+
+	close(fd);
+	close(sfd);
+}
+
 TEST_HARNESS_MAIN
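
The tls_send_cmsg()/tls_recv_cmsg() helpers factored out above use the same CMSG layout that real applications use to drive kTLS control records. As a usage illustration, here is a hedged sketch of sending a close_notify alert on an established kTLS socket; the SOL_TLS fallback define is an assumption for libcs that lack it, and the function name is ours, not the selftest's:

#include <linux/tls.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef SOL_TLS
#define SOL_TLS 282	/* assumption: value from the kernel headers */
#endif

/* Send a close_notify alert on a kTLS socket with a TLS_TX key set. */
ssize_t tls_send_close_notify(int fd)
{
	unsigned char alert[2] = { 1, 0 };	/* level: warning, desc: close_notify */
	char cbuf[CMSG_SPACE(sizeof(unsigned char))];
	struct iovec vec = { .iov_base = alert, .iov_len = sizeof(alert) };
	struct msghdr msg = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_TLS;
	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
	cmsg->cmsg_len = CMSG_LEN(sizeof(unsigned char));
	*CMSG_DATA(cmsg) = 21;			/* TLS ContentType: alert */
	msg.msg_controllen = cmsg->cmsg_len;

	return sendmsg(fd, &msg, 0);
}
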
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 8748199..ffca314 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -5,7 +5,8 @@
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
 	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
-	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
+	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+	conntrack_vrf.sh
 
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 0000000..91f3ef0
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,219 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device.  In this script, that's veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates an unexpected change of nftables
+# behavior caused by commit 09e856d54bda5f28 ("vrf: Reset skb conntrack
+# connection on VRF rcv").
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment based on the real `iif` is lost
+# and the zone is assigned based on the VRF master interface instead
+# (in case such a rule exists).  Thus it is impossible to distinguish
+# packets based on the original interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+	ip netns pids $ns0 | xargs kill 2>/dev/null
+	ip netns pids $ns1 | xargs kill 2>/dev/null
+
+	ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $ns0"
+	exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not add veth device"
+	exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not add vrf device"
+	exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+# $? after backgrounding a command is always 0, so probe for the tool first
+iperf3 -v > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iperf3 tool"
+	exit $ksft_skip
+fi
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+	chain rawpre {
+		type filter hook prerouting priority raw;
+
+		iif { veth0, tvrf } counter meta nftrace set 1
+		iif veth0 counter ct zone set 1 counter return
+		iif tvrf counter ct zone set 2 counter return
+		ip protocol icmp counter
+		notrack counter
+	}
+
+	chain rawout {
+		type filter hook output priority raw;
+
+		oif veth0 counter ct zone set 1 counter return
+		oif tvrf counter ct zone set 2 counter return
+		notrack counter
+	}
+}
+EOF
+	ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+	# should be in zone 1, not zone 2
+	count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+	if [ $count -eq 1 ]; then
+		echo "PASS: entry found in conntrack zone 1"
+	else
+		echo "FAIL: entry not found in conntrack zone 1"
+		count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+		if [ $count -eq 1 ]; then
+			echo "FAIL: entry found in zone 2 instead"
+		else
+			echo "FAIL: entry not in zone 1 or 2, dumping table"
+			ip netns exec $ns0 conntrack -L
+			ip netns exec $ns0 nft list ruleset
+		fi
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+	ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		# NB: masquerade should always be combined with 'oif(name) bla',
+		# lack of this is intentional here, we want to exercise double-snat.
+		ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
+	ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+	if [ $? -eq 0 ]; then
+		echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
+	else
+		echo "FAIL: vrf masq rule has unexpected counter value"
+		ret=1
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+	ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
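+	# Same client as above, but this time only the second traversal
+	# (oif veth0, the lower device) can match the masquerade rule.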
+	ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	ip netns exec $ns0 nft list table ip nat | grep -q 'counter packets 2'
+	if [ $? -eq 0 ]; then
+		echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+	else
+		echo "FAIL: vrf masq rule has unexpected counter value"
+		ret=1
+	fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index da1c1e4..d88867d 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -759,19 +759,21 @@
 	local result=""
 	local logmsg=""
 
-	echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-	nc_r=$!
-
-	echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-	nc_c=$!
-
 	# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
-	echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+	echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+	echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+	sc_r=$!
+
+	echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+	sc_c=$!
+
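+	# give both listeners a moment to bind before sending the probe below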
+	sleep 0.3
 
 	# ns1 tries to connect to ns0:1405.  With default settings this should connect
 	# to client, it matches the conntrack entry created above.
 
-	result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+	result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
 
 	if [ "$result" = "$expect" ] ;then
 		echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@
 		ret=1
 	fi
 
-	kill $nc_r $nc_c 2>/dev/null
+	kill $sc_r $sc_c 2>/dev/null
 
 	# flush udp entries for next test round, if any
 	ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@
 	chain prerouting {
 		type filter hook prerouting priority -300; policy accept;
 		meta iif veth0 udp dport 1405 notrack
-		udp dport 1405 notrack
 	}
 	chain output {
 		type filter hook output priority -300; policy accept;
-		udp sport 1405 notrack
+		meta oif veth0 udp sport 1405 notrack
 	}
 }
 EOF
@@ -851,6 +852,18 @@
 {
 	local family="ip"
 
+	conntrack -h >/dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+		return
+	fi
+
+	socat -h > /dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run nat port shadowing test without socat tool"
+		return
+	fi
+
 	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 3d202b9..7d27f1f 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -16,6 +16,10 @@
 
 cleanup()
 {
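+	# reap anything still running inside the namespaces (e.g. a leftover
+	# nf-queue listener) before the namespaces are deleted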
+	ip netns pids ${ns1} | xargs kill 2>/dev/null
+	ip netns pids ${ns2} | xargs kill 2>/dev/null
+	ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
 	ip netns del ${ns1}
 	ip netns del ${ns2}
 	ip netns del ${nsrouter}
@@ -332,6 +336,55 @@
 	echo "PASS: tcp via loopback and re-queueing"
 }
 
+test_icmp_vrf() {
+	ip -net $ns1 link add tvrf type vrf table 9876
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not add vrf device"
+		return
+	fi
+
+	ip -net $ns1 li set eth0 master tvrf
+	ip -net $ns1 li set tvrf up
+
+	ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+	chain post {
+		type filter hook postrouting priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+}
+EOF
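+	# userspace listener for queue 1; each of the four rules above hands
+	# matching echo-requests to it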
+	ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+	local nfqpid=$!
+
+	sleep 1
+	ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
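+	# the echo-request passes each hook twice, once with oifname tvrf and
+	# once with oifname eth0, so every rule counter must read exactly 1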
+	for n in output post; do
+		for d in tvrf eth0; do
+			ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+			if [ $? -ne 0 ] ; then
+				echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+				ip netns exec ${ns1} nft list ruleset
+				ret=1
+				return
+			fi
+		done
+	done
+
+	wait $nfqpid
+	[ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" || { echo "FAIL: icmp+nfqueue via vrf" ; ret=1 ; }
+	wait 2>/dev/null
+}
+
 ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@
 test_tcp_forward
 test_tcp_localhost
 test_tcp_localhost_requeue
+test_icmp_vrf
 
 exit $ret
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 503982b..9183240 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -68,7 +68,7 @@
         "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
             "$TC action flush action bpf"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
index 88a20c7..c604609 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -15,7 +15,7 @@
 	    "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
 	    "expExitCode": "0",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
 	    "matchCount": "4",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -37,7 +37,7 @@
 	    "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
 	    "expExitCode": "0",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
 	    "matchCount": "256",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -60,7 +60,7 @@
 	    "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
 	    "expExitCode": "2",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
 	    "matchCount": "4",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -82,7 +82,7 @@
 	    "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
 	    "expExitCode": "2",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
 	    "matchCount": "0",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -106,7 +106,7 @@
 	    "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
 	    "expExitCode": "2",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
 	    "matchCount": "0",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -128,7 +128,7 @@
 	    "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
 	    "expExitCode": "2",
 	    "verifyCmd": "$TC qdisc show dev $ETH",
-	    "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+	    "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
 	    "matchCount": "0",
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f6d450..72c4e6b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1531,11 +1531,10 @@
 
 static int kvm_set_memslot(struct kvm *kvm,
 			   const struct kvm_userspace_memory_region *mem,
-			   struct kvm_memory_slot *old,
 			   struct kvm_memory_slot *new, int as_id,
 			   enum kvm_mr_change change)
 {
-	struct kvm_memory_slot *slot;
+	struct kvm_memory_slot *slot, old;
 	struct kvm_memslots *slots;
 	int r;
 
@@ -1566,7 +1565,7 @@
 		 * Note, the INVALID flag needs to be in the appropriate entry
 		 * in the freshly allocated memslots, not in @old or @new.
 		 */
-		slot = id_to_memslot(slots, old->id);
+		slot = id_to_memslot(slots, new->id);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
 		/*
@@ -1597,6 +1596,26 @@
 		kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
 	}
 
+	/*
+	 * Make a full copy of the old memslot, the pointer will become stale
+	 * when the memslots are re-sorted by update_memslots(), and the old
+	 * memslot needs to be referenced after calling update_memslots(), e.g.
+	 * to free its resources and for arch specific behavior.  This needs to
+	 * happen *after* (re)acquiring slots_arch_lock.
+	 */
+	slot = id_to_memslot(slots, new->id);
+	if (slot) {
+		old = *slot;
+	} else {
+		WARN_ON_ONCE(change != KVM_MR_CREATE);
+		memset(&old, 0, sizeof(old));
+		old.id = new->id;
+		old.as_id = as_id;
+	}
+
+	/* Copy the arch-specific data, again after (re)acquiring slots_arch_lock. */
+	memcpy(&new->arch, &old.arch, sizeof(old.arch));
+
 	r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
 	if (r)
 		goto out_slots;
@@ -1604,14 +1623,18 @@
 	update_memslots(slots, new, change);
 	slots = install_new_memslots(kvm, as_id, slots);
 
-	kvm_arch_commit_memory_region(kvm, mem, old, new, change);
+	kvm_arch_commit_memory_region(kvm, mem, &old, new, change);
+
+	/* Free the old memslot's metadata.  Note, this is the full copy!!! */
+	if (change == KVM_MR_DELETE)
+		kvm_free_memslot(kvm, &old);
 
 	kvfree(slots);
 	return 0;
 
 out_slots:
 	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-		slot = id_to_memslot(slots, old->id);
+		slot = id_to_memslot(slots, new->id);
 		slot->flags &= ~KVM_MEMSLOT_INVALID;
 		slots = install_new_memslots(kvm, as_id, slots);
 	} else {
@@ -1626,7 +1649,6 @@
 			      struct kvm_memory_slot *old, int as_id)
 {
 	struct kvm_memory_slot new;
-	int r;
 
 	if (!old->npages)
 		return -EINVAL;
@@ -1639,12 +1661,7 @@
 	 */
 	new.as_id = as_id;
 
-	r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
-	if (r)
-		return r;
-
-	kvm_free_memslot(kvm, old);
-	return 0;
+	return kvm_set_memslot(kvm, mem, &new, as_id, KVM_MR_DELETE);
 }
 
 /*
@@ -1672,7 +1689,8 @@
 	id = (u16)mem->slot;
 
 	/* General sanity checks */
-	if (mem->memory_size & (PAGE_SIZE - 1))
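+	/* also reject a u64 size that would be truncated in an unsigned long */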
+	if ((mem->memory_size & (PAGE_SIZE - 1)) ||
+	    (mem->memory_size != (unsigned long)mem->memory_size))
 		return -EINVAL;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		return -EINVAL;
@@ -1718,7 +1736,6 @@
 	if (!old.npages) {
 		change = KVM_MR_CREATE;
 		new.dirty_bitmap = NULL;
-		memset(&new.arch, 0, sizeof(new.arch));
 	} else { /* Modify an existing slot. */
 		if ((new.userspace_addr != old.userspace_addr) ||
 		    (new.npages != old.npages) ||
@@ -1732,9 +1749,8 @@
 		else /* Nothing to change. */
 			return 0;
 
-		/* Copy dirty_bitmap and arch from the current memslot. */
+		/* Copy dirty_bitmap from the current memslot. */
 		new.dirty_bitmap = old.dirty_bitmap;
-		memcpy(&new.arch, &old.arch, sizeof(new.arch));
 	}
 
 	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
@@ -1760,7 +1776,7 @@
 			bitmap_set(new.dirty_bitmap, 0, new.npages);
 	}
 
-	r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
+	r = kvm_set_memslot(kvm, mem, &new, as_id, change);
 	if (r)
 		goto out_bitmap;
 
@@ -2548,72 +2564,36 @@
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
 {
 	if (pfn == 0)
 		return;
 
-	if (cache)
-		cache->pfn = cache->gfn = 0;
-
 	if (dirty)
 		kvm_release_pfn_dirty(pfn);
 	else
 		kvm_release_pfn_clean(pfn);
 }
 
-static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
-				 struct gfn_to_pfn_cache *cache, u64 gen)
-{
-	kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
-	cache->pfn = gfn_to_pfn_memslot(slot, gfn);
-	cache->gfn = gfn;
-	cache->dirty = false;
-	cache->generation = gen;
-}
-
-static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
-			 struct kvm_host_map *map,
-			 struct gfn_to_pfn_cache *cache,
-			 bool atomic)
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 {
 	kvm_pfn_t pfn;
 	void *hva = NULL;
 	struct page *page = KVM_UNMAPPED_PAGE;
-	struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
-	u64 gen = slots->generation;
 
 	if (!map)
 		return -EINVAL;
 
-	if (cache) {
-		if (!cache->pfn || cache->gfn != gfn ||
-			cache->generation != gen) {
-			if (atomic)
-				return -EAGAIN;
-			kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
-		}
-		pfn = cache->pfn;
-	} else {
-		if (atomic)
-			return -EAGAIN;
-		pfn = gfn_to_pfn_memslot(slot, gfn);
-	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	if (is_error_noslot_pfn(pfn))
 		return -EINVAL;
 
 	if (pfn_valid(pfn)) {
 		page = pfn_to_page(pfn);
-		if (atomic)
-			hva = kmap_atomic(page);
-		else
-			hva = kmap(page);
+		hva = kmap(page);
 #ifdef CONFIG_HAS_IOMEM
-	} else if (!atomic) {
-		hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
 	} else {
-		return -EINVAL;
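+		/* no struct page for this pfn (e.g. MMIO), map it with memremap */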
+		hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
 #endif
 	}
 
@@ -2627,27 +2607,9 @@
 
 	return 0;
 }
-
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-		struct gfn_to_pfn_cache *cache, bool atomic)
-{
-	return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
-			cache, atomic);
-}
-EXPORT_SYMBOL_GPL(kvm_map_gfn);
-
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
-{
-	return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
-		NULL, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm *kvm,
-			struct kvm_memory_slot *memslot,
-			struct kvm_host_map *map,
-			struct gfn_to_pfn_cache *cache,
-			bool dirty, bool atomic)
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
 	if (!map)
 		return;
@@ -2655,45 +2617,21 @@
 	if (!map->hva)
 		return;
 
-	if (map->page != KVM_UNMAPPED_PAGE) {
-		if (atomic)
-			kunmap_atomic(map->hva);
-		else
-			kunmap(map->page);
-	}
+	if (map->page != KVM_UNMAPPED_PAGE)
+		kunmap(map->page);
 #ifdef CONFIG_HAS_IOMEM
-	else if (!atomic)
-		memunmap(map->hva);
 	else
-		WARN_ONCE(1, "Unexpected unmapping in atomic context");
+		memunmap(map->hva);
 #endif
 
 	if (dirty)
-		mark_page_dirty_in_slot(kvm, memslot, map->gfn);
+		kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
 
-	if (cache)
-		cache->dirty |= dirty;
-	else
-		kvm_release_pfn(map->pfn, dirty, NULL);
+	kvm_release_pfn(map->pfn, dirty);
 
 	map->hva = NULL;
 	map->page = NULL;
 }
-
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
-		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
-{
-	__kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
-			cache, dirty, atomic);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
-
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
-{
-	__kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
-			map, NULL, dirty, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -2993,7 +2931,8 @@
 	int r;
 	gpa_t gpa = ghc->gpa + offset;
 
-	BUG_ON(len + offset > ghc->len);
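+	/* an oversized access is a caller bug; warn once and fail gracefully */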
+	if (WARN_ON_ONCE(len + offset > ghc->len))
+		return -EINVAL;
 
 	if (slots->generation != ghc->generation) {
 		if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -3030,7 +2969,8 @@
 	int r;
 	gpa_t gpa = ghc->gpa + offset;
 
-	BUG_ON(len + offset > ghc->len);
+	if (WARN_ON_ONCE(len + offset > ghc->len))
+		return -EINVAL;
 
 	if (slots->generation != ghc->generation) {
 		if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -3747,7 +3687,7 @@
 	struct kvm_fpu *fpu = NULL;
 	struct kvm_sregs *kvm_sregs = NULL;
 
-	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
 		return -EIO;
 
 	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@@ -3957,7 +3897,7 @@
 	void __user *argp = compat_ptr(arg);
 	int r;
 
-	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
 		return -EIO;
 
 	switch (ioctl) {
@@ -4023,7 +3963,7 @@
 {
 	struct kvm_device *dev = filp->private_data;
 
-	if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
+	if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
 		return -EIO;
 
 	switch (ioctl) {
@@ -4345,7 +4285,7 @@
 	void __user *argp = (void __user *)arg;
 	int r;
 
-	if (kvm->mm != current->mm || kvm->vm_bugged)
+	if (kvm->mm != current->mm || kvm->vm_dead)
 		return -EIO;
 	switch (ioctl) {
 	case KVM_CREATE_VCPU:
@@ -4556,7 +4496,7 @@
 	struct kvm *kvm = filp->private_data;
 	int r;
 
-	if (kvm->mm != current->mm || kvm->vm_bugged)
+	if (kvm->mm != current->mm || kvm->vm_dead)
 		return -EIO;
 	switch (ioctl) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT