Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:
"A number of fixes and some late updates:
- make in_compat_syscall() behavior on x86-32 similar to other
platforms, this touches a number of generic files but is not
intended to impact non-x86 platforms.
- objtool fixes
- PAT preemption fix
- paravirt fixes/cleanups
- cpufeatures updates for new instructions
- earlyprintk quirk
- make microcode version in sysfs world-readable (it is already
world-readable in procfs)
- minor cleanups and fixes"
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
compat: Cleanup in_compat_syscall() callers
x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT
objtool: Support GCC 9 cold subfunction naming scheme
x86/numa_emulation: Fix uniform-split numa emulation
x86/paravirt: Remove unused _paravirt_ident_32
x86/mm/pat: Disable preemption around __flush_tlb_all()
x86/paravirt: Remove GPL from pv_ops export
x86/traps: Use format string with panic() call
x86: Clean up 'sizeof x' => 'sizeof(x)'
x86/cpufeatures: Enumerate MOVDIR64B instruction
x86/cpufeatures: Enumerate MOVDIRI instruction
x86/earlyprintk: Add a force option for pciserial device
objtool: Support per-function rodata sections
x86/microcode: Make revision and processor flags world-readable
diff --git a/Documentation/ABI/testing/sysfs-platform-lg-laptop b/Documentation/ABI/testing/sysfs-platform-lg-laptop
new file mode 100644
index 0000000..cf47749
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-lg-laptop
@@ -0,0 +1,35 @@
+What: /sys/devices/platform/lg-laptop/reader_mode
+Date: October 2018
+KernelVersion: 4.20
+Contact: Matan Ziv-Av <matan@svgalib.org>
+Description:
+ Control reader mode. 1 means on, 0 means off.
+
+What: /sys/devices/platform/lg-laptop/fn_lock
+Date: October 2018
+KernelVersion: 4.20
+Contact: Matan Ziv-Av <matan@svgalib.org>
+Description:
+ Control FN lock mode. 1 means on, 0 means off.
+
+What: /sys/devices/platform/lg-laptop/battery_care_limit
+Date: October 2018
+KernelVersion: 4.20
+Contact: Matan Ziv-Av <matan@svgalib.org>
+Description:
+ Maximal battery charge level. Accepted values are 80 or 100.
+
+What: /sys/devices/platform/lg-laptop/fan_mode
+Date: October 2018
+KernelVersion: 4.20
+Contact: Matan Ziv-Av <matan@svgalib.org>
+Description:
+ Control fan mode. 1 for performance mode, 0 for silent mode.
+
+What: /sys/devices/platform/lg-laptop/usb_charge
+Date: October 2018
+KernelVersion: 4.20
+Contact: Matan Ziv-Av <matan@svgalib.org>
+Description:
+ Control USB port charging when device is turned off.
+ 1 means on, 0 means off.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8384c68..476722b 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1879,10 +1879,8 @@
wbc_init_bio(@wbc, @bio)
Should be called for each bio carrying writeback data and
- associates the bio with the inode's owner cgroup and the
- corresponding request queue. This must be called after
- a queue (device) has been associated with the bio and
- before submission.
+ associates the bio with the inode's owner cgroup. Can be
+ called anytime between bio allocation and submission.
wbc_account_io(@wbc, @page, @bytes)
Should be called for each data segment being written out.
@@ -1901,7 +1899,7 @@
the writeback session is holding shared resources, e.g. a journal
entry, may lead to priority inversion. There is no one easy solution
for the problem. Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
directly.
diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index 5969bf4..8763866 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -183,6 +183,10 @@
void (*describe)(const struct key *key, struct seq_file *m);
void (*destroy)(void *payload);
+ int (*query)(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info);
+ int (*eds_op)(struct kernel_pkey_params *params,
+ const void *in, void *out);
int (*verify_signature)(const struct key *key,
const struct public_key_signature *sig);
};
@@ -207,12 +211,22 @@
asymmetric key will look after freeing the fingerprint and releasing the
reference on the subtype module.
- (3) verify_signature().
+ (3) query().
- Optional. These are the entry points for the key usage operations.
- Currently there is only the one defined. If not set, the caller will be
- given -ENOTSUPP. The subtype may do anything it likes to implement an
- operation, including offloading to hardware.
+ Mandatory. This is a function for querying the capabilities of a key.
+
+ (4) eds_op().
+
+ Optional. This is the entry point for the encryption, decryption and
+ signature creation operations (which are distinguished by the operation ID
+ in the parameter struct). The subtype may do anything it likes to
+ implement an operation, including offloading to hardware.
+
+ (5) verify_signature().
+
+ Optional. This is the entry point for signature verification. The
+ subtype may do anything it likes to implement an operation, including
+ offloading to hardware.
==========================
@@ -234,6 +248,8 @@
- X.509 ASN.1 stream.
- Pointer to TPM key.
- Pointer to UEFI key.
+ - PKCS#8 private key [RFC 5208].
+ - PKCS#5 encrypted private key [RFC 2898].
During key instantiation each parser in the list is tried until one doesn't
return -EBADMSG.
diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
index 9b5685a..84262cd 100644
--- a/Documentation/devicetree/bindings/arm/cpu-capacity.txt
+++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
@@ -59,9 +59,11 @@
===========================================
Example 1 (ARM 64-bit, 6-cpu system, two clusters):
-capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1)
-supposing cluster0@max-freq=1100 and custer1@max-freq=850,
-final capacities are 1024 for cluster0 and 446 for cluster1
+The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
+are 1024 and 578 for cluster0 and cluster1. Further normalization
+is done by the operating system based on cluster0@max-freq=1100 and
+cluster1@max-freq=850, final capacities are 1024 for cluster0 and
+446 for cluster1 (578*850/1100).
cpus {
#address-cells = <2>;
diff --git a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
similarity index 70%
rename from Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt
rename to Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
index a9b3526..513f034 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
@@ -1,20 +1,22 @@
-Innolux TV123WAM 12.3 inch eDP 2K display panel
+Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
Required properties:
-- compatible: should be "innolux,tv123wam"
+- compatible: should be "innolux,p120zdg-bf1"
- power-supply: regulator to provide the supply voltage
Optional properties:
- enable-gpios: GPIO pin to enable or disable the panel
- backlight: phandle of the backlight device attached to the panel
+- no-hpd: If HPD isn't hooked up, add this property.
Example:
panel_edp: panel-edp {
- compatible = "innolux,tv123wam";
+ compatible = "innolux,p120zdg-bf1";
enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
power-supply = <&pm8916_l2>;
backlight = <&backlight>;
+ no-hpd;
};
diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
index 45a457a..b2b872c 100644
--- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt
+++ b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
@@ -11,6 +11,9 @@
- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
- enable-gpios: GPIO pin to enable or disable the panel
- backlight: phandle of the backlight device attached to the panel
+- no-hpd: This panel is supposed to communicate that it's ready via HPD
+ (hot plug detect) signal, but the signal isn't hooked up so we should
+ hardcode the max delay from the panel spec when powering up the panel.
Example:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
index 091c8df..b245363 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
@@ -3,6 +3,7 @@
Required properties:
- compatible :
- "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc
+ - "fsl,imx8qxp-lpi2c" for LPI2C compatible with the one integrated on i.MX8QXP soc
- reg : address and length of the lpi2c master registers
- interrupts : lpi2c interrupt
- clocks : lpi2c clock specifier
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
index 06a363d..b9a1d74 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -7,6 +7,7 @@
for da850 - compatible = "ti,da850-ecap", "ti,am3352-ecap", "ti,am33xx-ecap";
for dra746 - compatible = "ti,dra746-ecap", "ti,am3352-ecap";
for 66ak2g - compatible = "ti,k2g-ecap", "ti,am3352-ecap";
+ for am654 - compatible = "ti,am654-ecap", "ti,am3352-ecap";
- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
the cells format. The PWM channel index ranges from 0 to 4. The only third
cell flag supported by this binding is PWM_POLARITY_INVERTED.
diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index e1ef6af..7f31fe7 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -3,7 +3,9 @@
Required Properties:
- compatible: should be "renesas,pwm-rcar" and one of the following.
- "renesas,pwm-r8a7743": for RZ/G1M
+ - "renesas,pwm-r8a7744": for RZ/G1N
- "renesas,pwm-r8a7745": for RZ/G1E
+ - "renesas,pwm-r8a774a1": for RZ/G2M
- "renesas,pwm-r8a7778": for R-Car M1A
- "renesas,pwm-r8a7779": for R-Car H1
- "renesas,pwm-r8a7790": for R-Car H2
@@ -12,6 +14,8 @@
- "renesas,pwm-r8a7795": for R-Car H3
- "renesas,pwm-r8a7796": for R-Car M3-W
- "renesas,pwm-r8a77965": for R-Car M3-N
+ - "renesas,pwm-r8a77970": for R-Car V3M
+ - "renesas,pwm-r8a77980": for R-Car V3H
- "renesas,pwm-r8a77990": for R-Car E3
- "renesas,pwm-r8a77995": for R-Car D3
- reg: base address and length of the registers block for the PWM.
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
index d53a167..848a92b 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -2,13 +2,19 @@
Required Properties:
- - compatible: should be one of the following.
+ - compatible: must contain one or more of the following:
- "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
- "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
- "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
+ - "renesas,tpu-r8a7744": for R8A7744 (RZ/G1N) compatible PWM controller.
- "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
- "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
- - "renesas,tpu": for generic R-Car and RZ/G1 TPU PWM controller.
+ - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM
+ controller.
+ - "renesas,tpu-r8a77980": for R8A77980 (R-Car V3H) compatible PWM
+ controller.
+ - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for
+ the entries listed above.
- reg: Base address and length of each memory resource used by the PWM
controller hardware module.
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 51c136c..eef7d9d 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -286,6 +286,12 @@
"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
for untrusted layers like from a pen drive.
+Note: redirect_dir={off|nofollow|follow(*)} conflicts with metacopy=on, and
+results in an error.
+
+(*) redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
+given.
+
Sharing and copying layers
--------------------------
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 321d74b..cf43bc4 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -623,6 +623,11 @@
On success you get a new struct file sharing the mount/dentry with the
original, on failure - ERR_PTR().
--
+[mandatory]
+ ->clone_file_range() and ->dedupe_file_range() have been replaced with
+ ->remap_file_range(). See Documentation/filesystems/vfs.txt for more
+ information.
+--
[recommended]
->lookup() instances doing an equivalent of
if (IS_ERR(inode))
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index a6c6a8a..5f71a25 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,8 +883,9 @@
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
- int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
- int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+ loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
};
@@ -960,11 +961,18 @@
copy_file_range: called by the copy_file_range(2) system call.
- clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
- FICLONE commands.
-
- dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
- command.
+ remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
+ FICLONE and FIDEDUPERANGE commands to remap file ranges. An
+ implementation should remap len bytes at pos_in of the source file into
+ the dest file at pos_out. Implementations must handle callers passing
+ in len == 0; this means "remap to the end of the source file". The
+ return value should be the number of bytes remapped, or the usual
+ negative error code if errors occurred before any bytes were remapped.
+ The remap_flags parameter accepts REMAP_FILE_* flags. If
+ REMAP_FILE_DEDUP is set then the implementation must only remap if the
+ requested file ranges have identical contents. If REMAP_FILE_CAN_SHORTEN is
+ set, the caller is ok with the implementation shortening the request
+ length to satisfy alignment or EOF requirements (or any other reason).
fadvise: possibly called by the fadvise64() system call.
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 7b6a2b2..8da26c6 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -537,21 +537,6 @@
The third parameter may be a text as in this example, but it may also
be an expanded variable or a macro.
- cc-fullversion
- cc-fullversion is useful when the exact version of gcc is needed.
- One typical use-case is when a specific GCC version is broken.
- cc-fullversion points out a more specific version than cc-version does.
-
- Example:
- #arch/powerpc/Makefile
- $(Q)if test "$(cc-fullversion)" = "040200" ; then \
- echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
- false ; \
- fi
-
- In this example for a specific GCC version the build will error out
- explaining to the user why it stops.
-
cc-cross-prefix
cc-cross-prefix is used to check if there exists a $(CC) in path with
one of the listed prefixes. The first prefix where there exist a
diff --git a/Documentation/laptops/lg-laptop.rst b/Documentation/laptops/lg-laptop.rst
new file mode 100644
index 0000000..e486fe7d
--- /dev/null
+++ b/Documentation/laptops/lg-laptop.rst
@@ -0,0 +1,81 @@
+.. SPDX-License-Identifier: GPL-2.0+
+LG Gram laptop extra features
+=============================
+
+By Matan Ziv-Av <matan@svgalib.org>
+
+
+Hotkeys
+-------
+
+The following FN keys are ignored by the kernel without this driver:
+- FN-F1 (LG control panel) - Generates F15
+- FN-F5 (Touchpad toggle) - Generates F13
+- FN-F6 (Airplane mode) - Generates RFKILL
+- FN-F8 (Keyboard backlight) - Generates F16.
+ This key also changes keyboard backlight mode.
+- FN-F9 (Reader mode) - Generates F14
+
+The rest of the FN keys work without a need for a special driver.
+
+
+Reader mode
+-----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/reader_mode disables/enables
+reader mode. In this mode the screen colors change (blue color reduced),
+and the reader mode indicator LED (on F9 key) turns on.
+
+
+FN Lock
+-------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/fn_lock disables/enables
+FN lock.
+
+
+Battery care limit
+------------------
+
+Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
+sets the maximum capacity to charge the battery. Limiting the charge
+reduces battery capacity loss over time.
+
+This value is reset to 100 when the kernel boots.
+
+
+Fan mode
+--------
+
+Writing 1/0 to /sys/devices/platform/lg-laptop/fan_mode disables/enables
+the fan silent mode.
+
+
+USB charge
+----------
+
+Writing 0/1 to /sys/devices/platform/lg-laptop/usb_charge disables/enables
+charging another device from the USB port while the device is turned off.
+
+This value is reset to 0 when the kernel boots.
+
+
+LEDs
+~~~~
+
+There are two LED devices supported by the driver:
+
+Keyboard backlight
+------------------
+
+An LED device named kbd_led controls the keyboard backlight. There are three
+lighting levels: off (0), low (127) and high (255).
+
+The keyboard backlight is also controlled by the key combination FN-F8
+which cycles through those levels.
+
+
+Touchpad indicator LED
+----------------------
+
+On the F5 key. Controlled by the LED device named tpad_led.
diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst
index 1e4948c..4d118b8 100644
--- a/Documentation/networking/ice.rst
+++ b/Documentation/networking/ice.rst
@@ -20,7 +20,7 @@
The driver is enabled via the standard kernel configuration system,
using the make command::
- make oldconfig/silentoldconfig/menuconfig/etc.
+ make oldconfig/menuconfig/etc.
The driver is located in the menu structure at:
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 163b5ff..32b2157 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -316,6 +316,17 @@
By default it's enabled with a non-zero value. 0 disables F-RTO.
+tcp_fwmark_accept - BOOLEAN
+ If set, incoming connections to listening sockets that do not have a
+ socket mark will set the mark of the accepting socket to the fwmark of
+ the incoming SYN packet. This will cause all packets on that connection
+ (starting from the first SYNACK) to be sent with that fwmark. The
+ listening socket's mark is unchanged. Listening sockets that already
+ have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+ unaffected.
+
+ Default: 0
+
tcp_invalid_ratelimit - INTEGER
Limit the maximal rate for sending duplicate acknowledgments
in response to incoming TCP packets that are for an existing
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 7578085..878ebfd 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -25,6 +25,7 @@
code-of-conduct-interpretation
development-process
submitting-patches
+ programming-language
coding-style
maintainer-pgp-guide
email-clients
diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
new file mode 100644
index 0000000..e5f5f06
--- /dev/null
+++ b/Documentation/process/programming-language.rst
@@ -0,0 +1,45 @@
+.. _programming_language:
+
+Programming Language
+====================
+
+The kernel is written in the C programming language [c-language]_.
+More precisely, the kernel is typically compiled with ``gcc`` [gcc]_
+under ``-std=gnu89`` [gcc-c-dialect-options]_: the GNU dialect of ISO C90
+(including some C99 features).
+
+This dialect contains many extensions to the language [gnu-extensions]_,
+and many of them are used within the kernel as a matter of course.
+
+There is some support for compiling the kernel with ``clang`` [clang]_
+and ``icc`` [icc]_ for several of the architectures, although at the time
+of writing it is not completed, requiring third-party patches.
+
+Attributes
+----------
+
+Attributes [gcc-attribute-syntax]_ are one of the common extensions used
+throughout the kernel. Attributes allow introducing
+implementation-defined semantics to language entities (like variables,
+functions or types) without having to make significant syntactic changes
+to the language (e.g. adding a new keyword) [n2049]_.
+
+In some cases, attributes are optional (i.e. a compiler not supporting them
+should still produce proper code, even if it is slower or does not perform
+as many compile-time checks/diagnostics).
+
+The kernel defines pseudo-keywords (e.g. ``__pure``) instead of using
+directly the GNU attribute syntax (e.g. ``__attribute__((__pure__))``)
+in order to feature detect which ones can be used and/or to shorten the code.
+
+Please refer to ``include/linux/compiler_attributes.h`` for more information.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9ce7256..9521c42 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -859,6 +859,7 @@
and either the buffer length or the OtherInfo length exceeds the
allowed length.
+
* Restrict keyring linkage::
long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring,
@@ -890,6 +891,116 @@
applicable to the asymmetric key type.
+ * Query an asymmetric key::
+
+ long keyctl(KEYCTL_PKEY_QUERY,
+ key_serial_t key_id, unsigned long reserved,
+ struct keyctl_pkey_query *info);
+
+ Get information about an asymmetric key. The information is returned in
+ the keyctl_pkey_query struct::
+
+ __u32 supported_ops;
+ __u32 key_size;
+ __u16 max_data_size;
+ __u16 max_sig_size;
+ __u16 max_enc_size;
+ __u16 max_dec_size;
+ __u32 __spare[10];
+
+ ``supported_ops`` contains a bit mask of flags indicating which ops are
+ supported. This is constructed from a bitwise-OR of::
+
+ KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+ ``key_size`` indicates the size of the key in bits.
+
+ ``max_*_size`` indicate the maximum sizes in bytes of a blob of data to be
+ signed, a signature blob, a blob to be encrypted and a blob to be
+ decrypted.
+
+ ``__spare[]`` must be set to 0. This is intended for future use to hand
+ over one or more passphrases needed to unlock a key.
+
+ If successful, 0 is returned. If the key is not an asymmetric key,
+ EOPNOTSUPP is returned.
+
+
+ * Encrypt, decrypt, sign or verify a blob using an asymmetric key::
+
+ long keyctl(KEYCTL_PKEY_ENCRYPT,
+ const struct keyctl_pkey_params *params,
+ const char *info,
+ const void *in,
+ void *out);
+
+ long keyctl(KEYCTL_PKEY_DECRYPT,
+ const struct keyctl_pkey_params *params,
+ const char *info,
+ const void *in,
+ void *out);
+
+ long keyctl(KEYCTL_PKEY_SIGN,
+ const struct keyctl_pkey_params *params,
+ const char *info,
+ const void *in,
+ void *out);
+
+ long keyctl(KEYCTL_PKEY_VERIFY,
+ const struct keyctl_pkey_params *params,
+ const char *info,
+ const void *in,
+ const void *in2);
+
+ Use an asymmetric key to perform a public-key cryptographic operation on a
+ blob of data. For encryption and verification, the asymmetric key may
+ only need the public parts to be available, but for decryption and signing
+ the private parts are required also.
+
+ The parameter block pointed to by params contains a number of integer
+ values::
+
+ __s32 key_id;
+ __u32 in_len;
+ __u32 out_len;
+ __u32 in2_len;
+
+ ``key_id`` is the ID of the asymmetric key to be used. ``in_len`` and
+ ``in2_len`` indicate the amount of data in the in and in2 buffers and
+ ``out_len`` indicates the size of the out buffer as appropriate for the
+ above operations.
+
+ For a given operation, the in and out buffers are used as follows::
+
+ Operation ID in,in_len out,out_len in2,in2_len
+ ======================= =============== =============== ===============
+ KEYCTL_PKEY_ENCRYPT Raw data Encrypted data -
+ KEYCTL_PKEY_DECRYPT Encrypted data Raw data -
+ KEYCTL_PKEY_SIGN Raw data Signature -
+ KEYCTL_PKEY_VERIFY Raw data - Signature
+
+ ``info`` is a string of key=value pairs that supply supplementary
+ information. These include:
+
+ ``enc=<encoding>`` The encoding of the encrypted/signature blob. This
+ can be "pkcs1" for RSASSA-PKCS1-v1.5 or
+ RSAES-PKCS1-v1.5; "pss" for "RSASSA-PSS"; "oaep" for
+ "RSAES-OAEP". If omitted or is "raw", the raw output
+ of the encryption function is specified.
+
+ ``hash=<algo>`` If the data buffer contains the output of a hash
+ function and the encoding includes some indication of
+ which hash function was used, the hash function can be
+ specified with this, eg. "hash=sha256".
+
+ The ``__spare[]`` space in the parameter block must be set to 0. This is
+ intended, amongst other things, to allow the passing of passphrases
+ required to unlock a key.
+
+ If successful, encrypt, decrypt and sign all return the amount of data
+ written into the output buffer. Verification returns 0 on success.
+
+
Kernel Services
===============
@@ -1483,6 +1594,112 @@
attempted key link operation. If there is no match, -EINVAL is returned.
+ * ``int (*asym_eds_op)(struct kernel_pkey_params *params,
+ const void *in, void *out);``
+ ``int (*asym_verify_signature)(struct kernel_pkey_params *params,
+ const void *in, const void *in2);``
+
+ These methods are optional. If provided the first allows a key to be
+ used to encrypt, decrypt or sign a blob of data, and the second allows a
+ key to verify a signature.
+
+ In all cases, the following information is provided in the params block::
+
+ struct kernel_pkey_params {
+ struct key *key;
+ const char *encoding;
+ const char *hash_algo;
+ char *info;
+ __u32 in_len;
+ union {
+ __u32 out_len;
+ __u32 in2_len;
+ };
+ enum kernel_pkey_operation op : 8;
+ };
+
+ This includes the key to be used; a string indicating the encoding to use
+ (for instance, "pkcs1" may be used with an RSA key to indicate
+ RSASSA-PKCS1-v1.5 or RSAES-PKCS1-v1.5 encoding or "raw" if no encoding);
+ the name of the hash algorithm used to generate the data for a signature
+ (if appropriate); the sizes of the input and output (or second input)
+ buffers; and the ID of the operation to be performed.
+
+ For a given operation ID, the input and output buffers are used as
+ follows::
+
+ Operation ID in,in_len out,out_len in2,in2_len
+ ======================= =============== =============== ===============
+ kernel_pkey_encrypt Raw data Encrypted data -
+ kernel_pkey_decrypt Encrypted data Raw data -
+ kernel_pkey_sign Raw data Signature -
+ kernel_pkey_verify Raw data - Signature
+
+ asym_eds_op() deals with encryption, decryption and signature creation as
+ specified by params->op. Note that params->op is also set for
+ asym_verify_signature().
+
+ Encrypting and signature creation both take raw data in the input buffer
+ and return the encrypted result in the output buffer. Padding may have
+ been added if an encoding was set. In the case of signature creation,
+ depending on the encoding, the padding created may need to indicate the
+ digest algorithm - the name of which should be supplied in hash_algo.
+
+ Decryption takes encrypted data in the input buffer and returns the raw
+ data in the output buffer. Padding will get checked and stripped off if
+ an encoding was set.
+
+ Verification takes raw data in the input buffer and the signature in the
+ second input buffer and checks that the one matches the other. Padding
+ will be validated. Depending on the encoding, the digest algorithm used
+ to generate the raw data may need to be indicated in hash_algo.
+
+ If successful, asym_eds_op() should return the number of bytes written
+ into the output buffer. asym_verify_signature() should return 0.
+
+ A variety of errors may be returned, including EOPNOTSUPP if the operation
+ is not supported; EKEYREJECTED if verification fails; ENOPKG if the
+ required crypto isn't available.
+
+
+ * ``int (*asym_query)(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info);``
+
+ This method is optional. If provided it allows information about the
+ public or asymmetric key held in the key to be determined.
+
+ The parameter block is as for asym_eds_op() and co. but in_len and out_len
+ are unused. The encoding and hash_algo fields should be used to reduce
+ the returned buffer/data sizes as appropriate.
+
+ If successful, the following information is filled in::
+
+ struct kernel_pkey_query {
+ __u32 supported_ops;
+ __u32 key_size;
+ __u16 max_data_size;
+ __u16 max_sig_size;
+ __u16 max_enc_size;
+ __u16 max_dec_size;
+ };
+
+ The supported_ops field will contain a bitmask indicating what operations
+ are supported by the key, including encryption of a blob, decryption of a
+ blob, signing a blob and verifying the signature on a blob. The following
+ constants are defined for this::
+
+ KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+ The key_size field is the size of the key in bits. max_data_size and
+ max_sig_size are the maximum raw data and signature sizes for creation and
+ verification of a signature; max_enc_size and max_dec_size are the maximum
+ raw data and encrypted data sizes for encryption and decryption. The
+ max_*_size fields are measured in bytes.
+
+ If successful, 0 will be returned. If the key doesn't support this,
+ EOPNOTSUPP will be returned.
+
+
Request-Key Callback Service
============================
diff --git a/Documentation/security/self-protection.rst b/Documentation/security/self-protection.rst
index e1ca698..f584fb7 100644
--- a/Documentation/security/self-protection.rst
+++ b/Documentation/security/self-protection.rst
@@ -302,11 +302,11 @@
Memory poisoning
----------------
-When releasing memory, it is best to poison the contents (clear stack on
-syscall return, wipe heap memory on a free), to avoid reuse attacks that
-rely on the old contents of memory. This frustrates many uninitialized
-variable attacks, stack content exposures, heap content exposures, and
-use-after-free attacks.
+When releasing memory, it is best to poison the contents, to avoid reuse
+attacks that rely on the old contents of memory. E.g., clear stack on a
+syscall return (``CONFIG_GCC_PLUGIN_STACKLEAK``), wipe heap memory on a
+free. This frustrates many uninitialized variable attacks, stack content
+exposures, heap content exposures, and use-after-free attacks.
Destination tracking
--------------------
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 37a6795..1b87752 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -89,6 +89,7 @@
- shmmni
- softlockup_all_cpu_backtrace
- soft_watchdog
+- stack_erasing
- stop-a [ SPARC only ]
- sysrq ==> Documentation/admin-guide/sysrq.rst
- sysctl_writes_strict
@@ -987,6 +988,23 @@
==============================================================
+stack_erasing
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single-CPU system, kernel
+compilation sees a 1% slowdown; other systems and workloads may vary.
+
+ 0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+ 1: kernel stack erasing is enabled (default); it is performed before
+ returning to userspace at the end of syscalls.
+
+==============================================================
+
tainted:
Non-zero if the kernel has been tainted. Numeric values, which can be
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 7028986..73aaaa3 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -146,3 +146,6 @@
Be very careful vs. KASLR when changing anything here. The KASLR address
range must not overlap with anything except the KASAN shadow area, which is
correct as KASAN disables KASLR.
+
+For both 4- and 5-level layouts, the STACKLEAK_POISON value lies in the last
+2MB hole: ffffffffffff4111
diff --git a/MAINTAINERS b/MAINTAINERS
index 1c0f771..f485597 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -376,7 +376,7 @@
ACPI PMIC DRIVERS
M: "Rafael J. Wysocki" <rjw@rjwysocki.net>
M: Len Brown <lenb@kernel.org>
-R: Andy Shevchenko <andy@infradead.org>
+R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
R: Mika Westerberg <mika.westerberg@linux.intel.com>
L: linux-acpi@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-acpi/list/
@@ -3737,6 +3737,11 @@
S: Maintained
F: drivers/platform/x86/compal-laptop.c
+COMPILER ATTRIBUTES
+M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+S: Maintained
+F: include/linux/compiler_attributes.h
+
CONEXANT ACCESSRUNNER USB DRIVER
L: accessrunner-general@lists.sourceforge.net
W: http://accessrunner.sourceforge.net/
@@ -4207,6 +4212,12 @@
S: Maintained
F: drivers/platform/x86/dell-rbtn.*
+DELL REMOTE BIOS UPDATE DRIVER
+M: Stuart Hayes <stuart.w.hayes@gmail.com>
+L: platform-driver-x86@vger.kernel.org
+S: Maintained
+F: drivers/platform/x86/dell_rbu.c
+
DELL LAPTOP SMM DRIVER
M: Pali Rohár <pali.rohar@gmail.com>
S: Maintained
@@ -4214,10 +4225,11 @@
F: include/uapi/linux/i8k.h
DELL SYSTEMS MANAGEMENT BASE DRIVER (dcdbas)
-M: Doug Warzecha <Douglas_Warzecha@dell.com>
+M: Stuart Hayes <stuart.w.hayes@gmail.com>
+L: platform-driver-x86@vger.kernel.org
S: Maintained
F: Documentation/dcdbas.txt
-F: drivers/firmware/dcdbas.*
+F: drivers/platform/x86/dcdbas.*
DELL WMI NOTIFICATIONS DRIVER
M: Matthew Garrett <mjg59@srcf.ucam.org>
@@ -5871,6 +5883,14 @@
S: Maintained
F: drivers/i2c/busses/i2c-cpm.c
+FREESCALE IMX LPI2C DRIVER
+M: Dong Aisheng <aisheng.dong@nxp.com>
+L: linux-i2c@vger.kernel.org
+L: linux-imx@nxp.com
+S: Maintained
+F: drivers/i2c/busses/i2c-imx-lpi2c.c
+F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+
FREESCALE IMX / MXC FEC DRIVER
M: Fugang Duan <fugang.duan@nxp.com>
L: netdev@vger.kernel.org
@@ -7347,6 +7367,12 @@
S: Supported
F: sound/soc/intel/
+INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER
+M: Hans de Goede <hdegoede@redhat.com>
+L: platform-driver-x86@vger.kernel.org
+S: Maintained
+F: drivers/platform/x86/intel_atomisp2_pm.c
+
INTEL C600 SERIES SAS CONTROLLER DRIVER
M: Intel SCU Linux support <intel-linux-scu@intel.com>
M: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
@@ -7533,7 +7559,6 @@
M: Vishwanath Somayaji <vishwanath.somayaji@intel.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
-F: arch/x86/include/asm/pmc_core.h
F: drivers/platform/x86/intel_pmc_core*
INTEL PMC/P-Unit IPC DRIVER
@@ -7577,7 +7602,8 @@
F: include/uapi/rdma/i40iw-abi.h
INTEL TELEMETRY DRIVER
-M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
+M: Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
+M: "David E. Box" <david.e.box@linux.intel.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: arch/x86/include/asm/intel_telemetry.h
@@ -8310,6 +8336,14 @@
S: Maintained
F: drivers/usb/misc/legousbtower.c
+LG LAPTOP EXTRAS
+M: Matan Ziv-Av <matan@svgalib.org>
+L: platform-driver-x86@vger.kernel.org
+S: Maintained
+F: Documentation/ABI/testing/sysfs-platform-lg-laptop
+F: Documentation/laptops/lg-laptop.rst
+F: drivers/platform/x86/lg-laptop.c
+
LG2160 MEDIA DRIVER
M: Michael Krufky <mkrufky@linuxtv.org>
L: linux-media@vger.kernel.org
@@ -15829,7 +15863,6 @@
F: net/vmw_vsock/virtio_transport.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
-F: drivers/vhost/vsock.h
F: tools/testing/vsock/
VIRTIO CONSOLE DRIVER
diff --git a/Makefile b/Makefile
index 9aa352b..bce41f41 100644
--- a/Makefile
+++ b/Makefile
@@ -485,7 +485,7 @@
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree)
endif
-ifeq ($(cc-name),clang)
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
ifneq ($(CROSS_COMPILE),)
CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%))
GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD)))
@@ -702,7 +702,7 @@
KBUILD_CFLAGS += $(stackp-flags-y)
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
diff --git a/arch/Kconfig b/arch/Kconfig
index ed27fd2..e1e540f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -429,6 +429,13 @@
See Documentation/userspace-api/seccomp_filter.rst for details.
+config HAVE_ARCH_STACKLEAK
+ bool
+ help
+ An architecture should select this if it has the code which
+ fills the used part of the kernel stack with the STACKLEAK_POISON
+ value before returning from system calls.
+
config HAVE_STACKPROTECTOR
bool
help
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index c50c36b..8bf1c17 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -923,7 +923,7 @@
interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&rcc HASH1>;
resets = <&rcc HASH1_R>;
- dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0 0x0>;
+ dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>;
dma-names = "in";
dma-maxburst = <2>;
status = "disabled";
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 63af623..1c76168 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1,6 +1,7 @@
CONFIG_SYSVIPC=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
CONFIG_CGROUPS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EMBEDDED=y
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index af318d9..3d191fd 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -773,7 +773,7 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = {
{
.membase = IOMEM(MODEM_VIRT),
.mapbase = MODEM_PHYS,
- .irq = -EINVAL, /* changed later */
+ .irq = IRQ_NOTCONNECTED, /* changed later */
.flags = UPF_BOOT_AUTOCONF,
.irqflags = IRQF_TRIGGER_RISING,
.iotype = UPIO_MEM,
@@ -864,8 +864,7 @@ static int __init modem_nreset_init(void)
/*
- * This function expects MODEM IRQ number already assigned to the port
- * and fails if it's not.
+ * This function expects MODEM IRQ number already assigned to the port.
* The MODEM device requires its RESET# pin kept high during probe.
* That requirement can be fulfilled in several ways:
* - with a descriptor of already functional modem_nreset regulator
@@ -888,9 +887,6 @@ static int __init ams_delta_modem_init(void)
if (!machine_is_ams_delta())
return -ENODEV;
- if (ams_delta_modem_ports[0].irq < 0)
- return ams_delta_modem_ports[0].irq;
-
omap_cfg_reg(M14_1510_GPIO2);
/* Initialize the modem_nreset regulator consumer before use */
diff --git a/arch/arm/plat-orion/mpp.c b/arch/arm/plat-orion/mpp.c
index 5b4ff93..8a6880d 100644
--- a/arch/arm/plat-orion/mpp.c
+++ b/arch/arm/plat-orion/mpp.c
@@ -28,10 +28,15 @@ void __init orion_mpp_conf(unsigned int *mpp_list, unsigned int variant_mask,
unsigned int mpp_max, void __iomem *dev_bus)
{
unsigned int mpp_nr_regs = (1 + mpp_max/8);
- u32 mpp_ctrl[mpp_nr_regs];
+ u32 mpp_ctrl[8];
int i;
printk(KERN_DEBUG "initial MPP regs:");
+ if (mpp_nr_regs > ARRAY_SIZE(mpp_ctrl)) {
+ printk(KERN_ERR "orion_mpp_conf: invalid mpp_max\n");
+ return;
+ }
+
for (i = 0; i < mpp_nr_regs; i++) {
mpp_ctrl[i] = readl(mpp_ctrl_addr(i, dev_bus));
printk(" %08x", mpp_ctrl[i]);
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b4e994c..6cb9fc7 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -134,6 +134,7 @@
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
+ifeq ($(KBUILD_EXTMOD),)
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
# In order to do that, we should use the archprepare target, but we can't since
# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
@@ -143,6 +144,7 @@
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+endif
define archhelp
echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 3cb9956..c9a57d1 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -308,6 +308,9 @@
CONFIG_SERIAL_MVEBU_UART=y
CONFIG_SERIAL_DEV_BUS=y
CONFIG_VIRTIO_CONSOLE=y
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
CONFIG_TCG_TPM=y
CONFIG_TCG_TIS_I2C_INFINEON=y
CONFIG_I2C_CHARDEV=y
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 9234013..21a81b5 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -96,6 +96,7 @@ static inline unsigned long __percpu_##op(void *ptr, \
: [val] "Ir" (val)); \
break; \
default: \
+ ret = 0; \
BUILD_BUG(); \
} \
\
@@ -125,6 +126,7 @@ static inline unsigned long __percpu_read(void *ptr, int size)
ret = READ_ONCE(*(u64 *)ptr);
break;
default:
+ ret = 0;
BUILD_BUG();
}
@@ -194,6 +196,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
: [val] "r" (val));
break;
default:
+ ret = 0;
BUILD_BUG();
}
diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
index f46d57c..6b5037e 100644
--- a/arch/arm64/kernel/crash_dump.c
+++ b/arch/arm64/kernel/crash_dump.c
@@ -58,7 +58,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
/**
* elfcorehdr_read - read from ELF core header
* @buf: buffer where the data is placed
- * @csize: number of bytes to read
+ * @count: number of bytes to read
* @ppos: address in the memory
*
* This function reads @count bytes from elf core header which exists
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 9b65132..2a5b338 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -23,7 +23,9 @@
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/sched/debug.h>
+#include <linux/set_memory.h>
#include <linux/stringify.h>
+#include <linux/vmalloc.h>
#include <asm/traps.h>
#include <asm/ptrace.h>
#include <asm/cacheflush.h>
@@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+ void *addrs[1];
+ u32 insns[1];
+
+ addrs[0] = addr;
+ insns[0] = opcode;
+
+ return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
- p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
+ patch_text(p->ainsn.api.insn, p->opcode);
flush_icache_range((uintptr_t) (p->ainsn.api.insn),
(uintptr_t) (p->ainsn.api.insn) +
@@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+void *alloc_insn_page(void)
{
- void *addrs[1];
- u32 insns[1];
+ void *page;
- addrs[0] = (void *)addr;
- insns[0] = (u32)opcode;
+ page = vmalloc_exec(PAGE_SIZE);
+ if (page)
+ set_memory_ro((unsigned long)page, 1);
- return aarch64_insn_patch_text(addrs, insns, 1);
+ return page;
}
/* arm kprobe: install breakpoint in text */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ce99c58..d9a4c2d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -497,25 +497,3 @@ void arch_setup_new_exec(void)
{
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
}
-
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
-void __used stackleak_check_alloca(unsigned long size)
-{
- unsigned long stack_left;
- unsigned long current_sp = current_stack_pointer;
- struct stack_info info;
-
- BUG_ON(!on_accessible_stack(current, current_sp, &info));
-
- stack_left = current_sp - info.low;
-
- /*
- * There's a good chance we're almost out of stack space if this
- * is true. Using panic() over BUG() is more likely to give
- * reliable debugging output.
- */
- if (size >= stack_left)
- panic("alloca() over the kernel stack boundary\n");
-}
-EXPORT_SYMBOL(stackleak_check_alloca);
-#endif
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3a703e5..a3ac262 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -160,6 +160,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
__dma_unmap_area(phys_to_virt(paddr), size, dir);
}
+#ifdef CONFIG_IOMMU_DMA
static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
struct page *page, size_t size)
{
@@ -188,6 +189,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
return ret;
}
+#endif /* CONFIG_IOMMU_DMA */
static int __init atomic_pool_init(void)
{
diff --git a/arch/csky/Kconfig.debug b/arch/csky/Kconfig.debug
index 48cf6ff..22a162cd 100644
--- a/arch/csky/Kconfig.debug
+++ b/arch/csky/Kconfig.debug
@@ -1,9 +1 @@
-menu "C-SKY Debug Options"
-config CSKY_BUILTIN_DTB
- string "Use kernel builtin dtb"
- help
- User could define the dtb instead of the one which is passed from
- bootloader.
- Sometimes for debug, we want to use a built-in dtb and then we needn't
- modify bootloader at all.
-endmenu
+# dummy file, do not delete
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 67a4ae1..c639fc1 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -65,26 +65,15 @@
$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
boot := arch/csky/boot
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
core-y += $(boot)/dts/
-endif
all: zImage
-
-dtbs: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts
-
-%.dtb %.dtb.S %.dtb.o: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-zImage Image uImage: vmlinux dtbs
+zImage Image uImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
- $(Q)$(MAKE) $(clean)=$(boot)/dts
- rm -rf arch/csky/include/generated
define archhelp
echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
diff --git a/arch/csky/boot/dts/Makefile b/arch/csky/boot/dts/Makefile
index 305e81a..c57ad3c 100644
--- a/arch/csky/boot/dts/Makefile
+++ b/arch/csky/boot/dts/Makefile
@@ -1,13 +1,3 @@
dtstree := $(srctree)/$(src)
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
-builtindtb-y := $(patsubst "%",%,$(CONFIG_CSKY_BUILTIN_DTB))
-dtb-y += $(builtindtb-y).dtb
-obj-y += $(builtindtb-y).dtb.o
-.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
-else
dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
-endif
-
-always += $(dtb-y)
-clean-files += *.dtb *.dtb.S
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 15a84cf..6841049 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -128,7 +128,7 @@
# clang's output will be based upon the build machine. So for clang we simply
# unconditionally specify -EB or -EL as appropriate.
#
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL
else
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 34605ca..58a0315 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -10,7 +10,7 @@
$(filter -march=%,$(KBUILD_CFLAGS)) \
-D__VDSO__
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
endif
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2d51b2b..8be3126 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -930,10 +930,6 @@
help
Freescale General-purpose Timers support
-# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
-config MCA
- bool
-
# Platforms that what PCI turned unconditionally just do select PCI
# in their config node. Platforms that want to choose at config
# time should select PPC_PCI_CHOICE
@@ -944,7 +940,6 @@
bool "PCI support" if PPC_PCI_CHOICE
default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \
&& !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
- default PCI_QSPAN if PPC_8xx
select GENERIC_PCI_IOMAP
help
Find out whether your system includes a PCI bus. PCI is the name of
@@ -958,14 +953,6 @@
config PCI_SYSCALL
def_bool PCI
-config PCI_QSPAN
- bool "QSpan PCI"
- depends on PPC_8xx
- select PPC_I8259
- help
- Say Y here if you have a system based on a Motorola 8xx-series
- embedded processor with a QSPAN PCI interface, otherwise say N.
-
config PCI_8260
bool
depends on PCI && 8260
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 17be664..8a2ce14 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -96,7 +96,7 @@
aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2
endif
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
endif
@@ -175,7 +175,7 @@
# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
endif
endif
diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
index 55c0210..092a400 100644
--- a/arch/powerpc/boot/dts/fsl/t2080rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
@@ -77,12 +77,12 @@
};
ethernet@f0000 {
- phy-handle = <&xg_cs4315_phy1>;
+ phy-handle = <&xg_cs4315_phy2>;
phy-connection-type = "xgmii";
};
ethernet@f2000 {
- phy-handle = <&xg_cs4315_phy2>;
+ phy-handle = <&xg_cs4315_phy1>;
phy-connection-type = "xgmii";
};
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 5b037f5..3aa300a 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -72,7 +72,7 @@
#address-cells = <1>;
#size-cells = <1>;
device_type = "soc";
- ranges = <0x0 0xff000000 0x4000>;
+ ranges = <0x0 0xff000000 0x28000>;
bus-frequency = <0>;
// Temporary -- will go away once kernel uses ranges for get_immrbase().
@@ -224,6 +224,17 @@
#size-cells = <0>;
};
};
+
+ crypto@20000 {
+ compatible = "fsl,sec1.2", "fsl,sec1.0";
+ reg = <0x20000 0x8000>;
+ interrupts = <1 1>;
+ interrupt-parent = <&PIC>;
+ fsl,num-channels = <1>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0x4c>;
+ fsl,descriptor-types-mask = <0x05000154>;
+ };
};
chosen {
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 31733a9..3d5acd2 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -36,6 +36,11 @@ int raw_patch_instruction(unsigned int *addr, unsigned int instr);
int patch_instruction_site(s32 *addr, unsigned int instr);
int patch_branch_site(s32 *site, unsigned long target, int flags);
+static inline unsigned long patch_site_addr(s32 *site)
+{
+ return (unsigned long)site + *site;
+}
+
int instr_is_relative_branch(unsigned int instr);
int instr_is_relative_link_branch(unsigned int instr);
int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 4f54775..fa05aa5 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -34,20 +34,12 @@
* respectively NA for All or X for Supervisor and no access for User.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
* We define all 16 groups so that all other bits of APG can take any value
*/
-#ifdef CONFIG_SWAP
-#define MI_APG_INIT 0xf4f4f4f4
-#else
#define MI_APG_INIT 0x44444444
-#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -115,20 +107,12 @@
* Supervisor and no access for user and NA for ALL.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
* We define all 16 groups so that all other bits of APG can take any value
*/
-#ifdef CONFIG_SWAP
-#define MD_APG_INIT 0xf4f4f4f4
-#else
#define MD_APG_INIT 0x44444444
-#endif
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -180,12 +164,6 @@
*/
#define SPRN_M_TW 799
-/* APGs */
-#define M_APG0 0x00000000
-#define M_APG1 0x00000020
-#define M_APG2 0x00000040
-#define M_APG3 0x00000060
-
#ifdef CONFIG_PPC_MM_SLICES
#include <asm/nohash/32/slice.h>
#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
@@ -251,6 +229,15 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG();
}
+/* patch sites */
+extern s32 patch__itlbmiss_linmem_top;
+extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
+extern s32 patch__fixupdar_linmem_top;
+
+extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2;
+extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3;
+extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
+
#endif /* !__ASSEMBLY__ */
#if defined(CONFIG_PPC_4K_PAGES)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index bb38dd6..1b06add 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -5,6 +5,7 @@
#include <linux/spinlock.h>
#include <asm/page.h>
#include <linux/time.h>
+#include <linux/cpumask.h>
/*
* Definitions for talking to the RTAS on CHRP machines.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 134a573..3b67b95 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -31,6 +31,7 @@
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+#include <asm/code-patching-asm.h>
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
@@ -318,8 +319,8 @@
cmpli cr0, r11, PAGE_OFFSET@h
#ifndef CONFIG_PIN_TLB_TEXT
/* It is assumed that kernel code fits into the first 8M page */
-_ENTRY(ITLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+ patch_site 0b, patch__itlbmiss_linmem_top
#endif
#endif
#endif
@@ -353,13 +354,14 @@
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtcr r12
#endif
-
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
/* Load the MI_TWC with the attributes for this "segment." */
mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
li r11, RPN_PATTERN | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
@@ -372,16 +374,17 @@
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-_ENTRY(itlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
+ patch_site 0b, patch__itlbmiss_exit_1
+
#ifdef CONFIG_PERF_EVENTS
-_ENTRY(itlb_miss_perf)
- lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+ patch_site 0f, patch__itlbmiss_perf
+0: lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -435,11 +438,11 @@
#ifndef CONFIG_PIN_TLB_IMMR
cmpli cr0, r11, VIRT_IMMR_BASE@h
#endif
-_ENTRY(DTLBMiss_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+ patch_site 0b, patch__dtlbmiss_linmem_top
#ifndef CONFIG_PIN_TLB_IMMR
-_ENTRY(DTLBMiss_jmp)
- beq- DTLBMissIMMR
+0: beq- DTLBMissIMMR
+ patch_site 0b, patch__dtlbmiss_immr_jmp
#endif
blt cr7, DTLBMissLinear
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
@@ -470,14 +473,22 @@
* above.
*/
rlwimi r11, r10, 0, _PAGE_GUARDED
-#ifdef CONFIG_SWAP
- /* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
- * on that bit will represent a Non Access group
- */
- rlwinm r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
mtspr SPRN_MD_TWC, r11
+ /* Both _PAGE_ACCESSED and _PAGE_PRESENT have to be set.
+ * We also need to know if the insn is a load/store, so:
+ * Clear _PAGE_PRESENT and load that, which will
+ * trap into DTLB Error with store bit set accordingly.
+ */
+ /* PRESENT=0x1, ACCESSED=0x20
+ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+ * r10 = (r10 & ~PRESENT) | r11;
+ */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
@@ -489,14 +500,16 @@
/* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_1)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_1
+
#ifdef CONFIG_PERF_EVENTS
-_ENTRY(dtlb_miss_perf)
- lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+ patch_site 0f, patch__dtlbmiss_perf
+0: lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -637,8 +650,8 @@
*/
DTLBMissIMMR:
mtcr r12
- /* Set 512k byte guarded page and mark it valid and accessed */
- li r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+ /* Set 512k byte guarded page and mark it valid */
+ li r10, MD_PS512K | MD_GUARDED | MD_SVALID
mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
@@ -648,16 +661,17 @@
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_2
DTLBMissLinear:
mtcr r12
- /* Set 8M byte page and mark it valid and accessed */
- li r11, MD_PS8MEG | MD_SVALID | M_APG2
+ /* Set 8M byte page and mark it valid */
+ li r11, MD_PS8MEG | MD_SVALID
mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
@@ -666,28 +680,29 @@
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
-_ENTRY(dtlb_miss_exit_3)
- mfspr r10, SPRN_SPRG_SCRATCH0
+
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__dtlbmiss_exit_3
#ifndef CONFIG_PIN_TLB_TEXT
ITLBMissLinear:
mtcr r12
- /* Set 8M byte page and mark it valid,accessed */
- li r11, MI_PS8MEG | MI_SVALID | M_APG2
+ /* Set 8M byte page and mark it valid */
+ li r11, MI_PS8MEG | MI_SVALID
mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
-_ENTRY(itlb_miss_exit_2)
- mfspr r10, SPRN_SPRG_SCRATCH0
+0: mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
+ patch_site 0b, patch__itlbmiss_exit_2
#endif
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
@@ -705,8 +720,10 @@
mfspr r11, SPRN_M_TW /* Get level 1 table */
blt+ 3f
rlwinm r11, r10, 16, 0xfff8
-_ENTRY(FixupDAR_cmp)
- cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+
+0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+ patch_site 0b, patch__fixupdar_linmem_top
+
/* create physical page address from effective address */
tophys(r11, r10)
blt- cr7, 201f
@@ -960,7 +977,7 @@
ori r8, r8, MI_EVALID /* Mark it valid */
mtspr SPRN_MI_EPN, r8
li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID | M_APG2 /* Make it valid, APG 2 */
+ ori r8, r8, MI_SVALID /* Make it valid */
mtspr SPRN_MI_TWC, r8
li r8, MI_BOOTINIT /* Create RPN for address 0 */
mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
@@ -987,7 +1004,7 @@
ori r8, r8, MD_EVALID /* Mark it valid */
mtspr SPRN_MD_EPN, r8
li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */
- ori r8, r8, MD_SVALID | M_APG2 /* Make it valid and accessed */
+ ori r8, r8, MD_SVALID /* Make it valid */
mtspr SPRN_MD_TWC, r8
mr r8, r9 /* Create paddr for TLB */
ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4d5322c..96f3473 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -590,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk)
if (tsk->thread.regs) {
preempt_disable();
BUG_ON(tsk != current);
- save_all(tsk);
-
#ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
#endif
+ save_all(tsk);
preempt_enable();
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bf8def2..d65b961 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2337,8 +2337,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
kvmppc_core_prepare_to_enter(vcpu);
return;
}
- dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
- / tb_ticks_per_sec;
+ dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
vcpu->arch.timer_running = 1;
}
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index fa888bf..9f5b8c0 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -61,11 +61,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
dec_time = vcpu->arch.dec;
/*
- * Guest timebase ticks at the same frequency as host decrementer.
- * So use the host decrementer calculations for decrementer emulation.
+ * Guest timebase ticks at the same frequency as host timebase.
+ * So use the host timebase calculations for decrementer emulation.
*/
- dec_time = dec_time << decrementer_clockevent.shift;
- do_div(dec_time, decrementer_clockevent.mult);
+ dec_time = tb_to_ns(dec_time);
dec_nsec = do_div(dec_time, NSEC_PER_SEC);
hrtimer_start(&vcpu->arch.dec_timer,
ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 36484a2..01b7f51 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -13,6 +13,7 @@
*/
#include <linux/memblock.h>
+#include <linux/mmu_context.h>
#include <asm/fixmap.h>
#include <asm/code-patching.h>
@@ -79,7 +80,7 @@ void __init MMU_init_hw(void)
for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
mtspr(SPRN_MD_CTR, ctr | (i << 8));
mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
- mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
addr += LARGE_PAGE_SIZE_8M;
mem -= LARGE_PAGE_SIZE_8M;
@@ -97,22 +98,13 @@ static void __init mmu_mapin_immr(void)
map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
}
-/* Address of instructions to patch */
-#ifndef CONFIG_PIN_TLB_IMMR
-extern unsigned int DTLBMiss_jmp;
-#endif
-extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
-#ifndef CONFIG_PIN_TLB_TEXT
-extern unsigned int ITLBMiss_cmp;
-#endif
-
-static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
{
- unsigned int instr = *addr;
+ unsigned int instr = *(unsigned int *)patch_site_addr(site);
instr &= 0xffff0000;
instr |= (unsigned long)__va(mapped) >> 16;
- patch_instruction(addr, instr);
+ patch_instruction_site(site, instr);
}
unsigned long __init mmu_mapin_ram(unsigned long top)
@@ -123,17 +115,17 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
mapped = 0;
mmu_mapin_immr();
#ifndef CONFIG_PIN_TLB_IMMR
- patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
+ patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
#endif
#ifndef CONFIG_PIN_TLB_TEXT
- mmu_patch_cmp_limit(&ITLBMiss_cmp, 0);
+ mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
#endif
} else {
mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
}
- mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
- mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
+ mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
+ mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
/* If the size of RAM is not an exact power of two, we may not
* have covered RAM in its entirety with 8 MiB
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 6c0020d..e38f74e 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -31,9 +31,6 @@
extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter;
-extern unsigned int itlb_miss_perf, dtlb_miss_perf;
-extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
-extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
static atomic_t insn_ctr_ref;
static atomic_t itlb_miss_ref;
@@ -103,22 +100,22 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_inc_return(&itlb_miss_ref) == 1) {
- unsigned long target = (unsigned long)&itlb_miss_perf;
+ unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
- patch_branch(&itlb_miss_exit_1, target, 0);
+ patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
#ifndef CONFIG_PIN_TLB_TEXT
- patch_branch(&itlb_miss_exit_2, target, 0);
+ patch_branch_site(&patch__itlbmiss_exit_2, target, 0);
#endif
}
val = itlb_miss_counter;
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_inc_return(&dtlb_miss_ref) == 1) {
- unsigned long target = (unsigned long)&dtlb_miss_perf;
+ unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
- patch_branch(&dtlb_miss_exit_1, target, 0);
- patch_branch(&dtlb_miss_exit_2, target, 0);
- patch_branch(&dtlb_miss_exit_3, target, 0);
+ patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
+ patch_branch_site(&patch__dtlbmiss_exit_2, target, 0);
+ patch_branch_site(&patch__dtlbmiss_exit_3, target, 0);
}
val = dtlb_miss_counter;
break;
@@ -180,17 +177,17 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_dec_return(&itlb_miss_ref) == 0) {
- patch_instruction(&itlb_miss_exit_1, insn);
+ patch_instruction_site(&patch__itlbmiss_exit_1, insn);
#ifndef CONFIG_PIN_TLB_TEXT
- patch_instruction(&itlb_miss_exit_2, insn);
+ patch_instruction_site(&patch__itlbmiss_exit_2, insn);
#endif
}
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
- patch_instruction(&dtlb_miss_exit_1, insn);
- patch_instruction(&dtlb_miss_exit_2, insn);
- patch_instruction(&dtlb_miss_exit_3, insn);
+ patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
+ patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
+ patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
}
break;
}
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index 2a9d662..5326ece 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -29,6 +29,7 @@
select 405EX
select PPC40x_SIMPLE
select PPC4xx_PCI_EXPRESS
+ select PCI
select PCI_MSI
select PPC4xx_MSI
help
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index f024efd..9a85d35 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -21,6 +21,7 @@
depends on 44x
select PPC44x_SIMPLE
select APM821xx
+ select PCI
select PCI_MSI
select PPC4xx_MSI
select PPC4xx_PCI_EXPRESS
@@ -200,6 +201,7 @@
select SWIOTLB
select 476FPE
select PPC4xx_PCI_EXPRESS
+ select PCI
select PCI_MSI
select PPC4xx_HSTA_MSI
select I2C
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 8bd590a..7944873 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -26,6 +26,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
#include <asm/lppaca.h>
#include <asm/hvcall.h>
#include <asm/firmware.h>
@@ -36,6 +37,7 @@
#include <asm/vio.h>
#include <asm/mmu.h>
#include <asm/machdep.h>
+#include <asm/drmem.h>
#include "pseries.h"
@@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m)
seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
}
+static void maxmem_data(struct seq_file *m)
+{
+ unsigned long maxmem = 0;
+
+ maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
+ maxmem += hugetlb_total_pages() * PAGE_SIZE;
+
+ seq_printf(m, "MaxMem=%ld\n", maxmem);
+}
+
static int pseries_lparcfg_data(struct seq_file *m, void *v)
{
int partition_potential_processors;
@@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
seq_printf(m, "slb_size=%d\n", mmu_slb_size);
#endif
parse_em_data(m);
+ maxmem_data(m);
return 0;
}
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 69e7fb4..878f9c1 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -11,6 +11,12 @@
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
+ifdef CONFIG_CC_IS_CLANG
+# clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+KBUILD_CFLAGS += -Wframe-larger-than=4096
+endif
+
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y += xmon.o nonstdio.o spr_access.o
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 36473d7..07fa9ea 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -1,6 +1,3 @@
-CONFIG_SMP=y
-CONFIG_PCI=y
-CONFIG_PCIE_XILINX=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_IKCONFIG=y
@@ -11,10 +8,15 @@
CONFIG_CGROUP_BPF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -59,6 +61,7 @@
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
CONFIG_RAS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
@@ -72,8 +75,5 @@
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
-# CONFIG_RCU_TRACE is not set
CONFIG_CRYPTO_USER_API_HASH=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_SIFIVE_PLIC=y
+# CONFIG_RCU_TRACE is not set
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 67b3e6b..47c8713 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1849,16 +1849,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
{
u64 saved_fault_address = current_thread_info()->fault_address;
u8 saved_fault_code = get_thread_fault_code();
- mm_segment_t old_fs;
perf_callchain_store(entry, regs->tpc);
if (!current->mm)
return;
- old_fs = get_fs();
- set_fs(USER_DS);
-
flushw_user();
pagefault_disable();
@@ -1870,7 +1866,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
pagefault_enable();
- set_fs(old_fs);
set_thread_fault_code(saved_fault_code);
current_thread_info()->fault_address = saved_fault_address;
}
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index bb68c80..ff9389a 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -47,9 +47,9 @@
.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
-/*140*/ .word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit
+/*140*/ .word sys_sendfile64, sys_getpeername, compat_sys_futex, sys_gettid, compat_sys_getrlimit
.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
-/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*150*/ .word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
/*160*/ .word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
.word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys_setxattr
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c51c989..ba7e346 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -129,6 +129,7 @@
select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
+ select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 708b46a..25e5a6b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
#endif
+.macro STACKLEAK_ERASE_NOCLOBBER
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ PUSH_AND_CLEAR_REGS
+ call stackleak_erase
+ POP_REGS
+#endif
+.endm
+
#endif /* CONFIG_X86_64 */
+.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ call stackleak_erase
+#endif
+.endm
+
/*
* This does 'call enter_from_user_mode' unless we can avoid it based on
* kernel config or using the static jump infrastructure.
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 687e47f..d309f30 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -46,6 +46,8 @@
#include <asm/frame.h>
#include <asm/nospec-branch.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
@@ -712,6 +714,7 @@
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
call syscall_return_slowpath
+ STACKLEAK_ERASE
jmp restore_all
/* kernel thread */
@@ -886,6 +889,8 @@
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+ STACKLEAK_ERASE
+
/* Opportunistic SYSEXIT */
TRACE_IRQS_ON /* User mode traces as IRQs on. */
@@ -997,6 +1002,8 @@
call do_int80_syscall_32
.Lsyscall_32_done:
+ STACKLEAK_ERASE
+
restore_all:
TRACE_IRQS_IRET
SWITCH_TO_ENTRY_STACK
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4d7a2d9..ce25d84 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -266,6 +266,8 @@
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
+ STACKLEAK_ERASE_NOCLOBBER
+
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
popq %rdi
@@ -625,6 +627,7 @@
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
+ STACKLEAK_ERASE_NOCLOBBER
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 7d0df78..8eaf895 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -261,6 +261,11 @@
/* Opportunistic SYSRET */
sysret32_from_system_call:
+ /*
+ * We are not going to return to userspace from the trampoline
+ * stack. So let's erase the thread stack right now.
+ */
+ STACKLEAK_ERASE
TRACE_IRQS_ON /* User mode traces as IRQs on. */
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 60c141a..d29b736 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -1,7 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-config ZONE_DMA
- def_bool y
-
config XTENSA
def_bool y
select ARCH_HAS_SG_CHAIN
diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile
index dc9e0ba..2948461 100644
--- a/arch/xtensa/boot/Makefile
+++ b/arch/xtensa/boot/Makefile
@@ -33,7 +33,7 @@
boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y))
$(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
+OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary
vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index b727b18..b80a430 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -131,6 +131,7 @@
.fixup : { *(.fixup) }
EXCEPTION_TABLE(16)
+ NOTES
/* Data section */
_sdata = .;
@@ -296,38 +297,11 @@
_end = .;
- .xt.lit : { *(.xt.lit) }
- .xt.prop : { *(.xt.prop) }
+ DWARF_DEBUG
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- .debug_info 0 : { *(.debug_info) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
-
- .xt.insn 0 :
- {
- *(.xt.insn)
- *(.gnu.linkonce.x*)
- }
-
- .xt.lit 0 :
- {
- *(.xt.lit)
- *(.gnu.linkonce.p*)
- }
+ .xt.prop 0 : { KEEP(*(.xt.prop .xt.prop.* .gnu.linkonce.prop.*)) }
+ .xt.insn 0 : { KEEP(*(.xt.insn .xt.insn.* .gnu.linkonce.x*)) }
+ .xt.lit 0 : { KEEP(*(.xt.lit .xt.lit.* .gnu.linkonce.p*)) }
/* Sections to be discarded */
DISCARDS
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 9750a48..30a48bb 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -71,7 +71,7 @@ void __init zones_init(void)
{
/* All pages are DMA-able, so we put them all in the DMA zone. */
unsigned long zones_size[MAX_NR_ZONES] = {
- [ZONE_DMA] = max_low_pfn - ARCH_PFN_OFFSET,
+ [ZONE_NORMAL] = max_low_pfn - ARCH_PFN_OFFSET,
#ifdef CONFIG_HIGHMEM
[ZONE_HIGHMEM] = max_pfn - max_low_pfn,
#endif
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d9a7916..9fe5952 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
uint64_t serial_nr;
rcu_read_lock();
- serial_nr = __bio_blkcg(bio)->css.serial_nr;
+ serial_nr = bio_blkcg(bio)->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
goto out;
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6075100f..3a27d31f 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
rcu_read_lock();
- bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+ bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
if (!bfqg) {
bfqq = &bfqd->oom_bfqq;
goto out;
diff --git a/block/bio.c b/block/bio.c
index bbfeb4e..d5368a4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
- bio_clone_blkg_association(bio, bio_src);
-
- blkcg_bio_issue_init(bio);
+ bio_clone_blkcg_association(bio, bio_src);
}
EXPORT_SYMBOL(__bio_clone_fast);
@@ -1256,7 +1254,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
/*
* success
*/
- if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
+ if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
(map_data && map_data->from_user)) {
ret = bio_copy_from_iter(bio, iter);
if (ret)
@@ -1956,154 +1954,72 @@ EXPORT_SYMBOL(bioset_init_from_src);
#ifdef CONFIG_BLK_CGROUP
+#ifdef CONFIG_MEMCG
/**
- * bio_associate_blkg - associate a bio with the a blkg
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
+ * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkcg from @page's owning memcg. This works like
+ * every other associate function wrt references.
+ */
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
+{
+ struct cgroup_subsys_state *blkcg_css;
+
+ if (unlikely(bio->bi_css))
+ return -EBUSY;
+ if (!page->mem_cgroup)
+ return 0;
+ blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+ &io_cgrp_subsys);
+ bio->bi_css = blkcg_css;
+ return 0;
+}
+#endif /* CONFIG_MEMCG */
+
+/**
+ * bio_associate_blkcg - associate a bio with the specified blkcg
+ * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
+ *
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released. The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
+ */
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
+{
+ if (unlikely(bio->bi_css))
+ return -EBUSY;
+ css_get(blkcg_css);
+ bio->bi_css = blkcg_css;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkcg);
+
+/**
+ * bio_associate_blkg - associate a bio with the specified blkg
* @bio: target bio
* @blkg: the blkg to associate
*
- * This tries to associate @bio with the specified blkg. Association failure
- * is handled by walking up the blkg tree. Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg. This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
+ * Associate @bio with the blkg specified by @blkg. This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
*/
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
{
if (unlikely(bio->bi_blkg))
return -EBUSY;
- bio->bi_blkg = blkg_tryget_closest(blkg);
+ if (!blkg_try_get(blkg))
+ return -ENODEV;
+ bio->bi_blkg = blkg;
return 0;
}
/**
- * __bio_associate_blkg_from_css - internal blkg association function
- *
- * This in the core association function that all association paths rely on.
- * A blkg reference is taken which is released upon freeing of the bio.
- */
-static int __bio_associate_blkg_from_css(struct bio *bio,
- struct cgroup_subsys_state *css)
-{
- struct request_queue *q = bio->bi_disk->queue;
- struct blkcg_gq *blkg;
- int ret;
-
- rcu_read_lock();
-
- if (!css || !css->parent)
- blkg = q->root_blkg;
- else
- blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
- ret = bio_associate_blkg(bio, blkg);
-
- rcu_read_unlock();
- return ret;
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio. This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-int bio_associate_blkg_from_css(struct bio *bio,
- struct cgroup_subsys_state *css)
-{
- if (unlikely(bio->bi_blkg))
- return -EBUSY;
- return __bio_associate_blkg_from_css(bio, css);
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
-#ifdef CONFIG_MEMCG
-/**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
- * @bio: target bio
- * @page: the page to lookup the blkcg from
- *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue. If cgroup_e_css returns NULL, fall back to the queue's
- * root_blkg.
- *
- * Note: this must be called after bio has an associated device.
- */
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
-{
- struct cgroup_subsys_state *css;
- int ret;
-
- if (unlikely(bio->bi_blkg))
- return -EBUSY;
- if (!page->mem_cgroup)
- return 0;
-
- rcu_read_lock();
-
- css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-
- ret = __bio_associate_blkg_from_css(bio, css);
-
- rcu_read_unlock();
- return ret;
-}
-#endif /* CONFIG_MEMCG */
-
-/**
- * bio_associate_create_blkg - associate a bio with a blkg from q
- * @q: request_queue where bio is going
- * @bio: target bio
- *
- * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg. This falls back to
- * the queue's root_blkg if association fails.
- */
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
-{
- struct cgroup_subsys_state *css;
- int ret = 0;
-
- /* someone has already associated this bio with a blkg */
- if (bio->bi_blkg)
- return ret;
-
- rcu_read_lock();
-
- css = blkcg_css();
-
- ret = __bio_associate_blkg_from_css(bio, css);
-
- rcu_read_unlock();
- return ret;
-}
-
-/**
- * bio_reassociate_blkg - reassociate a bio with a blkg from q
- * @q: request_queue where bio is going
- * @bio: target bio
- *
- * When submitting a bio, multiple recursive calls to make_request() may occur.
- * This causes the initial associate done in blkcg_bio_issue_check() to be
- * incorrect and reference the prior request_queue. This performs reassociation
- * when this situation happens.
- */
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{
- if (bio->bi_blkg) {
- blkg_put(bio->bi_blkg);
- bio->bi_blkg = NULL;
- }
-
- return bio_associate_create_blkg(q, bio);
-}
-
-/**
* bio_disassociate_task - undo bio_associate_current()
* @bio: target bio
*/
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
put_io_context(bio->bi_ioc);
bio->bi_ioc = NULL;
}
+ if (bio->bi_css) {
+ css_put(bio->bi_css);
+ bio->bi_css = NULL;
+ }
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
}
/**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
* @dst: destination bio
* @src: source bio
*/
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
{
- if (src->bi_blkg)
- bio_associate_blkg(dst, src->bi_blkg);
+ if (src->bi_css)
+ WARN_ON(bio_associate_blkcg(dst, src->bi_css));
}
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
#endif /* CONFIG_BLK_CGROUP */
static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 992da55..c630e02 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
kfree(blkg);
}
-static void __blkg_release(struct rcu_head *rcu)
-{
- struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
-
- percpu_ref_exit(&blkg->refcnt);
-
- /* release the blkcg and parent blkg refs this blkg has been holding */
- css_put(&blkg->blkcg->css);
- if (blkg->parent)
- blkg_put(blkg->parent);
-
- wb_congested_put(blkg->wb_congested);
-
- blkg_free(blkg);
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid. For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-static void blkg_release(struct percpu_ref *ref)
-{
- struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
-
- call_rcu(&blkg->rcu_head, __blkg_release);
-}
-
/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
+ atomic_set(&blkg->refcnt, 1);
/* root blkg uses @q->root_rl, init rl only for !root blkgs */
if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_get(blkg->parent);
}
- ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
- GFP_NOWAIT | __GFP_NOWARN);
- if (ret)
- goto err_cancel_ref;
-
/* invoke per-policy init */
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
-err_cancel_ref:
- percpu_ref_exit(&blkg->refcnt);
err_put_congested:
wb_congested_put(wb_congested);
err_put_css:
@@ -296,7 +259,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
/**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
* @blkcg: blkcg of interest
* @q: request_queue of interest
*
@@ -305,11 +268,12 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
* that all non-root blkg's have access to the parent blkg. This function
* should be called under RCU read lock and @q->queue_lock.
*
- * Returns the blkg or the closest blkg if blkg_create fails as it walks
- * down from root.
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error. If @q is dead, returns ERR_PTR(-ENODEV). If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
*/
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q)
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+ struct request_queue *q)
{
struct blkcg_gq *blkg;
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
- return q->root_blkg;
+ return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
blkg = __blkg_lookup(blkcg, q, true);
if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
- * non-root blkgs have access to their parents. Returns the closest
- * blkg to the intended blkg should blkg_create() fail.
+ * non-root blkgs have access to their parents.
*/
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg);
- struct blkcg_gq *ret_blkg = q->root_blkg;
- while (parent) {
- blkg = __blkg_lookup(parent, q, false);
- if (blkg) {
- /* remember closest blkg */
- ret_blkg = blkg;
- break;
- }
+ while (parent && !__blkg_lookup(parent, q, false)) {
pos = parent;
parent = blkcg_parent(parent);
}
blkg = blkg_create(pos, q, NULL);
- if (IS_ERR(blkg))
- return ret_blkg;
- if (pos == blkcg)
+ if (pos == blkcg || IS_ERR(blkg))
return blkg;
}
}
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q)
-{
- struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
- unsigned long flags;
-
- if (unlikely(!blkg)) {
- spin_lock_irqsave(q->queue_lock, flags);
-
- blkg = __blkg_lookup_create(blkcg, q);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
-
- return blkg;
-}
-
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
- percpu_ref_kill(&blkg->refcnt);
+ blkg_put(blkg);
}
/**
@@ -452,6 +381,29 @@ static void blkg_destroy_all(struct request_queue *q)
}
/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+ struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+
+ /* release the blkcg and parent blkg refs this blkg has been holding */
+ css_put(&blkg->blkcg->css);
+ if (blkg->parent)
+ blkg_put(blkg->parent);
+
+ wb_congested_put(blkg->wb_congested);
+
+ blkg_free(blkg);
+}
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
+
+/*
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
* because the root blkg uses @q->root_rl instead of its own rl.
*/
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
blkg = blkg_lookup(blkcg, q);
if (!blkg)
goto out;
- if (!blkg_tryget(blkg))
+ blkg = blkg_try_get(blkg);
+ if (!blkg)
goto out;
rcu_read_unlock();
diff --git a/block/blk-core.c b/block/blk-core.c
index bc6ea87..ce12515f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
* prevent that q->request_fn() gets invoked after draining finished.
*/
blk_freeze_queue(q);
+
+ rq_qos_exit(q);
+
spin_lock_irq(lock);
queue_flag_set(QUEUE_FLAG_DEAD, q);
spin_unlock_irq(lock);
@@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
if (q)
blk_queue_exit(q);
q = bio->bi_disk->queue;
- bio_reassociate_blkg(q, bio);
flags = 0;
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 28f80d2..38c35c3 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
spinlock_t *lock)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
- struct blkcg_gq *blkg = bio->bi_blkg;
+ struct blkcg *blkcg;
+ struct blkcg_gq *blkg;
+ struct request_queue *q = rqos->q;
bool issue_as_root = bio_issue_as_root_blkg(bio);
if (!blk_iolatency_enabled(blkiolat))
return;
+ rcu_read_lock();
+ blkcg = bio_blkcg(bio);
+ bio_associate_blkcg(bio, &blkcg->css);
+ blkg = blkg_lookup(blkcg, q);
+ if (unlikely(!blkg)) {
+ if (!lock)
+ spin_lock_irq(q->queue_lock);
+ blkg = blkg_lookup_create(blkcg, q);
+ if (IS_ERR(blkg))
+ blkg = NULL;
+ if (!lock)
+ spin_unlock_irq(q->queue_lock);
+ }
+ if (!blkg)
+ goto out;
+
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+ bio_associate_blkg(bio, blkg);
+out:
+ rcu_read_unlock();
while (blkg && blkg->parent) {
struct iolatency_grp *iolat = blkg_to_lat(blkg);
if (!iolat) {
@@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
* We could be exiting, don't access the pd unless we have a
* ref on the blkg.
*/
- if (!blkg_tryget(blkg))
+ if (!blkg_try_get(blkg))
continue;
iolat = blkg_to_lat(blkg);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 42a4674..6b5ad27 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req)
part_stat_unlock();
}
}
+/*
+ * Two cases of handling a DISCARD merge:
+ * If max_discard_segments > 1, the driver treats every bio as a
+ * range and sends them to the controller together. The ranges
+ * need not be contiguous.
+ * Otherwise, the bios/requests are handled the same as any
+ * others, which must be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+ if (req_op(req) == REQ_OP_DISCARD &&
+ queue_max_discard_segments(req->q) > 1)
+ return true;
+ return false;
+}
+
+enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+{
+ if (blk_discard_mergable(req))
+ return ELEVATOR_DISCARD_MERGE;
+ else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
+ return ELEVATOR_BACK_MERGE;
+ /* not discard-mergable and @next does not start where @req ends */
+ return ELEVATOR_NO_MERGE;
+}
/*
* For non-mq, this has to be called with the request spinlock acquired.
@@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q,
if (req_op(req) != req_op(next))
return NULL;
- /*
- * not contiguous
- */
- if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
- return NULL;
-
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
|| req_no_special_merge(next))
@@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q,
* counts here. Handle DISCARDs separately, as they
* have separate settings.
*/
- if (req_op(req) == REQ_OP_DISCARD) {
+
+ switch (blk_try_req_merge(req, next)) {
+ case ELEVATOR_DISCARD_MERGE:
if (!req_attempt_discard_merge(q, req, next))
return NULL;
- } else if (!ll_merge_requests_fn(q, req, next))
+ break;
+ case ELEVATOR_BACK_MERGE:
+ if (!ll_merge_requests_fn(q, req, next))
+ return NULL;
+ break;
+ default:
return NULL;
+ }
/*
* If failfast settings disagree or any of the two is already
@@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
- if (req_op(rq) == REQ_OP_DISCARD &&
- queue_max_discard_segments(rq->q) > 1)
+ if (blk_discard_mergable(rq))
return ELEVATOR_DISCARD_MERGE;
else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
return ELEVATOR_BACK_MERGE;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0641533..844a454 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
kobject_del(&q->kobj);
blk_trace_remove_sysfs(disk_to_dev(disk));
- rq_qos_exit(q);
-
mutex_lock(&q->sysfs_lock);
if (q->request_fn || (q->mq_ops && q->elevator))
elv_unregister_queue(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4bda70e..db1a3a2 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+ /* fall back to root_blkg if associating with tg's blkg gives -ENODEV */
+ if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+ bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
+ bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+#endif
+}
+
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio)
{
struct throtl_qnode *qn = NULL;
- struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
struct throtl_service_queue *sq;
bool rw = bio_data_dir(bio);
bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
if (unlikely(blk_queue_bypass(q)))
goto out_unlock;
+ blk_throtl_assoc_bio(tg, bio);
blk_throtl_update_idletime(tg);
sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index cf49fe0..36869af 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
}
}
- bio_clone_blkg_association(bio, bio_src);
-
- blkcg_bio_issue_init(bio);
+ bio_clone_blkcg_association(bio, bio_src);
return bio;
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a3d87d..ed41aa9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
uint64_t serial_nr;
rcu_read_lock();
- serial_nr = __bio_blkcg(bio)->css.serial_nr;
+ serial_nr = bio_blkcg(bio)->css.serial_nr;
rcu_read_unlock();
/*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
struct cfq_group *cfqg;
rcu_read_lock();
- cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
+ cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
if (!cfqg) {
cfqq = &cfqd->oom_cfqq;
goto out;
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index f3702e5..be70ca6 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -21,6 +21,18 @@
appropriate hash algorithms (such as SHA-1) must be available.
ENOPKG will be reported if the requisite algorithm is unavailable.
+config ASYMMETRIC_TPM_KEY_SUBTYPE
+ tristate "Asymmetric TPM backed private key subtype"
+ depends on TCG_TPM
+ depends on TRUSTED_KEYS
+ select CRYPTO_HMAC
+ select CRYPTO_SHA1
+ select CRYPTO_HASH_INFO
+ help
+ This option provides support for TPM-backed private key type handling.
+ Private key operations (sign, decrypt) are performed by the TPM after
+ the key is loaded; verify and encrypt are done in software.
+
config X509_CERTIFICATE_PARSER
tristate "X.509 certificate parser"
depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
@@ -31,6 +43,25 @@
data and provides the ability to instantiate a crypto key from a
public key packet found inside the certificate.
+config PKCS8_PRIVATE_KEY_PARSER
+ tristate "PKCS#8 private key parser"
+ depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+ select ASN1
+ select OID_REGISTRY
+ help
+ This option provides support for parsing PKCS#8 format blobs for
+ private key data and provides the ability to instantiate a crypto key
+ from that data.
+
+config TPM_KEY_PARSER
+ tristate "TPM private key parser"
+ depends on ASYMMETRIC_TPM_KEY_SUBTYPE
+ select ASN1
+ help
+ This option provides support for parsing TPM format blobs for
+ private key data and provides the ability to instantiate a crypto key
+ from that data.
+
config PKCS7_MESSAGE_PARSER
tristate "PKCS#7 message parser"
depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index d4b2e1b..28b91ad 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -11,6 +11,7 @@
signature.o
obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
+obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
#
# X.509 Certificate handling
@@ -30,6 +31,19 @@
$(obj)/x509_akid.asn1.o: $(obj)/x509_akid.asn1.c $(obj)/x509_akid.asn1.h
#
+# PKCS#8 private key handling
+#
+obj-$(CONFIG_PKCS8_PRIVATE_KEY_PARSER) += pkcs8_key_parser.o
+pkcs8_key_parser-y := \
+ pkcs8.asn1.o \
+ pkcs8_parser.o
+
+$(obj)/pkcs8_parser.o: $(obj)/pkcs8.asn1.h
+$(obj)/pkcs8.asn1.o: $(obj)/pkcs8.asn1.c $(obj)/pkcs8.asn1.h
+
+clean-files += pkcs8.asn1.c pkcs8.asn1.h
+
+#
# PKCS#7 message handling
#
obj-$(CONFIG_PKCS7_MESSAGE_PARSER) += pkcs7_message.o
@@ -61,3 +75,14 @@
$(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
$(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
+
+#
+# TPM private key parsing
+#
+obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
+tpm_key_parser-y := \
+ tpm.asn1.o \
+ tpm_parser.o
+
+$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
+$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
new file mode 100644
index 0000000..5d4c2704
--- /dev/null
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -0,0 +1,988 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "ASYM-TPM: "fmt
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
+#include <linux/tpm.h>
+#include <linux/tpm_command.h>
+#include <crypto/akcipher.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <asm/unaligned.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/trusted.h>
+#include <crypto/asym_tpm_subtype.h>
+#include <crypto/public_key.h>
+
+#define TPM_ORD_FLUSHSPECIFIC 186
+#define TPM_ORD_LOADKEY2 65
+#define TPM_ORD_UNBIND 30
+#define TPM_ORD_SIGN 60
+#define TPM_LOADKEY2_SIZE 59
+#define TPM_FLUSHSPECIFIC_SIZE 18
+#define TPM_UNBIND_SIZE 63
+#define TPM_SIGN_SIZE 63
+
+#define TPM_RT_KEY 0x00000001
+
+/*
+ * Load the userspace-provided key blob: 0 and *@newhandle on success, else <0
+ */
+static int tpm_loadkey2(struct tpm_buf *tb,
+			uint32_t keyhandle, unsigned char *keyauth,
+			const unsigned char *keyblob, int keybloblen,
+			uint32_t *newhandle)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_LOADKEY2);
+
+	/* session for loading the key */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   keybloblen, keyblob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_LOADKEY2_SIZE + keybloblen);
+	store32(tb, TPM_ORD_LOADKEY2);
+	store32(tb, keyhandle);
+	storebytes(tb, keyblob, keybloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("trusted_tpm_send failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
+			     SHA1_DIGEST_SIZE, 0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	*newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
+	return 0;
+}
+
+/*
+ * Execute the FlushSpecific TPM command on @handle (resource type TPM_RT_KEY)
+ */
+static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
+{
+ INIT_BUF(tb);
+ store16(tb, TPM_TAG_RQU_COMMAND);
+ store32(tb, TPM_FLUSHSPECIFIC_SIZE);
+ store32(tb, TPM_ORD_FLUSHSPECIFIC);
+ store32(tb, handle);
+ store32(tb, TPM_RT_KEY);
+ /* the result of sending the command is returned unchanged */
+ return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+}
+
+/*
+ * Decrypt a blob provided by userspace using a specific key handle (a well
+ * known handle or one previously loaded by e.g. LoadKey2).  Returns the bytes
+ * written to @out, -EOVERFLOW if they exceed @outlen, or another error < 0.
+ */
+static int tpm_unbind(struct tpm_buf *tb,
+		      uint32_t keyhandle, unsigned char *keyauth,
+		      const unsigned char *blob, uint32_t bloblen,
+		      void *out, uint32_t outlen)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal, datalen;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_UNBIND);
+	datalen = htonl(bloblen);
+
+	/* OIAP session used to authorize the unbind command */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   sizeof(uint32_t), &datalen,
+			   bloblen, blob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_UNBIND_SIZE + bloblen);
+	store32(tb, TPM_ORD_UNBIND);
+	store32(tb, keyhandle);
+	store32(tb, bloblen);
+	storebytes(tb, blob, bloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("trusted_tpm_send failed (%d)\n", ret);
+		return ret;
+	}
+
+	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+	if (datalen > outlen)
+		return -EOVERFLOW;
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+			     keyauth, SHA1_DIGEST_SIZE,
+			     sizeof(uint32_t), TPM_DATA_OFFSET,
+			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+			     0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), datalen);
+	return datalen;
+}
+
+/*
+ * Sign a blob provided by userspace (that has had the hash function applied)
+ * using a specific key handle.  The handle is assumed to have been previously
+ * loaded by e.g. LoadKey2.  Returns the signature length written to @out,
+ * -EOVERFLOW if the signature does not fit in @outlen, or another error < 0.
+ *
+ * Note that the key signature scheme of the used key should be set to
+ * TPM_SS_RSASSAPKCS1v15_DER.  This allows the hashed input to be of any size
+ * up to key_length_in_bytes - 11 and not be limited to size 20 like the
+ * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
+ */
+static int tpm_sign(struct tpm_buf *tb,
+		    uint32_t keyhandle, unsigned char *keyauth,
+		    const unsigned char *blob, uint32_t bloblen,
+		    void *out, uint32_t outlen)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal, datalen;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_SIGN);
+	datalen = htonl(bloblen);
+
+	/* OIAP session used to authorize the sign command */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   sizeof(uint32_t), &datalen,
+			   bloblen, blob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_SIGN_SIZE + bloblen);
+	store32(tb, TPM_ORD_SIGN);
+	store32(tb, keyhandle);
+	store32(tb, bloblen);
+	storebytes(tb, blob, bloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("trusted_tpm_send failed (%d)\n", ret);
+		return ret;
+	}
+
+	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+	if (datalen > outlen)
+		return -EOVERFLOW;
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+			     keyauth, SHA1_DIGEST_SIZE,
+			     sizeof(uint32_t), TPM_DATA_OFFSET,
+			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+			     0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t), datalen);
+	return datalen;
+}
+/*
+ * Maximum buffer size for the BER/DER encoded public key. The public key
+ * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
+ * bit key and e is usually 65537
+ * The encoding overhead is:
+ * - max 4 bytes for SEQUENCE
+ * - max 4 bytes for INTEGER n type/length
+ * - 257 bytes of n
+ * - max 2 bytes for INTEGER e type/length
+ * - 3 bytes of e
+ */
+#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3)
+
+/*
+ * Provide a part of a description of the key for /proc/keys.
+ */
+static void asym_tpm_describe(const struct key *asymmetric_key,
+			      struct seq_file *m)
+{
+	struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
+
+	if (!tk)
+		return;
+
+	seq_puts(m, "TPM1.2/Blob");
+}
+
+/* Free the tpm_key held in @payload0 and its blob; @payload3 is not used. */
+static void asym_tpm_destroy(void *payload0, void *payload3)
+{
+	struct tpm_key *key = payload0;
+
+	/* nothing was attached to the key */
+	if (key == NULL)
+		return;
+	kfree(key->blob);
+	key->blob_len = 0;
+	kfree(key);
+}
+
+/* Number of bytes the definite-form length field needs to encode @len */
+static inline uint32_t definite_length(uint32_t len)
+{
+	if (len > 255)
+		return 3;
+	if (len > 127)
+		return 2;
+	return 1;
+}
+
+/* Write @tag and the definite-form length @len at @buf; return the next byte */
+static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
+					 uint32_t len)
+{
+	*buf++ = tag;
+	if (len > 255) {
+		/* long form: 0x82 followed by two big-endian length bytes */
+		*buf++ = 0x82;
+		put_unaligned_be16(len, buf);
+		buf += 2;
+	} else if (len > 127) {
+		/* long form: 0x81 followed by one length byte */
+		*buf++ = 0x81;
+		*buf++ = len;
+	} else {
+		/* short form: the length itself */
+		*buf++ = len;
+	}
+	return buf;
+}
+
+/* DER-encode SEQUENCE { INTEGER n, INTEGER 65537 } into @buf; return size */
+static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
+{
+	uint8_t *cur = buf;
+	/* n carries a leading 0x00 byte, so its encoded content is len + 1 */
+	uint32_t n_len = definite_length(len + 1) + 1 + len + 1;
+	uint32_t e_len = definite_length(3) + 1 + 3;
+	uint8_t e[3] = { 0x01, 0x00, 0x01 };
+
+	cur = encode_tag_length(cur, 0x30, n_len + e_len);	/* SEQUENCE */
+	cur = encode_tag_length(cur, 0x02, len + 1);		/* INTEGER n */
+	cur[0] = 0x00;
+	memcpy(cur + 1, pub_key, len);
+	cur += len + 1;
+	cur = encode_tag_length(cur, 0x02, sizeof(e));		/* INTEGER e */
+	memcpy(cur, e, sizeof(e));
+	cur += sizeof(e);
+
+	return cur - buf;
+}
+
+/*
+ * Map @encoding (plus optional @hash_algo) to a crypto API algorithm name.
+ */
+static int determine_akcipher(const char *encoding, const char *hash_algo,
+			      char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+	int n;
+
+	if (!strcmp(encoding, "raw")) {
+		strcpy(alg_name, "rsa");
+		return 0;
+	}
+
+	/* anything other than "raw" or "pkcs1" is not available */
+	if (strcmp(encoding, "pkcs1"))
+		return -ENOPKG;
+
+	if (!hash_algo) {
+		strcpy(alg_name, "pkcs1pad(rsa)");
+		return 0;
+	}
+
+	n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
+		     hash_algo);
+	return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+}
+
+/*
+ * Query information about a key: fill @info with its sizes and supported ops.
+ */
+static int tpm_key_query(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info)
+{
+ struct tpm_key *tk = params->key->payload.data[asym_crypto];
+ int ret;
+ char alg_name[CRYPTO_MAX_ALG_NAME];
+ struct crypto_akcipher *tfm;
+ uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+ uint32_t der_pub_key_len;
+ int len;
+
+ /* TPM only works on private keys, public keys still done in software */
+ ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+ if (ret < 0)
+ return ret;
+
+ tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+ der_pub_key);
+
+ ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+ if (ret < 0)
+ goto error_free_tfm;
+
+ len = crypto_akcipher_maxsize(tfm);
+ /* NOTE(review): tk->key_len looks like a bit count, hence the / 8 below */
+ info->key_size = tk->key_len;
+ info->max_data_size = tk->key_len / 8;
+ info->max_sig_size = len;
+ info->max_enc_size = len;
+ info->max_dec_size = tk->key_len / 8;
+
+ info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
+ KEYCTL_SUPPORTS_DECRYPT |
+ KEYCTL_SUPPORTS_VERIFY |
+ KEYCTL_SUPPORTS_SIGN;
+
+ ret = 0;
+error_free_tfm:
+ crypto_free_akcipher(tfm);
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ return ret;
+}
+
+/*
+ * Encryption operation is performed with the public key.  Hence it is done
+ * in software.  Returns the ciphertext length or a negative error code.
+ */
+static int tpm_key_encrypt(struct tpm_key *tk,
+			   struct kernel_pkey_params *params,
+			   const void *in, void *out)
+{
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	struct crypto_akcipher *tfm;
+	struct akcipher_request *req;
+	struct crypto_wait cwait;
+	struct scatterlist in_sg, out_sg;
+	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+	uint32_t der_pub_key_len;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+					 der_pub_key);
+
+	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+	if (ret < 0)
+		goto error_free_tfm;
+
+	/* ret is 0 here; make an allocation failure visible to the caller */
+	ret = -ENOMEM;
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		goto error_free_tfm;
+
+	sg_init_one(&in_sg, in, params->in_len);
+	sg_init_one(&out_sg, out, params->out_len);
+	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+				   params->out_len);
+	crypto_init_wait(&cwait);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				      CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &cwait);
+
+	ret = crypto_wait_req(crypto_akcipher_encrypt(req), &cwait);
+	if (ret == 0)
+		ret = req->dst_len;
+
+	akcipher_request_free(req);
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Decryption operation is performed with the private key in the TPM.
+ * Returns the number of decrypted bytes or a negative error code.
+ */
+static int tpm_key_decrypt(struct tpm_key *tk,
+			   struct kernel_pkey_params *params,
+			   const void *in, void *out)
+{
+	struct tpm_buf *tb;
+	uint32_t keyhandle;
+	uint8_t srkauth[SHA1_DIGEST_SIZE];
+	uint8_t keyauth[SHA1_DIGEST_SIZE];
+	int r, flush_ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	/* only the "pkcs1" encoding without a hash algorithm is handled */
+	if (params->hash_algo || strcmp(params->encoding, "pkcs1"))
+		return -ENOPKG;
+
+	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+
+	/* TODO: Handle a non-all zero SRK authorization */
+	memset(srkauth, 0, sizeof(srkauth));
+
+	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+			 tk->blob, tk->blob_len, &keyhandle);
+	if (r < 0) {
+		pr_devel("loadkey2 failed (%d)\n", r);
+		goto error;
+	}
+
+	/* TODO: Handle a non-all zero key authorization */
+	memset(keyauth, 0, sizeof(keyauth));
+
+	r = tpm_unbind(tb, keyhandle, keyauth,
+		       in, params->in_len, out, params->out_len);
+	if (r < 0)
+		pr_devel("tpm_unbind failed (%d)\n", r);
+
+	flush_ret = tpm_flushspecific(tb, keyhandle);
+	if (flush_ret < 0)
+		pr_devel("flushspecific failed (%d)\n", flush_ret);
+
+error:
+	kzfree(tb);
+	pr_devel("<==%s() = %d\n", __func__, r);
+	return r;
+}
+
+/*
+ * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2].
+ */
+static const u8 digest_info_md5[] = {
+ 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
+ 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
+ 0x05, 0x00, 0x04, 0x10
+};
+
+static const u8 digest_info_sha1[] = {
+ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+ 0x2b, 0x0e, 0x03, 0x02, 0x1a, /* OID */
+ 0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_rmd160[] = {
+ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+ 0x2b, 0x24, 0x03, 0x02, 0x01, /* OID */
+ 0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_sha224[] = {
+ 0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
+ 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04, /* OID */
+ 0x05, 0x00, 0x04, 0x1c
+};
+
+static const u8 digest_info_sha256[] = {
+ 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
+ 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, /* OID */
+ 0x05, 0x00, 0x04, 0x20
+};
+
+static const u8 digest_info_sha384[] = {
+ 0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
+ 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, /* OID */
+ 0x05, 0x00, 0x04, 0x30
+};
+
+static const u8 digest_info_sha512[] = {
+ 0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
+ 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, /* OID */
+ 0x05, 0x00, 0x04, 0x40
+};
+
+static const struct asn1_template {
+ const char *name;
+ const u8 *data;
+ size_t size;
+} asn1_templates[] = {
+#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
+ _(md5),
+ _(sha1),
+ _(rmd160),
+ _(sha256),
+ _(sha384),
+ _(sha512),
+ _(sha224),
+ { NULL } /* end-of-table marker: name == NULL */
+#undef _
+};
+
+/* Find the template whose name equals @name; NULL if there is none */
+static const struct asn1_template *lookup_asn1(const char *name)
+{
+	const struct asn1_template *tmpl = asn1_templates;
+
+	while (tmpl->name && strcmp(name, tmpl->name) != 0)
+		tmpl++;
+	return tmpl->name ? tmpl : NULL;
+}
+
+/*
+ * Sign operation is performed with the private key in the TPM.
+ * Returns the signature length or a negative error code.
+ */
+static int tpm_key_sign(struct tpm_key *tk,
+			struct kernel_pkey_params *params,
+			const void *in, void *out)
+{
+	struct tpm_buf *tb;
+	uint32_t keyhandle;
+	uint8_t srkauth[SHA1_DIGEST_SIZE];
+	uint8_t keyauth[SHA1_DIGEST_SIZE];
+	void *asn1_wrapped = NULL;
+	uint32_t in_len = params->in_len;
+	int r, flush_ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (strcmp(params->encoding, "pkcs1"))
+		return -ENOPKG;
+
+	if (params->hash_algo) {
+		const struct asn1_template *asn1 =
+						lookup_asn1(params->hash_algo);
+
+		if (!asn1)
+			return -ENOPKG;
+		/* request enough space for the ASN.1 template + input hash */
+		asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
+		if (!asn1_wrapped)
+			return -ENOMEM;
+
+		/* Copy ASN.1 template, then the input */
+		memcpy(asn1_wrapped, asn1->data, asn1->size);
+		memcpy(asn1_wrapped + asn1->size, in, in_len);
+		in = asn1_wrapped;
+		in_len += asn1->size;
+	}
+
+	if (in_len > tk->key_len / 8 - 11) {
+		r = -EOVERFLOW;
+		goto error_free_asn1_wrapped;
+	}
+
+	r = -ENOMEM;
+	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		goto error_free_asn1_wrapped;
+
+	/* TODO: Handle a non-all zero SRK authorization */
+	memset(srkauth, 0, sizeof(srkauth));
+
+	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+			 tk->blob, tk->blob_len, &keyhandle);
+	if (r < 0) {
+		pr_devel("loadkey2 failed (%d)\n", r);
+		goto error_free_tb;
+	}
+
+	/* TODO: Handle a non-all zero key authorization */
+	memset(keyauth, 0, sizeof(keyauth));
+
+	r = tpm_sign(tb, keyhandle, keyauth, in, in_len, out, params->out_len);
+	if (r < 0)
+		pr_devel("tpm_sign failed (%d)\n", r);
+
+	flush_ret = tpm_flushspecific(tb, keyhandle);
+	if (flush_ret < 0)
+		pr_devel("flushspecific failed (%d)\n", flush_ret);
+
+error_free_tb:
+	kzfree(tb);
+error_free_asn1_wrapped:
+	kfree(asn1_wrapped);
+	pr_devel("<==%s() = %d\n", __func__, r);
+	return r;
+}
+
+/*
+ * Do encryption, decryption and signing ops.
+ */
+static int tpm_key_eds_op(struct kernel_pkey_params *params,
+ const void *in, void *out)
+{
+ struct tpm_key *tk = params->key->payload.data[asym_crypto];
+ int ret = -EOPNOTSUPP;
+
+ /* Dispatch to the handler for the requested operation. */
+ switch (params->op) {
+ case kernel_pkey_encrypt:
+ ret = tpm_key_encrypt(tk, params, in, out);
+ break;
+ case kernel_pkey_decrypt:
+ ret = tpm_key_decrypt(tk, params, in, out);
+ break;
+ case kernel_pkey_sign:
+ ret = tpm_key_sign(tk, params, in, out);
+ break;
+ default: /* any other op is treated as a kernel bug */
+ BUG();
+ }
+
+ return ret;
+}
+
+/*
+ * Verify a signature using the public key, in software; 0 means it matches.
+ */
+static int tpm_key_verify_signature(const struct key *key,
+ const struct public_key_signature *sig)
+{
+ const struct tpm_key *tk = key->payload.data[asym_crypto];
+ struct crypto_wait cwait;
+ struct crypto_akcipher *tfm;
+ struct akcipher_request *req;
+ struct scatterlist sig_sg, digest_sg;
+ char alg_name[CRYPTO_MAX_ALG_NAME];
+ uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+ uint32_t der_pub_key_len;
+ void *output;
+ unsigned int outlen;
+ int ret;
+
+ pr_devel("==>%s()\n", __func__);
+
+ BUG_ON(!tk);
+ BUG_ON(!sig);
+ BUG_ON(!sig->s);
+
+ if (!sig->digest)
+ return -ENOPKG;
+
+ ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
+ if (ret < 0)
+ return ret;
+
+ tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+ der_pub_key);
+
+ ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+ if (ret < 0)
+ goto error_free_tfm;
+
+ ret = -ENOMEM;
+ req = akcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req)
+ goto error_free_tfm;
+
+ ret = -ENOMEM;
+ outlen = crypto_akcipher_maxsize(tfm);
+ output = kmalloc(outlen, GFP_KERNEL);
+ if (!output)
+ goto error_free_req;
+
+ sg_init_one(&sig_sg, sig->s, sig->s_size);
+ sg_init_one(&digest_sg, output, outlen);
+ akcipher_request_set_crypt(req, &sig_sg, &digest_sg, sig->s_size,
+ outlen);
+ crypto_init_wait(&cwait);
+ akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+ CRYPTO_TFM_REQ_MAY_SLEEP,
+ crypto_req_done, &cwait);
+
+ /* Perform the verification calculation. This doesn't actually do the
+ * verification, but rather calculates the hash expected by the
+ * signature and returns that to us.
+ */
+ ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
+ if (ret)
+ goto out_free_output;
+
+ /* Do the actual verification step: compare with the caller's digest. */
+ if (req->dst_len != sig->digest_size ||
+ memcmp(sig->digest, output, sig->digest_size) != 0)
+ ret = -EKEYREJECTED;
+
+out_free_output:
+ kfree(output);
+error_free_req:
+ akcipher_request_free(req);
+error_free_tfm:
+ crypto_free_akcipher(tfm);
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ if (WARN_ON_ONCE(ret > 0))
+ ret = -EINVAL;
+ return ret;
+}
+
+/*
+ * Parse enough information out of TPM_KEY structure:
+ *   TPM_STRUCT_VER      -> 4 bytes
+ *   TPM_KEY_USAGE       -> 2 bytes
+ *   TPM_KEY_FLAGS       -> 4 bytes
+ *   TPM_AUTH_DATA_USAGE -> 1 byte
+ *   TPM_KEY_PARMS       -> variable
+ *   UINT32 PCRInfoSize  -> 4 bytes
+ *   BYTE*               -> PCRInfoSize bytes
+ *   TPM_STORE_PUBKEY
+ *   UINT32 encDataSize;
+ *   BYTE*               -> encDataSize;
+ *
+ * TPM_KEY_PARMS:
+ *   TPM_ALGORITHM_ID    -> 4 bytes
+ *   TPM_ENC_SCHEME      -> 2 bytes
+ *   TPM_SIG_SCHEME      -> 2 bytes
+ *   UINT32 parmSize     -> 4 bytes
+ *   BYTE*               -> variable
+ *
+ * Returns 0, -EBADMSG for a truncated blob or an unexpected key type/scheme,
+ * or -EINVAL for an unsupported key length or public key size.
+ */
+static int extract_key_parameters(struct tpm_key *tk)
+{
+	const void *cur = tk->blob;
+	uint32_t len = tk->blob_len;
+	uint32_t sz;
+	uint32_t key_len;
+
+	if (len < 11)
+		return -EBADMSG;
+
+	/* Ensure this is a legacy key */
+	if (get_unaligned_be16(cur + 4) != 0x0015)
+		return -EBADMSG;
+
+	/* Skip to TPM_KEY_PARMS */
+	cur += 11;
+	len -= 11;
+
+	if (len < 12)
+		return -EBADMSG;
+
+	/* Make sure this is an RSA key */
+	if (get_unaligned_be32(cur) != 0x00000001)
+		return -EBADMSG;
+
+	/* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
+	if (get_unaligned_be16(cur + 4) != 0x0002)
+		return -EBADMSG;
+
+	/* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
+	if (get_unaligned_be16(cur + 6) != 0x0003)
+		return -EBADMSG;
+
+	/* Move to TPM_RSA_KEY_PARMS */
+	sz = get_unaligned_be32(cur + 8);
+	len -= 12;
+	cur += 12;
+
+	/* parmSize must hold the key length and fit in the blob (no wrap) */
+	if (sz < 4 || sz > len)
+		return -EBADMSG;
+
+	/* Grab the RSA key length */
+	key_len = get_unaligned_be32(cur);
+	if (key_len != 512 && key_len != 1024 &&
+	    key_len != 1536 && key_len != 2048)
+		return -EINVAL;
+
+	/* Move just past TPM_KEY_PARMS */
+	cur += sz;
+	len -= sz;
+
+	if (len < 4)
+		return -EBADMSG;
+
+	sz = get_unaligned_be32(cur);
+	if (sz > len - 4)
+		return -EBADMSG;
+
+	/* Move to TPM_STORE_PUBKEY */
+	cur += 4 + sz;
+	len -= 4 + sz;
+
+	/* Grab the size of the public key, it should jive with the key size */
+	if (len < 4)
+		return -EBADMSG;
+
+	sz = get_unaligned_be32(cur);
+	if (sz > 256)
+		return -EINVAL;
+	/* the key bytes themselves must lie inside the blob too */
+	if (sz > len - 4)
+		return -EBADMSG;
+
+	tk->key_len = key_len;
+	tk->pub_key = cur + 4;
+	tk->pub_key_len = sz;
+
+	return 0;
+}
+
+/* Copy @blob into a new tpm_key and parse its parameters; ERR_PTR on failure */
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
+{
+ int r;
+ struct tpm_key *tk;
+
+ r = tpm_is_tpm2(NULL);
+ if (r < 0)
+ goto error;
+
+ /* We don't support TPM2 yet */
+ if (r > 0) {
+ r = -ENODEV;
+ goto error;
+ }
+
+ r = -ENOMEM;
+ tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
+ if (!tk)
+ goto error;
+
+ tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
+ if (!tk->blob)
+ goto error_memdup;
+
+ tk->blob_len = blob_len;
+
+ r = extract_key_parameters(tk);
+ if (r < 0)
+ goto error_extract;
+
+ return tk;
+
+error_extract:
+ kfree(tk->blob);
+ tk->blob_len = 0;
+error_memdup:
+ kfree(tk);
+error:
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(tpm_key_create);
+
+/*
+ * TPM-based asymmetric key subtype
+ */
+struct asymmetric_key_subtype asym_tpm_subtype = {
+ .owner = THIS_MODULE,
+ .name = "asym_tpm",
+ .name_len = sizeof("asym_tpm") - 1, /* length without the NUL */
+ .describe = asym_tpm_describe,
+ .destroy = asym_tpm_destroy,
+ .query = tpm_key_query,
+ .eds_op = tpm_key_eds_op, /* encrypt, decrypt and sign */
+ .verify_signature = tpm_key_verify_signature,
+};
+EXPORT_SYMBOL_GPL(asym_tpm_subtype);
+
+MODULE_DESCRIPTION("TPM based asymmetric key subtype");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h
index ca8e9ac..7be1ccf 100644
--- a/crypto/asymmetric_keys/asymmetric_keys.h
+++ b/crypto/asymmetric_keys/asymmetric_keys.h
@@ -16,3 +16,6 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id);
extern int __asymmetric_key_hex_to_key_id(const char *id,
struct asymmetric_key_id *match_id,
size_t hexlen);
+
+extern int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+ const void *in, void *out);
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 26539e9..69a0788 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/ctype.h>
#include <keys/system_keyring.h>
+#include <keys/user-type.h>
#include "asymmetric_keys.h"
MODULE_LICENSE("GPL");
@@ -538,6 +539,45 @@ static struct key_restriction *asymmetric_lookup_restriction(
return ret;
}
+/*
+ * Perform an encrypt, decrypt or sign operation with an asymmetric key.
+ *
+ * Checks that params->key is of the asymmetric type and has both a subtype
+ * and a non-NULL payload.data[0], then hands the operation to the subtype's
+ * ->eds_op() method.
+ *
+ * Returns whatever ->eds_op() returns, -EINVAL if one of the checks on the
+ * key fails, or -ENOTSUPP if the subtype provides no ->eds_op().
+ */
+int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+ const void *in, void *out)
+{
+ const struct asymmetric_key_subtype *subtype;
+ struct key *key = params->key;
+ int ret;
+
+ pr_devel("==>%s()\n", __func__);
+
+ if (key->type != &key_type_asymmetric)
+ return -EINVAL;
+ subtype = asymmetric_key_subtype(key);
+ if (!subtype ||
+ !key->payload.data[0])
+ return -EINVAL;
+ if (!subtype->eds_op)
+ return -ENOTSUPP;
+
+ ret = subtype->eds_op(params, in, out);
+
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ return ret;
+}
+
+/*
+ * Verify a signature through the key type's asym_verify_signature hook.
+ *
+ * Repackages the generic parameters as a struct public_key_signature and
+ * calls verify_signature(): @in (params->in_len bytes) becomes the digest and
+ * @in2 (params->in2_len bytes) becomes the signature.
+ *
+ * Returns the result of verify_signature().
+ */
+static int asymmetric_key_verify_signature(struct kernel_pkey_params *params,
+ const void *in, const void *in2)
+{
+ struct public_key_signature sig = {
+ .s_size = params->in2_len,
+ .digest_size = params->in_len,
+ .encoding = params->encoding,
+ .hash_algo = params->hash_algo,
+ .digest = (void *)in, /* cast drops const to fit the field type */
+ .s = (void *)in2, /* likewise */
+ };
+
+ return verify_signature(params->key, &sig);
+}
+
struct key_type key_type_asymmetric = {
.name = "asymmetric",
.preparse = asymmetric_key_preparse,
@@ -548,6 +588,9 @@ struct key_type key_type_asymmetric = {
.destroy = asymmetric_key_destroy,
.describe = asymmetric_key_describe,
.lookup_restriction = asymmetric_lookup_restriction,
+ .asym_query = query_asymmetric_key,
+ .asym_eds_op = asymmetric_key_eds_op,
+ .asym_verify_signature = asymmetric_key_verify_signature,
};
EXPORT_SYMBOL_GPL(key_type_asymmetric);
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 0f13416..f0d56e1 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -271,6 +271,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen,
switch (ctx->last_oid) {
case OID_rsaEncryption:
ctx->sinfo->sig->pkey_algo = "rsa";
+ ctx->sinfo->sig->encoding = "pkcs1";
break;
default:
printk("Unsupported pkey algo: %u\n", ctx->last_oid);
diff --git a/crypto/asymmetric_keys/pkcs8.asn1 b/crypto/asymmetric_keys/pkcs8.asn1
new file mode 100644
index 0000000..702c41a
--- /dev/null
+++ b/crypto/asymmetric_keys/pkcs8.asn1
@@ -0,0 +1,24 @@
+--
+-- This is the unencrypted variant
+--
+PrivateKeyInfo ::= SEQUENCE {
+ version Version,
+ privateKeyAlgorithm PrivateKeyAlgorithmIdentifier,
+ privateKey PrivateKey,
+ attributes [0] IMPLICIT Attributes OPTIONAL
+}
+
+Version ::= INTEGER ({ pkcs8_note_version })
+
+PrivateKeyAlgorithmIdentifier ::= AlgorithmIdentifier ({ pkcs8_note_algo })
+
+PrivateKey ::= OCTET STRING ({ pkcs8_note_key })
+
+Attributes ::= SET OF Attribute
+
+Attribute ::= ANY
+
+AlgorithmIdentifier ::= SEQUENCE {
+ algorithm OBJECT IDENTIFIER ({ pkcs8_note_OID }),
+ parameters ANY OPTIONAL
+}
diff --git a/crypto/asymmetric_keys/pkcs8_parser.c b/crypto/asymmetric_keys/pkcs8_parser.c
new file mode 100644
index 0000000..5f6a7ec
--- /dev/null
+++ b/crypto/asymmetric_keys/pkcs8_parser.c
@@ -0,0 +1,184 @@
+/* PKCS#8 Private Key parser [RFC 5208].
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "PKCS8: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/oid_registry.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/public_key.h>
+#include "pkcs8.asn1.h"
+
+/*
+ * State shared between pkcs8_parse() and the ASN.1 decoder callbacks while a
+ * single blob is being parsed.
+ */
+struct pkcs8_parse_context {
+ struct public_key *pub; /* Key being built */
+ unsigned long data; /* Start of data */
+ enum OID last_oid; /* Last OID encountered */
+ enum OID algo_oid; /* Algorithm OID */
+ u32 key_size; /* Length recorded by pkcs8_note_key() */
+ const void *key; /* Key data recorded by pkcs8_note_key() */
+};
+
+/*
+ * Note an OID when we find one for later processing when we know how to
+ * interpret it.
+ *
+ * The result of look_up_OID() is kept in ctx->last_oid. An OID that is not
+ * recognised (OID__NR) is logged along with its offset from the start of the
+ * data, but this is not treated as an error: the callback always returns 0.
+ */
+int pkcs8_note_OID(void *context, size_t hdrlen,
+ unsigned char tag,
+ const void *value, size_t vlen)
+{
+ struct pkcs8_parse_context *ctx = context;
+
+ ctx->last_oid = look_up_OID(value, vlen);
+ if (ctx->last_oid == OID__NR) {
+ char buffer[50];
+
+ sprint_oid(value, vlen, buffer, sizeof(buffer));
+ pr_info("Unknown OID: [%lu] %s\n",
+ (unsigned long)value - ctx->data, buffer);
+ }
+ return 0;
+}
+
+/*
+ * Check the version number of the ASN.1 blob.
+ *
+ * Only a version encoded as a single zero byte is accepted; anything else is
+ * warned about and rejected with -EBADMSG. Nothing is stored in the context.
+ */
+int pkcs8_note_version(void *context, size_t hdrlen,
+ unsigned char tag,
+ const void *value, size_t vlen)
+{
+ if (vlen != 1 || ((const u8 *)value)[0] != 0) {
+ pr_warn("Unsupported PKCS#8 version\n");
+ return -EBADMSG;
+ }
+ return 0;
+}
+
+/*
+ * Note the public algorithm.
+ *
+ * The decision is based on the OID most recently recorded in ctx->last_oid.
+ * Only rsaEncryption is accepted, in which case the key's pkey_algo is set to
+ * "rsa"; any other OID is rejected with -ENOPKG.
+ */
+int pkcs8_note_algo(void *context, size_t hdrlen,
+ unsigned char tag,
+ const void *value, size_t vlen)
+{
+ struct pkcs8_parse_context *ctx = context;
+
+ if (ctx->last_oid != OID_rsaEncryption)
+ return -ENOPKG;
+
+ ctx->pub->pkey_algo = "rsa";
+ return 0;
+}
+
+/*
+ * Note the key data of the ASN.1 blob.
+ *
+ * Only the pointer and length handed in by the decoder are recorded; the data
+ * is not copied here (pkcs8_parse() duplicates it afterwards). Always
+ * returns 0.
+ */
+int pkcs8_note_key(void *context, size_t hdrlen,
+ unsigned char tag,
+ const void *value, size_t vlen)
+{
+ struct pkcs8_parse_context *ctx = context;
+
+ ctx->key = value;
+ ctx->key_size = vlen;
+ return 0;
+}
+
+/*
+ * Parse a PKCS#8 private key blob.
+ *
+ * Runs the pkcs8 ASN.1 decoder over @data and, if that succeeds, copies the
+ * key material noted by pkcs8_note_key() into a newly allocated struct
+ * public_key, which is marked as holding a private key.
+ *
+ * Returns the new public_key on success or an ERR_PTR(): the decoder's error
+ * is passed through and a failed allocation gives -ENOMEM.
+ */
+static struct public_key *pkcs8_parse(const void *data, size_t datalen)
+{
+ struct pkcs8_parse_context ctx;
+ struct public_key *pub;
+ long ret;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ ret = -ENOMEM;
+ ctx.pub = kzalloc(sizeof(struct public_key), GFP_KERNEL);
+ if (!ctx.pub)
+ goto error;
+
+ /* Base address for the offsets printed by pkcs8_note_OID() */
+ ctx.data = (unsigned long)data;
+
+ /* Attempt to decode the private key */
+ ret = asn1_ber_decoder(&pkcs8_decoder, &ctx, data, datalen);
+ if (ret < 0)
+ goto error_decode;
+
+ ret = -ENOMEM;
+ pub = ctx.pub;
+ /* ctx.key is only a noted pointer, so take a private copy */
+ pub->key = kmemdup(ctx.key, ctx.key_size, GFP_KERNEL);
+ if (!pub->key)
+ goto error_decode;
+
+ pub->keylen = ctx.key_size;
+ pub->key_is_private = true;
+ return pub;
+
+error_decode:
+ kfree(ctx.pub);
+error:
+ return ERR_PTR(ret);
+}
+
+/*
+ * Attempt to parse a data blob for a key as a PKCS#8 private key.
+ *
+ * On success the preparsed payload is filled in with public_key_subtype as
+ * the subtype and the parsed key as the crypto data; the key-ID and auth
+ * slots are left NULL and the quota length is set to 100.
+ *
+ * Returns 0 on success or the error from pkcs8_parse().
+ */
+static int pkcs8_key_preparse(struct key_preparsed_payload *prep)
+{
+ struct public_key *pub;
+
+ pub = pkcs8_parse(prep->data, prep->datalen);
+ if (IS_ERR(pub))
+ return PTR_ERR(pub);
+
+ pr_devel("Cert Key Algo: %s\n", pub->pkey_algo);
+ pub->id_type = "PKCS8";
+
+ /* We're pinning the module by being linked against it */
+ __module_get(public_key_subtype.owner);
+ prep->payload.data[asym_subtype] = &public_key_subtype;
+ prep->payload.data[asym_key_ids] = NULL;
+ prep->payload.data[asym_crypto] = pub;
+ prep->payload.data[asym_auth] = NULL;
+ prep->quotalen = 100;
+ return 0;
+}
+
+/* Parser descriptor: offers pkcs8_key_preparse() under the name "pkcs8" */
+static struct asymmetric_key_parser pkcs8_key_parser = {
+ .owner = THIS_MODULE,
+ .name = "pkcs8",
+ .parse = pkcs8_key_preparse,
+};
+
+/*
+ * Module stuff: the parser is registered on module init and unregistered on
+ * module exit.
+ */
+static int __init pkcs8_key_init(void)
+{
+ return register_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+static void __exit pkcs8_key_exit(void)
+{
+ unregister_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+module_init(pkcs8_key_init);
+module_exit(pkcs8_key_exit);
+
+MODULE_DESCRIPTION("PKCS#8 certificate parser");
+MODULE_LICENSE("GPL");
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index e929fe1..f5d85b4 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -60,6 +60,165 @@ static void public_key_destroy(void *payload0, void *payload3)
}
/*
+ * Determine the crypto algorithm name.
+ *
+ * Writes the akcipher algorithm name for @pkey into @alg_name according to
+ * @encoding (which must not be NULL):
+ *
+ *	"pkcs1"	"pkcs1pad(<pkey_algo>)" if @hash_algo is NULL, otherwise
+ *		"pkcs1pad(<pkey_algo>,<hash_algo>)"
+ *	"raw"	<pkey_algo> on its own
+ *
+ * Returns 0 on success, -EINVAL if the name does not fit in
+ * CRYPTO_MAX_ALG_NAME bytes, or -ENOPKG if @encoding is not one of the above.
+ */
+static
+int software_key_determine_akcipher(const char *encoding,
+				    const char *hash_algo,
+				    const struct public_key *pkey,
+				    char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+	int n;
+
+	if (strcmp(encoding, "pkcs1") == 0) {
+		/* The data wangled by the RSA algorithm is typically padded
+		 * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
+		 * sec 8.2].
+		 */
+		if (!hash_algo)
+			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+				     "pkcs1pad(%s)",
+				     pkey->pkey_algo);
+		else
+			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+				     "pkcs1pad(%s,%s)",
+				     pkey->pkey_algo, hash_algo);
+		return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+	}
+
+	if (strcmp(encoding, "raw") == 0) {
+		/*
+		 * Bounded copy with the same overflow check as the pkcs1
+		 * case, so an over-long pkey_algo cannot run past alg_name.
+		 */
+		n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "%s",
+			     pkey->pkey_algo);
+		return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+	}
+
+	return -ENOPKG;
+}
+
+/*
+ * Query information about a key.
+ *
+ * A temporary akcipher transform is allocated for the algorithm selected by
+ * params->encoding and params->hash_algo and the key is loaded into it. @info
+ * is then filled in from crypto_akcipher_maxsize(): key_size is that value
+ * times 8 and every max_*_size field is set to the value itself. Encrypt and
+ * verify are always reported as supported; decrypt and sign are added only
+ * when the key is private.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int software_key_query(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info)
+{
+ struct crypto_akcipher *tfm;
+ struct public_key *pkey = params->key->payload.data[asym_crypto];
+ char alg_name[CRYPTO_MAX_ALG_NAME];
+ int ret, len;
+
+ ret = software_key_determine_akcipher(params->encoding,
+ params->hash_algo,
+ pkey, alg_name);
+ if (ret < 0)
+ return ret;
+
+ tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ if (pkey->key_is_private)
+ ret = crypto_akcipher_set_priv_key(tfm,
+ pkey->key, pkey->keylen);
+ else
+ ret = crypto_akcipher_set_pub_key(tfm,
+ pkey->key, pkey->keylen);
+ if (ret < 0)
+ goto error_free_tfm;
+
+ /* The key has to be set before the maximum size is queried */
+ len = crypto_akcipher_maxsize(tfm);
+ info->key_size = len * 8;
+ info->max_data_size = len;
+ info->max_sig_size = len;
+ info->max_enc_size = len;
+ info->max_dec_size = len;
+ info->supported_ops = (KEYCTL_SUPPORTS_ENCRYPT |
+ KEYCTL_SUPPORTS_VERIFY);
+ if (pkey->key_is_private)
+ info->supported_ops |= (KEYCTL_SUPPORTS_DECRYPT |
+ KEYCTL_SUPPORTS_SIGN);
+ ret = 0;
+
+ /* The transform is only needed for the query, so it is freed either way */
+error_free_tfm:
+ crypto_free_akcipher(tfm);
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ return ret;
+}
+
+/*
+ * Do encryption, decryption and signing ops.
+ *
+ * @params selects the key, the encoding and hash (and thereby the akcipher
+ * algorithm) and the operation to perform (params->op). @in holds
+ * params->in_len bytes of input and @out has room for params->out_len bytes
+ * of output.
+ *
+ * Returns the number of bytes produced (req->dst_len) on success or a
+ * negative error code, -ENOMEM if the akcipher request cannot be allocated.
+ */
+static int software_key_eds_op(struct kernel_pkey_params *params,
+			       const void *in, void *out)
+{
+	const struct public_key *pkey = params->key->payload.data[asym_crypto];
+	struct akcipher_request *req;
+	struct crypto_akcipher *tfm;
+	struct crypto_wait cwait;
+	struct scatterlist in_sg, out_sg;
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	ret = software_key_determine_akcipher(params->encoding,
+					      params->hash_algo,
+					      pkey, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	/*
+	 * ret is still 0 at this point; without this an allocation failure
+	 * would be reported to the caller as a successful 0-byte result.
+	 */
+	ret = -ENOMEM;
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		goto error_free_tfm;
+
+	if (pkey->key_is_private)
+		ret = crypto_akcipher_set_priv_key(tfm,
+						   pkey->key, pkey->keylen);
+	else
+		ret = crypto_akcipher_set_pub_key(tfm,
+						  pkey->key, pkey->keylen);
+	if (ret)
+		goto error_free_req;
+
+	sg_init_one(&in_sg, in, params->in_len);
+	sg_init_one(&out_sg, out, params->out_len);
+	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+				   params->out_len);
+	crypto_init_wait(&cwait);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				      CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &cwait);
+
+	/* Perform the encryption calculation. */
+	switch (params->op) {
+	case kernel_pkey_encrypt:
+		ret = crypto_akcipher_encrypt(req);
+		break;
+	case kernel_pkey_decrypt:
+		ret = crypto_akcipher_decrypt(req);
+		break;
+	case kernel_pkey_sign:
+		ret = crypto_akcipher_sign(req);
+		break;
+	default:
+		BUG();
+	}
+
+	/* Wait for the request to finish and pick up its final status */
+	ret = crypto_wait_req(ret, &cwait);
+	if (ret == 0)
+		ret = req->dst_len;
+
+error_free_req:
+	akcipher_request_free(req);
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/*
* Verify a signature using a public key.
*/
int public_key_verify_signature(const struct public_key *pkey,
@@ -69,8 +228,7 @@ int public_key_verify_signature(const struct public_key *pkey,
struct crypto_akcipher *tfm;
struct akcipher_request *req;
struct scatterlist sig_sg, digest_sg;
- const char *alg_name;
- char alg_name_buf[CRYPTO_MAX_ALG_NAME];
+ char alg_name[CRYPTO_MAX_ALG_NAME];
void *output;
unsigned int outlen;
int ret;
@@ -81,21 +239,11 @@ int public_key_verify_signature(const struct public_key *pkey,
BUG_ON(!sig);
BUG_ON(!sig->s);
- if (!sig->digest)
- return -ENOPKG;
-
- alg_name = sig->pkey_algo;
- if (strcmp(sig->pkey_algo, "rsa") == 0) {
- /* The data wangled by the RSA algorithm is typically padded
- * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
- * sec 8.2].
- */
- if (snprintf(alg_name_buf, CRYPTO_MAX_ALG_NAME,
- "pkcs1pad(rsa,%s)", sig->hash_algo
- ) >= CRYPTO_MAX_ALG_NAME)
- return -EINVAL;
- alg_name = alg_name_buf;
- }
+ ret = software_key_determine_akcipher(sig->encoding,
+ sig->hash_algo,
+ pkey, alg_name);
+ if (ret < 0)
+ return ret;
tfm = crypto_alloc_akcipher(alg_name, 0, 0);
if (IS_ERR(tfm))
@@ -106,7 +254,12 @@ int public_key_verify_signature(const struct public_key *pkey,
if (!req)
goto error_free_tfm;
- ret = crypto_akcipher_set_pub_key(tfm, pkey->key, pkey->keylen);
+ if (pkey->key_is_private)
+ ret = crypto_akcipher_set_priv_key(tfm,
+ pkey->key, pkey->keylen);
+ else
+ ret = crypto_akcipher_set_pub_key(tfm,
+ pkey->key, pkey->keylen);
if (ret)
goto error_free_req;
@@ -167,6 +320,8 @@ struct asymmetric_key_subtype public_key_subtype = {
.name_len = sizeof("public_key") - 1,
.describe = public_key_describe,
.destroy = public_key_destroy,
+ .query = software_key_query,
+ .eds_op = software_key_eds_op,
.verify_signature = public_key_verify_signature_2,
};
EXPORT_SYMBOL_GPL(public_key_subtype);
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 2819831..ad95a58 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -16,7 +16,9 @@
#include <linux/export.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/keyctl.h>
#include <crypto/public_key.h>
+#include <keys/user-type.h>
#include "asymmetric_keys.h"
/*
@@ -37,6 +39,99 @@ void public_key_signature_free(struct public_key_signature *sig)
EXPORT_SYMBOL_GPL(public_key_signature_free);
/**
+ * query_asymmetric_key - Get information about an asymmetric key.
+ * @params: Various parameters.
+ * @info: Where to put the information.
+ *
+ * Hands the query to the ->query() method of the subtype of params->key.
+ *
+ * Returns the result of ->query(), -EINVAL if params->key is not of the
+ * asymmetric type or lacks a subtype or payload.data[0], or -ENOTSUPP if the
+ * subtype has no ->query() method.
+ */
+int query_asymmetric_key(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info)
+{
+ const struct asymmetric_key_subtype *subtype;
+ struct key *key = params->key;
+ int ret;
+
+ pr_devel("==>%s()\n", __func__);
+
+ if (key->type != &key_type_asymmetric)
+ return -EINVAL;
+ subtype = asymmetric_key_subtype(key);
+ if (!subtype ||
+ !key->payload.data[0])
+ return -EINVAL;
+ if (!subtype->query)
+ return -ENOTSUPP;
+
+ ret = subtype->query(params, info);
+
+ pr_devel("<==%s() = %d\n", __func__, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(query_asymmetric_key);
+
+/**
+ * encrypt_blob - Encrypt data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be encrypted, length params->in_len
+ * @enc: Encrypted data buffer, length params->out_len
+ *
+ * Encrypt the specified data blob using the key specified by params->key.
+ * params->op is set to kernel_pkey_encrypt and the request is passed on to
+ * asymmetric_key_eds_op(); params->encoding (eg. "pkcs1"), if specified, is
+ * passed through to the key subtype along with the rest of @params.
+ *
+ * Returns the length of the data placed in the encrypted data buffer or an
+ * error.
+ */
+int encrypt_blob(struct kernel_pkey_params *params,
+ const void *data, void *enc)
+{
+ params->op = kernel_pkey_encrypt;
+ return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(encrypt_blob);
+
+/**
+ * decrypt_blob - Decrypt data using an asymmetric key
+ * @params: Various parameters
+ * @enc: Encrypted data to be decrypted, length params->in_len
+ * @data: Decrypted data buffer, length params->out_len
+ *
+ * Decrypt the specified data blob using the key specified by params->key.
+ * params->op is set to kernel_pkey_decrypt and the request is passed on to
+ * asymmetric_key_eds_op(); params->encoding (eg. "pkcs1"), if specified, is
+ * passed through to the key subtype along with the rest of @params.
+ *
+ * Returns the length of the data placed in the decrypted data buffer or an
+ * error.
+ */
+int decrypt_blob(struct kernel_pkey_params *params,
+ const void *enc, void *data)
+{
+ params->op = kernel_pkey_decrypt;
+ return asymmetric_key_eds_op(params, enc, data);
+}
+EXPORT_SYMBOL_GPL(decrypt_blob);
+
+/**
+ * create_signature - Sign some data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be signed, length params->in_len
+ * @enc: Signature buffer, length params->out_len
+ *
+ * Sign the specified data blob using the private key specified by params->key.
+ * The signature is wrapped in an encoding if params->encoding is specified
+ * (eg. "pkcs1"). If the encoding needs to know the digest type, this can be
+ * passed through params->hash_algo (eg. "sha1"). params->op is set to
+ * kernel_pkey_sign before the request is passed to asymmetric_key_eds_op().
+ *
+ * Returns the length of the data placed in the signature buffer or an error.
+ */
+int create_signature(struct kernel_pkey_params *params,
+ const void *data, void *enc)
+{
+ params->op = kernel_pkey_sign;
+ return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(create_signature);
+
+/**
* verify_signature - Initiate the use of an asymmetric key to verify a signature
* @key: The asymmetric key to verify against
* @sig: The signature to check
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
new file mode 100644
index 0000000..d7f1942
--- /dev/null
+++ b/crypto/asymmetric_keys/tpm.asn1
@@ -0,0 +1,5 @@
+--
+-- Unencrypted TPM Blob. For details of the format, see:
+-- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
+--
+PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
new file mode 100644
index 0000000..96405d8
--- /dev/null
+++ b/crypto/asymmetric_keys/tpm_parser.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "TPM-PARSER: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/asym_tpm_subtype.h>
+#include "tpm.asn1.h"
+
+/* State filled in by the ASN.1 decoder callback while parsing one blob */
+struct tpm_parse_context {
+ const void *blob; /* Key data recorded by tpm_note_key() */
+ u32 blob_len; /* Length of that data */
+};
+
+/*
+ * Note the key data of the ASN.1 blob.
+ *
+ * Only the pointer and length handed in by the decoder are recorded in the
+ * context; nothing is copied here. Always returns 0.
+ */
+int tpm_note_key(void *context, size_t hdrlen,
+ unsigned char tag,
+ const void *value, size_t vlen)
+{
+ struct tpm_parse_context *ctx = context;
+
+ ctx->blob = value;
+ ctx->blob_len = vlen;
+
+ return 0;
+}
+
+/*
+ * Parse a TPM-encrypted private key blob.
+ *
+ * Runs the tpm ASN.1 decoder over @data and passes the key data it noted to
+ * tpm_key_create().
+ *
+ * Returns the result of tpm_key_create(), or an ERR_PTR() carrying the
+ * decoder's error if decoding fails.
+ */
+static struct tpm_key *tpm_parse(const void *data, size_t datalen)
+{
+ struct tpm_parse_context ctx;
+ long ret;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ /* Attempt to decode the private key */
+ ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
+ if (ret < 0)
+ goto error;
+
+ return tpm_key_create(ctx.blob, ctx.blob_len);
+
+error:
+ return ERR_PTR(ret);
+}
+/*
+ * Attempt to parse a data blob for a key as a TPM private key blob.
+ *
+ * On success the preparsed payload is filled in with asym_tpm_subtype as the
+ * subtype and the parsed tpm_key as the crypto data; the key-ID and auth
+ * slots are left NULL and the quota length is set to 100.
+ *
+ * Returns 0 on success, -EMSGSIZE if the blob is larger than 256 * 4 bytes,
+ * or the error from tpm_parse().
+ */
+static int tpm_key_preparse(struct key_preparsed_payload *prep)
+{
+ struct tpm_key *tk;
+
+ /*
+ * TPM 1.2 keys are max 2048 bits long, so assume the blob is no
+ * more than 4x that
+ */
+ if (prep->datalen > 256 * 4)
+ return -EMSGSIZE;
+
+ tk = tpm_parse(prep->data, prep->datalen);
+
+ if (IS_ERR(tk))
+ return PTR_ERR(tk);
+
+ /* We're pinning the module by being linked against it */
+ __module_get(asym_tpm_subtype.owner);
+ prep->payload.data[asym_subtype] = &asym_tpm_subtype;
+ prep->payload.data[asym_key_ids] = NULL;
+ prep->payload.data[asym_crypto] = tk;
+ prep->payload.data[asym_auth] = NULL;
+ prep->quotalen = 100;
+ return 0;
+}
+
+/* Parser descriptor: offers tpm_key_preparse() under the name "tpm_parser" */
+static struct asymmetric_key_parser tpm_key_parser = {
+ .owner = THIS_MODULE,
+ .name = "tpm_parser",
+ .parse = tpm_key_preparse,
+};
+
+/* Register the parser on module init */
+static int __init tpm_key_init(void)
+{
+ return register_asymmetric_key_parser(&tpm_key_parser);
+}
+
+/* Unregister the parser on module exit */
+static void __exit tpm_key_exit(void)
+{
+ unregister_asymmetric_key_parser(&tpm_key_parser);
+}
+
+module_init(tpm_key_init);
+module_exit(tpm_key_exit);
+
+MODULE_DESCRIPTION("TPM private key-blob parser");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index b6cabac..991f4d7 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -199,35 +199,32 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
case OID_md4WithRSAEncryption:
ctx->cert->sig->hash_algo = "md4";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
case OID_sha1WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha1";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
case OID_sha256WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha256";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
case OID_sha384WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha384";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
case OID_sha512WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha512";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
case OID_sha224WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha224";
- ctx->cert->sig->pkey_algo = "rsa";
- break;
+ goto rsa_pkcs1;
}
+rsa_pkcs1:
+ ctx->cert->sig->pkey_algo = "rsa";
+ ctx->cert->sig->encoding = "pkcs1";
ctx->algo_oid = ctx->last_oid;
return 0;
}
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 812476e..cfc04e1 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -392,7 +392,8 @@ static int pkcs1pad_sign(struct akcipher_request *req)
if (!ctx->key_size)
return -EINVAL;
- digest_size = digest_info->size;
+ if (digest_info)
+ digest_size = digest_info->size;
if (req->src_len + digest_size > ctx->key_size - 11)
return -EOVERFLOW;
@@ -412,8 +413,9 @@ static int pkcs1pad_sign(struct akcipher_request *req)
memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
req_ctx->in_buf[ps_end] = 0x00;
- memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
- digest_info->size);
+ if (digest_info)
+ memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
+ digest_info->size);
pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
ctx->key_size - 1 - req->src_len, req->src);
@@ -475,10 +477,13 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
goto done;
pos++;
- if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size))
- goto done;
+ if (digest_info) {
+ if (crypto_memneq(out_buf + pos, digest_info->data,
+ digest_info->size))
+ goto done;
- pos += digest_info->size;
+ pos += digest_info->size;
+ }
err = 0;
@@ -608,11 +613,14 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
hash_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(hash_name))
- return PTR_ERR(hash_name);
+ hash_name = NULL;
- digest_info = rsa_lookup_asn1(hash_name);
- if (!digest_info)
- return -EINVAL;
+ if (hash_name) {
+ digest_info = rsa_lookup_asn1(hash_name);
+ if (!digest_info)
+ return -EINVAL;
+ } else
+ digest_info = NULL;
inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
if (!inst)
@@ -632,14 +640,29 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
err = -ENAMETOOLONG;
- if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
- "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >=
- CRYPTO_MAX_ALG_NAME ||
- snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
- "pkcs1pad(%s,%s)",
- rsa_alg->base.cra_driver_name, hash_name) >=
- CRYPTO_MAX_ALG_NAME)
- goto out_drop_alg;
+ if (!hash_name) {
+ if (snprintf(inst->alg.base.cra_name,
+ CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+ rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+ goto out_drop_alg;
+
+ if (snprintf(inst->alg.base.cra_driver_name,
+ CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+ rsa_alg->base.cra_driver_name) >=
+ CRYPTO_MAX_ALG_NAME)
+ goto out_drop_alg;
+ } else {
+ if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+ "pkcs1pad(%s,%s)", rsa_alg->base.cra_name,
+ hash_name) >= CRYPTO_MAX_ALG_NAME)
+ goto out_drop_alg;
+
+ if (snprintf(inst->alg.base.cra_driver_name,
+ CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)",
+ rsa_alg->base.cra_driver_name,
+ hash_name) >= CRYPTO_MAX_ALG_NAME)
+ goto out_drop_alg;
+ }
inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC;
inst->alg.base.cra_priority = rsa_alg->base.cra_priority;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index a7c2673..824ae98 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -126,6 +126,7 @@ int acpi_device_get_power(struct acpi_device *device, int *state)
return 0;
}
+EXPORT_SYMBOL(acpi_device_get_power);
static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state)
{
diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index 3b25a64..21b9b2f 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -155,10 +155,9 @@ struct logical_input {
int release_data;
} std;
struct { /* valid when type == INPUT_TYPE_KBD */
- /* strings can be non null-terminated */
- char press_str[sizeof(void *) + sizeof(int)];
- char repeat_str[sizeof(void *) + sizeof(int)];
- char release_str[sizeof(void *) + sizeof(int)];
+ char press_str[sizeof(void *) + sizeof(int)] __nonstring;
+ char repeat_str[sizeof(void *) + sizeof(int)] __nonstring;
+ char release_str[sizeof(void *) + sizeof(int)] __nonstring;
} kbd;
} u;
};
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index df8103d..c18586f 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
disk->first_minor = i * max_part;
disk->fops = &brd_fops;
disk->private_data = brd;
- disk->queue = brd->brd_queue;
disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "ram%d", i);
set_capacity(disk, rd_size * 2);
- disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+ brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
/* Tell the block layer that this is not a rotational device */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
+ blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
return brd;
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
brd = brd_alloc(i);
if (brd) {
+ brd->brd_disk->queue = brd->brd_queue;
add_disk(brd->brd_disk);
list_add_tail(&brd->brd_list, &brd_devices);
}
@@ -503,8 +503,14 @@ static int __init brd_init(void)
/* point of no return */
- list_for_each_entry(brd, &brd_devices, brd_list)
+ list_for_each_entry(brd, &brd_devices, brd_list) {
+ /*
+ * associate with queue just before adding disk for
+ * avoiding to mess up failure path
+ */
+ brd->brd_disk->queue = brd->brd_queue;
add_disk(brd->brd_disk);
+ }
blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
THIS_MODULE, brd_probe, NULL, NULL);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 55fd104..fa82042 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
/* THINK if (signal_pending) return ... ? */
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
if (sock == connection->data.socket) {
rcu_read_lock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index fc67fd8..61c3927 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
struct msghdr msg = {
.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
};
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
return sock_recvmsg(sock, &msg, msg.msg_flags);
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index abad6d1..cb0cc86 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
-#include <linux/blk-cgroup.h>
#include "loop.h"
@@ -269,7 +268,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
struct iov_iter i;
ssize_t bw;
- iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+ iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
@@ -347,7 +346,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
ssize_t len;
rq_for_each_segment(bvec, rq, iter) {
- iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0)
return len;
@@ -388,7 +387,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
b.bv_offset = 0;
b.bv_len = bvec.bv_len;
- iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+ iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0) {
ret = len;
@@ -555,8 +554,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
}
atomic_set(&cmd->ref, 2);
- iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
- segments, blk_rq_bytes(rq));
+ iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq));
iter.iov_offset = offset;
cmd->iocb.ki_pos = pos;
@@ -1761,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
- if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
- cmd->css = &bio_blkcg(rq->bio)->css;
+ if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+ cmd->css = rq->bio->bi_css;
css_get(cmd->css);
} else
#endif
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index dfc8de6..a7daa8a 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
dev_warn(&dd->pdev->dev,
"data movement but "
"sect_count is 0\n");
- err = -EINVAL;
- goto abort;
+ err = -EINVAL;
+ goto abort;
}
}
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 14a5125..4d4d612 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
u32 nbd_cmd_flags = 0;
int sent = nsock->sent, skip = 0;
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
switch (req_op(req)) {
case REQ_OP_DISCARD:
@@ -564,8 +564,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
req, bvec.bv_len);
- iov_iter_bvec(&from, ITER_BVEC | WRITE,
- &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
if (skip) {
if (skip >= iov_iter_count(&from)) {
skip -= iov_iter_count(&from);
@@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
int ret = 0;
reply.magic = 0;
- iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+ iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
if (!nbd_disconnected(config))
@@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
- iov_iter_bvec(&to, ITER_BVEC | READ,
- &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd)
for (i = 0; i < config->num_connections; i++) {
struct nbd_sock *nsock = config->socks[i];
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret <= 0)
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index df9467e..41c9ccd 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -234,6 +234,7 @@
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
select DMI
+ select ACPI_ADXL if ACPI
help
Support for error detection and correction the Intel
Skylake server Integrated Memory Controllers. If your
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
index dd209e0..a99ea61d 100644
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -26,6 +26,7 @@
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
+#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
@@ -35,6 +36,7 @@
#include "edac_module.h"
#define EDAC_MOD_STR "skx_edac"
+#define MSG_SIZE 1024
/*
* Debug macros
@@ -54,6 +56,29 @@
static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm;
+static char *skx_msg;
+static unsigned int nvdimm_count;
+
+enum {
+ INDEX_SOCKET,
+ INDEX_MEMCTRL,
+ INDEX_CHANNEL,
+ INDEX_DIMM,
+ INDEX_MAX
+};
+
+static const char * const component_names[] = {
+ [INDEX_SOCKET] = "ProcessorSocketId",
+ [INDEX_MEMCTRL] = "MemoryControllerId",
+ [INDEX_CHANNEL] = "ChannelId",
+ [INDEX_DIMM] = "DimmSlotId",
+};
+
+static int component_indices[ARRAY_SIZE(component_names)];
+static int adxl_component_count;
+static const char * const *adxl_component_names;
+static u64 *adxl_values;
+static char *adxl_msg;
#define NUM_IMC 2 /* memory controllers per socket */
#define NUM_CHANNELS 3 /* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
u16 flags;
u64 size = 0;
+ nvdimm_count++;
+
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0);
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
}
#endif /*CONFIG_EDAC_DEBUG*/
+/*
+ * skx_adxl_decode - translate res->addr into DIMM coordinates via adxl_decode().
+ *
+ * Rejects addresses at or above skx_tohm and addresses in the range
+ * [skx_tolm, BIT_ULL(32)). Otherwise it asks adxl_decode() to fill
+ * adxl_values[], copies the socket, memory controller, channel and DIMM
+ * entries into @res, and formats every component whose value is not ~0
+ * into adxl_msg as " name:0xvalue".
+ *
+ * Returns true on success, false when the address is out of range or
+ * adxl_decode() returns non-zero.
+ */
+static bool skx_adxl_decode(struct decoded_addr *res)
+
+{
+ int i, len = 0;
+
+ if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
+ res->addr < BIT_ULL(32))) {
+ edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
+ return false;
+ }
+
+ if (adxl_decode(res->addr, adxl_values)) {
+ edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
+ return false;
+ }
+
+ /* component_indices[] maps each INDEX_* to its slot in adxl_values[]. */
+ res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
+ res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+ res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+ res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+
+ for (i = 0; i < adxl_component_count; i++) {
+ /* Components reported as all-ones are left out of the message. */
+ if (adxl_values[i] == ~0x0ull)
+ continue;
+
+ len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
+ adxl_component_names[i], adxl_values[i]);
+ /* snprintf() returns the untruncated length; stop once the buffer is full. */
+ if (MSG_SIZE - len <= 0)
+ break;
+ }
+
+ return true;
+}
+
static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m,
struct decoded_addr *res)
{
enum hw_event_mc_err_type tp_event;
- char *type, *optype, msg[256];
+ char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break;
}
}
+ if (adxl_component_count) {
+ snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ mscod, errcode, adxl_msg);
+ } else {
+ snprintf(skx_msg, MSG_SIZE,
+ "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+ overflow ? " OVERFLOW" : "",
+ (uncorrected_error && recoverable) ? " recoverable" : "",
+ mscod, errcode,
+ res->socket, res->imc, res->rank,
+ res->bank_group, res->bank_address, res->row, res->column);
+ }
- snprintf(msg, sizeof(msg),
- "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
- overflow ? " OVERFLOW" : "",
- (uncorrected_error && recoverable) ? " recoverable" : "",
- mscod, errcode,
- res->socket, res->imc, res->rank,
- res->bank_group, res->bank_address, res->row, res->column);
-
- edac_dbg(0, "%s\n", msg);
+ edac_dbg(0, "%s\n", skx_msg);
/* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1,
- optype, msg);
+ optype, skx_msg);
+}
+
+/*
+ * get_mci - look up the memory controller instance for a socket/controller pair.
+ * @src_id: compared against imc[0].src_id of each device on skx_edac_list
+ * @lmc: index into the matching device's imc[] array
+ *
+ * Returns the mci pointer stored for that controller, or NULL (after logging
+ * an error) when @lmc is outside [0, NUM_IMC) or no device matches @src_id.
+ */
+static struct mem_ctl_info *get_mci(int src_id, int lmc)
+{
+ struct skx_dev *d;
+
+ /*
+ * lmc indexes d->imc[] below. Callers derive it from an (int) cast of a
+ * 64-bit decoded value, so it can be negative; reject that as well as
+ * values past the end of the array.
+ */
+ if (lmc < 0 || lmc > NUM_IMC - 1) {
+ skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
+ return NULL;
+ }
+
+ list_for_each_entry(d, &skx_edac_list, list) {
+ if (d->imc[0].src_id == src_id)
+ return d->imc[lmc].mci;
+ }
+
+ skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
+
+ return NULL;
+}
static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
+ memset(&res, 0, sizeof(res));
res.addr = mce->addr;
- if (!skx_decode(&res))
+
+ if (adxl_component_count) {
+ if (!skx_adxl_decode(&res))
+ return NOTIFY_DONE;
+
+ mci = get_mci(res.socket, res.imc);
+ } else {
+ if (!skx_decode(&res))
+ return NOTIFY_DONE;
+
+ mci = res.dev->imc[res.imc].mci;
+ }
+
+ if (!mci)
return NOTIFY_DONE;
- mci = res.dev->imc[res.imc].mci;
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
}
}
+/*
+ * skx_adxl_get - set up ADXL-based address decoding.
+ *
+ * Fetches the component name list with adxl_get_component_names(), records in
+ * component_indices[] where each entry of component_names[] appears in that
+ * list, counts the names into adxl_component_count and allocates the
+ * adxl_values and adxl_msg buffers.
+ *
+ * adxl_component_count is left at (or reset to) zero whenever setup does not
+ * complete; the decode paths test it to decide whether ADXL is used.
+ */
+static void __init skx_adxl_get(void)
+{
+ const char * const *names;
+ int i, j;
+
+ names = adxl_get_component_names();
+ if (!names) {
+ skx_printk(KERN_NOTICE, "No firmware support for address translation.");
+ skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
+ return;
+ }
+
+ /* Every required component must be present in the firmware's list. */
+ for (i = 0; i < INDEX_MAX; i++) {
+ for (j = 0; names[j]; j++) {
+ if (!strcmp(component_names[i], names[j])) {
+ component_indices[i] = j;
+ break;
+ }
+ }
+
+ /* Inner loop ran to the NULL terminator: component i was not found. */
+ if (!names[j])
+ goto err;
+ }
+
+ adxl_component_names = names;
+ while (*names++)
+ adxl_component_count++;
+
+ adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
+ GFP_KERNEL);
+ if (!adxl_values) {
+ adxl_component_count = 0;
+ return;
+ }
+
+ adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+ if (!adxl_msg) {
+ adxl_component_count = 0;
+ kfree(adxl_values);
+ /* Clear the pointer so a later kfree(adxl_values) cannot double free. */
+ adxl_values = NULL;
+ }
+
+ return;
+err:
+ skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
+ component_names[i]);
+ for (j = 0; names[j]; j++)
+ skx_printk(KERN_CONT, "%s ", names[j]);
+ skx_printk(KERN_CONT, "\n");
+}
+
+/* skx_adxl_put - free the adxl_values and adxl_msg buffers. */
+static void __exit skx_adxl_put(void)
+{
+ kfree(adxl_values);
+ kfree(adxl_msg);
+}
+
/*
* skx_init:
* make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
}
}
+ skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+ if (!skx_msg) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ if (nvdimm_count)
+ skx_adxl_get();
+
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec);
skx_remove();
+ if (nvdimm_count)
+ skx_adxl_put();
+ kfree(skx_msg);
teardown_skx_debug();
}
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 7670e8d..7273e50 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -145,34 +145,6 @@
See DIG64_HCDPv20_042804.pdf available from
<http://www.dig64.org/specifications/>
-config DELL_RBU
- tristate "BIOS update support for DELL systems via sysfs"
- depends on X86
- select FW_LOADER
- select FW_LOADER_USER_HELPER
- help
- Say m if you want to have the option of updating the BIOS for your
- DELL system. Note you need a Dell OpenManage or Dell Update package (DUP)
- supporting application to communicate with the BIOS regarding the new
- image for the image update to take effect.
- See <file:Documentation/dell_rbu.txt> for more details on the driver.
-
-config DCDBAS
- tristate "Dell Systems Management Base Driver"
- depends on X86
- help
- The Dell Systems Management Base Driver provides a sysfs interface
- for systems management software to perform System Management
- Interrupts (SMIs) and Host Control Actions (system power cycle or
- power off after OS shutdown) on certain Dell systems.
-
- See <file:Documentation/dcdbas.txt> for more details on the driver
- and the Dell systems on which Dell systems management software makes
- use of this driver.
-
- Say Y or M here to enable the driver for use by Dell systems
- management software such as Dell OpenManage.
-
config DMIID
bool "Export DMI identification via sysfs to userspace"
depends on DMI
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 13660a9..3158dff 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -11,8 +11,6 @@
obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o
obj-$(CONFIG_EDD) += edd.o
obj-$(CONFIG_EFI_PCDP) += pcdp.o
-obj-$(CONFIG_DELL_RBU) += dell_rbu.o
-obj-$(CONFIG_DCDBAS) += dcdbas.o
obj-$(CONFIG_DMIID) += dmi-id.o
obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o
obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o
diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
index ae86134..d92f5b8 100644
--- a/drivers/fsi/fsi-sbefifo.c
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
}
ffdc_iov.iov_base = ffdc;
ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
- iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+ iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
cmd[0] = cpu_to_be32(2);
cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
@@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
rbytes = (*resp_len) * sizeof(__be32);
resp_iov.iov_base = response;
resp_iov.iov_len = rbytes;
- iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+ iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
/* Perform the command */
mutex_lock(&sbefifo->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 297a549..0a4fba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -135,7 +135,8 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
}
return 0;
@@ -517,7 +518,8 @@ static int acp_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = state == AMD_PG_STATE_GATE ? true : false;
- if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1e4dd09..30bc345 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1493,8 +1493,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
- if (amdgpu_sriov_vf(adev))
- adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -1600,7 +1598,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
}
}
- if (adev->powerplay.pp_funcs->load_firmware) {
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
if (r) {
pr_err("firmware loading failed\n");
@@ -3341,7 +3339,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_park(ring->sched.thread);
- if (job && job->base.sched == &ring->sched)
+ if (job && job->base.sched != &ring->sched)
continue;
drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 28781414..943dbf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -114,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
-/* OverDrive(bit 14) disabled by default*/
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
+uint amdgpu_pp_feature_mask = 0xfffd3fff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 790fd54..1a656b86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -392,7 +392,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
return;
- if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 94055a4..59cc678 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -704,7 +704,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
- amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+ ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+
+ if (ret)
+ return -EINVAL;
return count;
}
@@ -737,7 +740,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
- amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+ ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+
+ if (ret)
+ return -EINVAL;
return count;
}
@@ -770,7 +776,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
- amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+ ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+
+ if (ret)
+ return -EINVAL;
return count;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6904d79..352b304 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -542,7 +542,8 @@ static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
struct amdgpu_vm_pt_cursor *cursor)
{
amdgpu_vm_pt_next(adev, cursor);
- while (amdgpu_vm_pt_descendant(adev, cursor));
+ if (cursor->pfn != ~0ll)
+ while (amdgpu_vm_pt_descendant(adev, cursor));
}
/**
@@ -3234,8 +3235,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
rbtree_postorder_for_each_entry_safe(mapping, tmp,
&vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
kfree(mapping);
}
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 3d0f277..617b0c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4815,8 +4815,10 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
if (r)
goto done;
- /* Test KCQs */
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ /* Test KCQs - reversing the order of rings seems to fix ring test failure
+ * after GPU reset
+ */
+ for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
ring = &adev->gfx.compute_ring[i];
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 14649f8..fd23ba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -280,7 +280,7 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
return;
if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
- if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 04fa3d9..7a8c917 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1366,7 +1366,8 @@ static int sdma_v4_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
sdma_v4_0_init_golden_registers(adev);
@@ -1386,7 +1387,8 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
- if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+ && adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e224f23..b0df6dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1524,6 +1524,13 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
{
struct amdgpu_display_manager *dm = bl_get_data(bd);
+ /*
+ * PWM interprets 0 as 100% rather than 0% because of a HW
+ * limitation for level 0. So limit the minimum brightness level
+ * to 1.
+ */
+ if (bd->props.brightness < 1)
+ return 1;
if (dc_link_set_backlight_level(dm->backlight_link,
bd->props.brightness, 0, 0))
return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 0fab64a..12001a0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -101,7 +101,7 @@ bool dm_pp_apply_display_requirements(
adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
}
- if (adev->powerplay.pp_funcs->display_configuration_change)
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_configuration_change)
adev->powerplay.pp_funcs->display_configuration_change(
adev->powerplay.pp_handle,
&adev->pm.pm_display_cfg);
@@ -304,7 +304,7 @@ bool dm_pp_get_clock_levels_by_type(
struct amd_pp_simple_clock_info validation_clks = { 0 };
uint32_t i;
- if (adev->powerplay.pp_funcs->get_clock_by_type) {
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
dc_to_pp_clock_type(clk_type), &pp_clks)) {
/* Error in pplib. Provide default values. */
@@ -315,7 +315,7 @@ bool dm_pp_get_clock_levels_by_type(
pp_to_dc_clock_levels(&pp_clks, dc_clks, clk_type);
- if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks(
pp_handle, &validation_clks)) {
/* Error in pplib. Provide default values. */
@@ -398,6 +398,9 @@ bool dm_pp_get_clock_levels_by_type_with_voltage(
struct pp_clock_levels_with_voltage pp_clk_info = {0};
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ if (!pp_funcs || !pp_funcs->get_clock_by_type_with_voltage)
+ return false;
+
if (pp_funcs->get_clock_by_type_with_voltage(pp_handle,
dc_to_pp_clock_type(clk_type),
&pp_clk_info))
@@ -438,7 +441,7 @@ bool dm_pp_apply_clock_for_voltage_request(
if (!pp_clock_request.clock_type)
return false;
- if (adev->powerplay.pp_funcs->display_clock_voltage_request)
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_clock_voltage_request)
ret = adev->powerplay.pp_funcs->display_clock_voltage_request(
adev->powerplay.pp_handle,
&pp_clock_request);
@@ -455,7 +458,7 @@ bool dm_pp_get_static_clocks(
struct amd_pp_clock_info pp_clk_info = {0};
int ret = 0;
- if (adev->powerplay.pp_funcs->get_current_clocks)
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_current_clocks)
ret = adev->powerplay.pp_funcs->get_current_clocks(
adev->powerplay.pp_handle,
&pp_clk_info);
@@ -505,6 +508,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
+ if (!pp_funcs || !pp_funcs->set_watermarks_for_clocks_ranges)
+ return;
+
for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
if (ranges->reader_wm_sets[i].wm_inst > 3)
wm_dce_clocks[i].wm_set_id = WM_SET_A;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index de19093..e3624ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -568,7 +568,7 @@ static struct input_pixel_processor *dce110_ipp_create(
static const struct encoder_feature_support link_enc_feature = {
.max_hdmi_deep_color = COLOR_DEPTH_121212,
- .max_hdmi_pixel_clock = 594000,
+ .max_hdmi_pixel_clock = 300000,
.flags.bits.IS_HBR2_CAPABLE = true,
.flags.bits.IS_TPS3_CAPABLE = true
};
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index a407892..c0d9f332 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -40,8 +40,6 @@
#define LITTLEENDIAN_CPU
#endif
-#undef READ
-#undef WRITE
#undef FRAME_SIZE
#define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index e8964ca..d6aa1d4 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -723,11 +723,14 @@ static int pp_dpm_force_clock_level(void *handle,
pr_info("%s was not implemented.\n", __func__);
return 0;
}
+
+ if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
+ pr_info("force clock level is for dpm manual mode only.\n");
+ return -EINVAL;
+ }
+
mutex_lock(&hwmgr->smu_lock);
- if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL)
- ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
- else
- ret = -EINVAL;
+ ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
mutex_unlock(&hwmgr->smu_lock);
return ret;
}
@@ -963,6 +966,7 @@ static int pp_dpm_switch_power_profile(void *handle,
static int pp_set_power_limit(void *handle, uint32_t limit)
{
struct pp_hwmgr *hwmgr = handle;
+ uint32_t max_power_limit;
if (!hwmgr || !hwmgr->pm_en)
return -EINVAL;
@@ -975,7 +979,13 @@ static int pp_set_power_limit(void *handle, uint32_t limit)
if (limit == 0)
limit = hwmgr->default_power_limit;
- if (limit > hwmgr->default_power_limit)
+ max_power_limit = hwmgr->default_power_limit;
+ if (hwmgr->od_enabled) {
+ max_power_limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+ max_power_limit /= 100;
+ }
+
+ if (limit > max_power_limit)
return -EINVAL;
mutex_lock(&hwmgr->smu_lock);
@@ -994,8 +1004,13 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
mutex_lock(&hwmgr->smu_lock);
- if (default_limit)
+ if (default_limit) {
*limit = hwmgr->default_power_limit;
+ if (hwmgr->od_enabled) {
+ *limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+ *limit /= 100;
+ }
+ }
else
*limit = hwmgr->power_limit;
@@ -1303,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle)
{
struct pp_hwmgr *hwmgr = handle;
- if (!hwmgr || !hwmgr->pm_en)
+ if (!hwmgr)
return -EINVAL;
- if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) {
+ if (!hwmgr->pm_en ||
+ hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL)
return 0;
- }
mutex_lock(&hwmgr->smu_lock);
hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 6c99cbf..ed35ec0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3588,9 +3588,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
break;
}
- if (i >= sclk_table->count)
+ if (i >= sclk_table->count) {
data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
- else {
+ sclk_table->dpm_levels[i-1].value = sclk;
+ } else {
/* TODO: Check SCLK in DAL's minimum clocks
* in case DeepSleep divider update is required.
*/
@@ -3605,9 +3606,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
break;
}
- if (i >= mclk_table->count)
+ if (i >= mclk_table->count) {
data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-
+ mclk_table->dpm_levels[i-1].value = mclk;
+ }
if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 4714b5b..99a33c3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -718,7 +718,7 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table,
table->WatermarkRow[1][i].MaxClock =
cpu_to_le16((uint16_t)
(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) /
- 100);
+ 1000);
table->WatermarkRow[1][i].MinUclk =
cpu_to_le16((uint16_t)
(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) /
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 419a1d7..8c4db86 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0)
hwmgr->platform_descriptor.overdriveLimit.memoryClock =
dpm_table->dpm_levels[dpm_table->count-1].value;
-
vega10_init_dpm_state(&(dpm_table->dpm_state));
data->dpm_table.eclk_table.count = 0;
@@ -3249,6 +3248,37 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
{
struct vega10_hwmgr *data = hwmgr->backend;
+ const struct phm_set_power_state_input *states =
+ (const struct phm_set_power_state_input *)input;
+ const struct vega10_power_state *vega10_ps =
+ cast_const_phw_vega10_power_state(states->pnew_state);
+ struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
+ uint32_t sclk = vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].gfx_clock;
+ struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
+ uint32_t mclk = vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].mem_clock;
+ uint32_t i;
+
+ for (i = 0; i < sclk_table->count; i++) {
+ if (sclk == sclk_table->dpm_levels[i].value)
+ break;
+ }
+
+ if (i >= sclk_table->count) {
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+ sclk_table->dpm_levels[i-1].value = sclk;
+ }
+
+ for (i = 0; i < mclk_table->count; i++) {
+ if (mclk == mclk_table->dpm_levels[i].value)
+ break;
+ }
+
+ if (i >= mclk_table->count) {
+ data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+ mclk_table->dpm_levels[i-1].value = mclk;
+ }
if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
@@ -4529,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
if (vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].gfx_clock >
- hwmgr->platform_descriptor.overdriveLimit.engineClock)
+ hwmgr->platform_descriptor.overdriveLimit.engineClock) {
vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].gfx_clock =
hwmgr->platform_descriptor.overdriveLimit.engineClock;
-
+ pr_warn("max sclk supported by vbios is %d\n",
+ hwmgr->platform_descriptor.overdriveLimit.engineClock);
+ }
return 0;
}
@@ -4581,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
if (vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].mem_clock >
- hwmgr->platform_descriptor.overdriveLimit.memoryClock)
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock) {
vega10_ps->performance_levels
[vega10_ps->performance_level_count - 1].mem_clock =
hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+ pr_warn("max mclk supported by vbios is %d\n",
+ hwmgr->platform_descriptor.overdriveLimit.memoryClock);
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 9600e2f..74bc373 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2356,6 +2356,13 @@ static int vega12_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
return vega12_disable_gfx_off(hwmgr);
}
+/*
+ * Placeholder .get_performance_level callback: ignores all of its arguments,
+ * leaves *level unwritten and always returns 0.
+ */
+static int vega12_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ PHM_PerformanceLevelDesignation designation, uint32_t index,
+ PHM_PerformanceLevel *level)
+{
+ return 0;
+}
+
static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
.backend_init = vega12_hwmgr_backend_init,
.backend_fini = vega12_hwmgr_backend_fini,
@@ -2406,6 +2413,7 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
.register_irq_handlers = smu9_register_irq_handlers,
.start_thermal_controller = vega12_start_thermal_controller,
.powergate_gfx = vega12_gfx_off_control,
+ .get_performance_level = vega12_get_performance_level,
};
int vega12_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index b4dbbb7..57143d5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -1875,38 +1875,20 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr,
return ret;
}
-static int vega20_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
+static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
+ PPCLK_e clk_id, uint32_t *clk_freq)
{
- uint32_t gfx_clk = 0;
int ret = 0;
- *gfx_freq = 0;
+ *clk_freq = 0;
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16))) == 0,
- "[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!",
+ PPSMC_MSG_GetDpmClockFreq, (clk_id << 16))) == 0,
+ "[GetCurrentClkFreq] Attempt to get Current Frequency Failed!",
return ret);
- gfx_clk = smum_get_argument(hwmgr);
+ *clk_freq = smum_get_argument(hwmgr);
- *gfx_freq = gfx_clk * 100;
-
- return 0;
-}
-
-static int vega20_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_freq)
-{
- uint32_t mem_clk = 0;
- int ret = 0;
-
- *mclk_freq = 0;
-
- PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
- PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16))) == 0,
- "[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!",
- return ret);
- mem_clk = smum_get_argument(hwmgr);
-
- *mclk_freq = mem_clk * 100;
+ *clk_freq = *clk_freq * 100;
return 0;
}
@@ -1937,12 +1919,16 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
switch (idx) {
case AMDGPU_PP_SENSOR_GFX_SCLK:
- ret = vega20_get_current_gfx_clk_freq(hwmgr, (uint32_t *)value);
+ ret = vega20_get_current_clk_freq(hwmgr,
+ PPCLK_GFXCLK,
+ (uint32_t *)value);
if (!ret)
*size = 4;
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
- ret = vega20_get_current_mclk_freq(hwmgr, (uint32_t *)value);
+ ret = vega20_get_current_clk_freq(hwmgr,
+ PPCLK_UCLK,
+ (uint32_t *)value);
if (!ret)
*size = 4;
break;
@@ -2012,7 +1998,6 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
if (data->smu_features[GNLD_DPM_DCEFCLK].enabled) {
switch (clk_type) {
case amd_pp_dcef_clock:
- clk_freq = clock_req->clock_freq_in_khz / 100;
clk_select = PPCLK_DCEFCLK;
break;
case amd_pp_disp_clock:
@@ -2041,11 +2026,20 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
return result;
}
+static int vega20_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ PHM_PerformanceLevelDesignation designation, uint32_t index,
+ PHM_PerformanceLevel *level)
+{
+ return 0;
+}
+
static int vega20_notify_smc_display_config_after_ps_adjustment(
struct pp_hwmgr *hwmgr)
{
struct vega20_hwmgr *data =
(struct vega20_hwmgr *)(hwmgr->backend);
+ struct vega20_single_dpm_table *dpm_table =
+ &data->dpm_table.mem_table;
struct PP_Clocks min_clocks = {0};
struct pp_display_clock_request clock_req;
int ret = 0;
@@ -2063,7 +2057,7 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
if (data->smu_features[GNLD_DPM_DCEFCLK].supported) {
clock_req.clock_type = amd_pp_dcef_clock;
- clock_req.clock_freq_in_khz = min_clocks.dcefClock;
+ clock_req.clock_freq_in_khz = min_clocks.dcefClock * 10;
if (!vega20_display_clock_voltage_request(hwmgr, &clock_req)) {
if (data->smu_features[GNLD_DS_DCEFCLK].supported)
PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(
@@ -2076,6 +2070,15 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
}
}
+ if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+ dpm_table->dpm_state.hard_min_level = min_clocks.memoryClock / 100;
+ PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+ PPSMC_MSG_SetHardMinByFreq,
+ (PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+ "[SetHardMinFreq] Set hard min uclk failed!",
+ return ret);
+ }
+
return 0;
}
@@ -2353,7 +2356,7 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
for (i = 0; i < count; i++) {
clocks->data[i].clocks_in_khz =
- dpm_table->dpm_levels[i].value * 100;
+ dpm_table->dpm_levels[i].value * 1000;
clocks->data[i].latency_in_us = 0;
}
@@ -2383,7 +2386,7 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
for (i = 0; i < count; i++) {
clocks->data[i].clocks_in_khz =
data->mclk_latency_table.entries[i].frequency =
- dpm_table->dpm_levels[i].value * 100;
+ dpm_table->dpm_levels[i].value * 1000;
clocks->data[i].latency_in_us =
data->mclk_latency_table.entries[i].latency =
vega20_get_mem_latency(hwmgr, dpm_table->dpm_levels[i].value);
@@ -2408,7 +2411,7 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
for (i = 0; i < count; i++) {
clocks->data[i].clocks_in_khz =
- dpm_table->dpm_levels[i].value * 100;
+ dpm_table->dpm_levels[i].value * 1000;
clocks->data[i].latency_in_us = 0;
}
@@ -2431,7 +2434,7 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
for (i = 0; i < count; i++) {
clocks->data[i].clocks_in_khz =
- dpm_table->dpm_levels[i].value * 100;
+ dpm_table->dpm_levels[i].value * 1000;
clocks->data[i].latency_in_us = 0;
}
@@ -2582,11 +2585,11 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
return -EINVAL;
}
- if (input_clk < clocks.data[0].clocks_in_khz / 100 ||
+ if (input_clk < clocks.data[0].clocks_in_khz / 1000 ||
input_clk > od8_settings[OD8_SETTING_UCLK_FMAX].max_value) {
pr_info("clock freq %d is not within allowed range [%d - %d]\n",
input_clk,
- clocks.data[0].clocks_in_khz / 100,
+ clocks.data[0].clocks_in_khz / 1000,
od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
return -EINVAL;
}
@@ -2726,7 +2729,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
switch (type) {
case PP_SCLK:
- ret = vega20_get_current_gfx_clk_freq(hwmgr, &now);
+ ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now);
PP_ASSERT_WITH_CODE(!ret,
"Attempt to get current gfx clk Failed!",
return ret);
@@ -2738,12 +2741,12 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
for (i = 0; i < clocks.num_levels; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
- i, clocks.data[i].clocks_in_khz / 100,
+ i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz == now) ? "*" : "");
break;
case PP_MCLK:
- ret = vega20_get_current_mclk_freq(hwmgr, &now);
+ ret = vega20_get_current_clk_freq(hwmgr, PPCLK_UCLK, &now);
PP_ASSERT_WITH_CODE(!ret,
"Attempt to get current mclk freq Failed!",
return ret);
@@ -2755,7 +2758,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
for (i = 0; i < clocks.num_levels; i++)
size += sprintf(buf + size, "%d: %uMhz %s\n",
- i, clocks.data[i].clocks_in_khz / 100,
+ i, clocks.data[i].clocks_in_khz / 1000,
(clocks.data[i].clocks_in_khz == now) ? "*" : "");
break;
@@ -2820,7 +2823,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
return ret);
size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
- clocks.data[0].clocks_in_khz / 100,
+ clocks.data[0].clocks_in_khz / 1000,
od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
}
@@ -3476,6 +3479,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
vega20_set_watermarks_for_clocks_ranges,
.display_clock_voltage_request =
vega20_display_clock_voltage_request,
+ .get_performance_level =
+ vega20_get_performance_level,
/* UMD pstate, profile related */
.force_dpm_level =
vega20_dpm_force_dpm_level,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
index e5f7f82..97f8a1a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
@@ -642,8 +642,14 @@ static int check_powerplay_tables(
"Unsupported PPTable format!", return -1);
PP_ASSERT_WITH_CODE(powerplay_table->sHeader.structuresize > 0,
"Invalid PowerPlay Table!", return -1);
- PP_ASSERT_WITH_CODE(powerplay_table->smcPPTable.Version == PPTABLE_V20_SMU_VERSION,
- "Unmatch PPTable version, vbios update may be needed!", return -1);
+
+ if (powerplay_table->smcPPTable.Version != PPTABLE_V20_SMU_VERSION) {
+ pr_info("Unmatch PPTable version: "
+ "pptable from VBIOS is V%d while driver supported is V%d!",
+ powerplay_table->smcPPTable.Version,
+ PPTABLE_V20_SMU_VERSION);
+ return -EINVAL;
+ }
//dump_pptable(&powerplay_table->smcPPTable);
@@ -716,10 +722,6 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
"[appendVbiosPPTable] Failed to retrieve Smc Dpm Table from VBIOS!",
return -1);
- memset(ppsmc_pptable->Padding32,
- 0,
- sizeof(struct atom_smc_dpm_info_v4_4) -
- sizeof(struct atom_common_table_header));
ppsmc_pptable->MaxVoltageStepGfx = smc_dpm_table->maxvoltagestepgfx;
ppsmc_pptable->MaxVoltageStepSoc = smc_dpm_table->maxvoltagestepsoc;
@@ -778,22 +780,19 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
ppsmc_pptable->FllGfxclkSpreadPercent = smc_dpm_table->fllgfxclkspreadpercent;
ppsmc_pptable->FllGfxclkSpreadFreq = smc_dpm_table->fllgfxclkspreadfreq;
- if ((smc_dpm_table->table_header.format_revision == 4) &&
- (smc_dpm_table->table_header.content_revision == 4)) {
- for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
- ppsmc_pptable->I2cControllers[i].Enabled =
- smc_dpm_table->i2ccontrollers[i].enabled;
- ppsmc_pptable->I2cControllers[i].SlaveAddress =
- smc_dpm_table->i2ccontrollers[i].slaveaddress;
- ppsmc_pptable->I2cControllers[i].ControllerPort =
- smc_dpm_table->i2ccontrollers[i].controllerport;
- ppsmc_pptable->I2cControllers[i].ThermalThrottler =
- smc_dpm_table->i2ccontrollers[i].thermalthrottler;
- ppsmc_pptable->I2cControllers[i].I2cProtocol =
- smc_dpm_table->i2ccontrollers[i].i2cprotocol;
- ppsmc_pptable->I2cControllers[i].I2cSpeed =
- smc_dpm_table->i2ccontrollers[i].i2cspeed;
- }
+ for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
+ ppsmc_pptable->I2cControllers[i].Enabled =
+ smc_dpm_table->i2ccontrollers[i].enabled;
+ ppsmc_pptable->I2cControllers[i].SlaveAddress =
+ smc_dpm_table->i2ccontrollers[i].slaveaddress;
+ ppsmc_pptable->I2cControllers[i].ControllerPort =
+ smc_dpm_table->i2ccontrollers[i].controllerport;
+ ppsmc_pptable->I2cControllers[i].ThermalThrottler =
+ smc_dpm_table->i2ccontrollers[i].thermalthrottler;
+ ppsmc_pptable->I2cControllers[i].I2cProtocol =
+ smc_dpm_table->i2ccontrollers[i].i2cprotocol;
+ ppsmc_pptable->I2cControllers[i].I2cSpeed =
+ smc_dpm_table->i2ccontrollers[i].i2cspeed;
}
return 0;
@@ -882,15 +881,10 @@ static int init_powerplay_table_information(
if (pptable_information->smc_pptable == NULL)
return -ENOMEM;
- if (powerplay_table->smcPPTable.Version <= 2)
- memcpy(pptable_information->smc_pptable,
- &(powerplay_table->smcPPTable),
- sizeof(PPTable_t) -
- sizeof(I2cControllerConfig_t) * I2C_CONTROLLER_NAME_COUNT);
- else
- memcpy(pptable_information->smc_pptable,
- &(powerplay_table->smcPPTable),
- sizeof(PPTable_t));
+ memcpy(pptable_information->smc_pptable,
+ &(powerplay_table->smcPPTable),
+ sizeof(PPTable_t));
+
result = append_vbios_pptable(hwmgr, (pptable_information->smc_pptable));
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
index 2998a49..63d5cf6 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
@@ -29,7 +29,7 @@
// any structure is changed in this file
#define SMU11_DRIVER_IF_VERSION 0x12
-#define PPTABLE_V20_SMU_VERSION 2
+#define PPTABLE_V20_SMU_VERSION 3
#define NUM_GFXCLK_DPM_LEVELS 16
#define NUM_VCLK_DPM_LEVELS 8
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
index f836d30..09b844e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
@@ -71,7 +71,11 @@ static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg)
result = PHM_WAIT_FIELD_UNEQUAL(hwmgr,
SMU_MP1_SRBM2P_RESP_0, CONTENT, 0);
if (result != 0) {
+ /* Read the last message to SMU, to report actual cause */
+ uint32_t val = cgs_read_register(hwmgr->device,
+ mmSMU_MP1_SRBM2P_MSG_0);
pr_err("smu8_send_msg_to_smc_async (0x%04x) failed\n", msg);
+ pr_err("SMU still servicing msg (0x%04x)\n", val);
return result;
}
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index f8a931c..680566d 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -458,18 +458,6 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge)
unsigned int val;
int ret;
- /*
- * FIXME:
- * This 70ms was found necessary by experimentation. If it's not
- * present, link training fails. It seems like it can go anywhere from
- * pre_enable() up to semi-auto link training initiation below.
- *
- * Neither the datasheet for the bridge nor the panel tested mention a
- * delay of this magnitude in the timing requirements. So for now, add
- * the mystery delay until someone figures out a better fix.
- */
- msleep(70);
-
/* DSI_A lane config */
val = CHA_DSI_LANES(4 - pdata->dsi->lanes);
regmap_update_bits(pdata->regmap, SN_DSI_LANES_REG,
@@ -536,7 +524,22 @@ static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge)
/* configure bridge ref_clk */
ti_sn_bridge_set_refclk_freq(pdata);
- /* in case drm_panel is connected then HPD is not supported */
+ /*
+ * HPD on this bridge chip is a bit useless. This is an eDP bridge
+ * so the HPD is an internal signal that's only there to signal that
+ * the panel is done powering up. ...but the bridge chip debounces
+ * this signal by between 100 ms and 400 ms (depending on process,
+ * voltage, and temperature--I measured it at about 200 ms). One
+ * particular panel asserted HPD 84 ms after it was powered on meaning
+ * that we saw HPD 284 ms after power on. ...but the same panel said
+ * that instead of looking at HPD you could just hardcode a delay of
+ * 200 ms. We'll assume that the panel driver will have the hardcoded
+ * delay in its prepare and always disable HPD.
+ *
+ * If HPD somehow makes sense on some future panel we'll have to
+ * change this to be conditional on someone specifying that HPD should
+ * be used.
+ */
regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE,
HPD_DISABLE);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 701cb33..d8b526b 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -308,6 +308,26 @@ update_connector_routing(struct drm_atomic_state *state,
return 0;
}
+ crtc_state = drm_atomic_get_new_crtc_state(state,
+ new_connector_state->crtc);
+ /*
+ * For compatibility with legacy users, we want to make sure that
+ * we allow DPMS On->Off modesets on unregistered connectors. Modesets
+ * which would result in anything else must be considered invalid, to
+ * avoid turning on new displays on dead connectors.
+ *
+ * Since the connector can be unregistered at any point during an
+ * atomic check or commit, this is racy. But that's OK: all we care
+ * about is ensuring that userspace can't do anything but shut off the
+ * display on a connector that was destroyed after it's been notified,
+ * not before.
+ */
+ if (drm_connector_is_unregistered(connector) && crtc_state->active) {
+ DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+ connector->base.id, connector->name);
+ return -EINVAL;
+ }
+
funcs = connector->helper_private;
if (funcs->atomic_best_encoder)
@@ -352,7 +372,6 @@ update_connector_routing(struct drm_atomic_state *state,
set_best_encoder(state, new_connector_state, new_encoder);
- crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
crtc_state->connectors_changed = true;
DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 1e40e5d..4943cef 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -379,7 +379,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
/* The connector should have been removed from userspace long before
* it is finally destroyed.
*/
- if (WARN_ON(connector->registered))
+ if (WARN_ON(connector->registration_state ==
+ DRM_CONNECTOR_REGISTERED))
drm_connector_unregister(connector);
if (connector->tile_group) {
@@ -436,7 +437,7 @@ int drm_connector_register(struct drm_connector *connector)
return 0;
mutex_lock(&connector->mutex);
- if (connector->registered)
+ if (connector->registration_state != DRM_CONNECTOR_INITIALIZING)
goto unlock;
ret = drm_sysfs_connector_add(connector);
@@ -456,7 +457,7 @@ int drm_connector_register(struct drm_connector *connector)
drm_mode_object_register(connector->dev, &connector->base);
- connector->registered = true;
+ connector->registration_state = DRM_CONNECTOR_REGISTERED;
goto unlock;
err_debugfs:
@@ -478,7 +479,7 @@ EXPORT_SYMBOL(drm_connector_register);
void drm_connector_unregister(struct drm_connector *connector)
{
mutex_lock(&connector->mutex);
- if (!connector->registered) {
+ if (connector->registration_state != DRM_CONNECTOR_REGISTERED) {
mutex_unlock(&connector->mutex);
return;
}
@@ -489,7 +490,7 @@ void drm_connector_unregister(struct drm_connector *connector)
drm_sysfs_connector_remove(connector);
drm_debugfs_connector_remove(connector);
- connector->registered = false;
+ connector->registration_state = DRM_CONNECTOR_UNREGISTERED;
mutex_unlock(&connector->mutex);
}
EXPORT_SYMBOL(drm_connector_unregister);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ff0bfc6..b506e36 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -122,6 +122,9 @@ static const struct edid_quirk {
/* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */
{ "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC },
+ /* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */
+ { "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC },
+
/* Belinea 10 15 55 */
{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 3fae4da..13f9b56 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5102,19 +5102,13 @@ intel_dp_long_pulse(struct intel_connector *connector,
*/
status = connector_status_disconnected;
goto out;
- } else {
- /*
- * If display is now connected check links status,
- * there has been known issues of link loss triggering
- * long pulse.
- *
- * Some sinks (eg. ASUS PB287Q) seem to perform some
- * weird HPD ping pong during modesets. So we can apparently
- * end up with HPD going low during a modeset, and then
- * going back up soon after. And once that happens we must
- * retrain the link to get a picture. That's in case no
- * userspace component reacted to intermittent HPD dip.
- */
+ }
+
+ /*
+ * Some external monitors do not signal loss of link synchronization
+ * with an IRQ_HPD, so force a link status check.
+ */
+ if (!intel_dp_is_edp(intel_dp)) {
struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
intel_dp_retrain_link(encoder, ctx);
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 7f155b4..1b00f8e 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
pipe_config->pbn = mst_pbn;
/* Zombie connectors can't have VCPI slots */
- if (READ_ONCE(connector->registered)) {
+ if (!drm_connector_is_unregistered(connector)) {
slots = drm_dp_atomic_find_vcpi_slots(state,
&intel_dp->mst_mgr,
port,
@@ -313,7 +313,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector)
struct edid *edid;
int ret;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return intel_connector_update_modes(connector, NULL);
edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port);
@@ -329,7 +329,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
struct intel_connector *intel_connector = to_intel_connector(connector);
struct intel_dp *intel_dp = intel_connector->mst_port;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return connector_status_disconnected;
return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr,
intel_connector->port);
@@ -372,7 +372,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
int bpp = 24; /* MST uses fixed bpp */
int max_rate, mode_rate, max_lanes, max_link_clock;
- if (!READ_ONCE(connector->registered))
+ if (drm_connector_is_unregistered(connector))
return MODE_ERROR;
if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 6bb7807..6cbbae3 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -881,22 +881,16 @@ nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
{
struct nv50_head *head = nv50_head(connector_state->crtc);
struct nv50_mstc *mstc = nv50_mstc(connector);
- if (mstc->port) {
- struct nv50_mstm *mstm = mstc->mstm;
- return &mstm->msto[head->base.index]->encoder;
- }
- return NULL;
+
+ return &mstc->mstm->msto[head->base.index]->encoder;
}
static struct drm_encoder *
nv50_mstc_best_encoder(struct drm_connector *connector)
{
struct nv50_mstc *mstc = nv50_mstc(connector);
- if (mstc->port) {
- struct nv50_mstm *mstm = mstc->mstm;
- return &mstm->msto[0]->encoder;
- }
- return NULL;
+
+ return &mstc->mstm->msto[0]->encoder;
}
static enum drm_mode_status
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 97964f7..a04ffb3 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -56,6 +56,8 @@ struct panel_desc {
/**
* @prepare: the time (in milliseconds) that it takes for the panel to
* become ready and start receiving video data
+ * @hpd_absent_delay: Add this to the prepare delay if we know Hot
+ * Plug Detect isn't used.
* @enable: the time (in milliseconds) that it takes for the panel to
* display the first valid frame after starting to receive
* video data
@@ -66,6 +68,7 @@ struct panel_desc {
*/
struct {
unsigned int prepare;
+ unsigned int hpd_absent_delay;
unsigned int enable;
unsigned int disable;
unsigned int unprepare;
@@ -79,6 +82,7 @@ struct panel_simple {
struct drm_panel base;
bool prepared;
bool enabled;
+ bool no_hpd;
const struct panel_desc *desc;
@@ -202,6 +206,7 @@ static int panel_simple_unprepare(struct drm_panel *panel)
static int panel_simple_prepare(struct drm_panel *panel)
{
struct panel_simple *p = to_panel_simple(panel);
+ unsigned int delay;
int err;
if (p->prepared)
@@ -215,8 +220,11 @@ static int panel_simple_prepare(struct drm_panel *panel)
gpiod_set_value_cansleep(p->enable_gpio, 1);
- if (p->desc->delay.prepare)
- msleep(p->desc->delay.prepare);
+ delay = p->desc->delay.prepare;
+ if (p->no_hpd)
+ delay += p->desc->delay.hpd_absent_delay;
+ if (delay)
+ msleep(delay);
p->prepared = true;
@@ -305,6 +313,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
panel->prepared = false;
panel->desc = desc;
+ panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd");
+
panel->supply = devm_regulator_get(dev, "power");
if (IS_ERR(panel->supply))
return PTR_ERR(panel->supply);
@@ -1363,7 +1373,7 @@ static const struct panel_desc innolux_n156bge_l21 = {
},
};
-static const struct drm_display_mode innolux_tv123wam_mode = {
+static const struct drm_display_mode innolux_p120zdg_bf1_mode = {
.clock = 206016,
.hdisplay = 2160,
.hsync_start = 2160 + 48,
@@ -1377,15 +1387,16 @@ static const struct drm_display_mode innolux_tv123wam_mode = {
.flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
};
-static const struct panel_desc innolux_tv123wam = {
- .modes = &innolux_tv123wam_mode,
+static const struct panel_desc innolux_p120zdg_bf1 = {
+ .modes = &innolux_p120zdg_bf1_mode,
.num_modes = 1,
.bpc = 8,
.size = {
- .width = 259,
- .height = 173,
+ .width = 254,
+ .height = 169,
},
.delay = {
+ .hpd_absent_delay = 200,
.unprepare = 500,
},
};
@@ -2445,8 +2456,8 @@ static const struct of_device_id platform_of_match[] = {
.compatible = "innolux,n156bge-l21",
.data = &innolux_n156bge_l21,
}, {
- .compatible = "innolux,tv123wam",
- .data = &innolux_tv123wam,
+ .compatible = "innolux,p120zdg-bf1",
+ .data = &innolux_p120zdg_bf1,
}, {
.compatible = "innolux,zj070na-01p",
.data = &innolux_zj070na_01p,
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 5ed319e..41e9935 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -149,6 +149,7 @@
config HID_ASUS
tristate "Asus"
depends on LEDS_CLASS
+ depends on ASUS_WMI || ASUS_WMI=n
---help---
Support for Asus notebook built-in keyboard and touchpad via i2c, and
the Asus Republic of Gamers laptop keyboard special keys.
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 88a5672f..dc6d647 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -29,6 +29,7 @@
#include <linux/dmi.h>
#include <linux/hid.h>
#include <linux/module.h>
+#include <linux/platform_data/x86/asus-wmi.h>
#include <linux/input/mt.h>
#include <linux/usb.h> /* For to_usb_interface for T100 touchpad intf check */
@@ -349,6 +350,24 @@ static void asus_kbd_backlight_work(struct work_struct *work)
hid_err(led->hdev, "Asus failed to set keyboard backlight: %d\n", ret);
}
+/* WMI-based keyboard backlight LED control (via asus-wmi driver) takes
+ * precedence. We only activate HID-based backlight control when the
+ * WMI control is not available.
+ */
+static bool asus_kbd_wmi_led_control_present(struct hid_device *hdev)
+{
+ u32 value;
+ int ret;
+
+ ret = asus_wmi_evaluate_method(ASUS_WMI_METHODID_DSTS2,
+ ASUS_WMI_DEVID_KBD_BACKLIGHT, 0, &value);
+ hid_dbg(hdev, "WMI backlight check: rc %d value %x", ret, value);
+ if (ret)
+ return false;
+
+ return !!(value & ASUS_WMI_DSTS_PRESENCE_BIT);
+}
+
static int asus_kbd_register_leds(struct hid_device *hdev)
{
struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
@@ -436,7 +455,9 @@ static int asus_input_configured(struct hid_device *hdev, struct hid_input *hi)
drvdata->input = input;
- if (drvdata->enable_backlight && asus_kbd_register_leds(hdev))
+ if (drvdata->enable_backlight &&
+ !asus_kbd_wmi_led_control_present(hdev) &&
+ asus_kbd_register_leds(hdev))
hid_warn(hdev, "Failed to initialize backlight.\n");
return 0;
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index dc78aa7..28460f6 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -306,10 +306,7 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
if (client->flags & I2C_CLIENT_TEN)
return -EINVAL;
- irq = irq_find_mapping(adap->host_notify_domain, client->addr);
- if (!irq)
- irq = irq_create_mapping(adap->host_notify_domain,
- client->addr);
+ irq = irq_create_mapping(adap->host_notify_domain, client->addr);
return irq > 0 ? irq : -ENXIO;
}
@@ -433,6 +430,8 @@ static int i2c_device_remove(struct device *dev)
dev_pm_clear_wake_irq(&client->dev);
device_init_wakeup(&client->dev, false);
+ client->irq = 0;
+
return status;
}
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 566d69a..add4c9c 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -384,9 +384,9 @@ static int mvebu_sei_probe(struct platform_device *pdev)
sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
sei->base = devm_ioremap_resource(sei->dev, sei->res);
- if (!sei->base) {
+ if (IS_ERR(sei->base)) {
dev_err(sei->dev, "Failed to remap SEI resource\n");
- return -ENODEV;
+ return PTR_ERR(sei->base);
}
/* Retrieve the SEI capabilities with the interrupt ranges */
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index b05022f..072bb5e 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -718,8 +718,7 @@ l1oip_socket_thread(void *data)
printk(KERN_DEBUG "%s: socket created and open\n",
__func__);
while (!signal_pending(current)) {
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1,
- recvbuf_size);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size);
recvlen = sock_recvmsg(socket, &msg, 0);
if (recvlen > 0) {
l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f3fb5bb..ac1cffd 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
!discard_bio)
continue;
bio_chain(discard_bio, bio);
- bio_clone_blkg_association(discard_bio, bio);
+ bio_clone_blkcg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
discard_bio, disk_devt(mddev->gendisk),
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 3370a41..951c984 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -8,7 +8,9 @@
lkdtm-$(CONFIG_LKDTM) += refcount.o
lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o
lkdtm-$(CONFIG_LKDTM) += usercopy.o
+lkdtm-$(CONFIG_LKDTM) += stackleak.o
+KASAN_SANITIZE_stackleak.o := n
KCOV_INSTRUMENT_rodata.o := n
OBJCOPYFLAGS :=
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 5a755590..2837dc77 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -184,6 +184,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(USERCOPY_STACK_BEYOND),
CRASHTYPE(USERCOPY_KERNEL),
CRASHTYPE(USERCOPY_KERNEL_DS),
+ CRASHTYPE(STACKLEAK_ERASING),
};
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 07db641..3c6fd32 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -84,4 +84,7 @@ void lkdtm_USERCOPY_STACK_BEYOND(void);
void lkdtm_USERCOPY_KERNEL(void);
void lkdtm_USERCOPY_KERNEL_DS(void);
+/* lkdtm_stackleak.c */
+void lkdtm_STACKLEAK_ERASING(void);
+
#endif
diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
new file mode 100644
index 0000000..d5a0844
--- /dev/null
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code tests that the current task stack is properly erased (filled
+ * with STACKLEAK_POISON).
+ *
+ * Authors:
+ * Alexander Popov <alex.popov@linux.com>
+ * Tycho Andersen <tycho@tycho.ws>
+ */
+
+#include "lkdtm.h"
+#include <linux/stackleak.h>
+
+void lkdtm_STACKLEAK_ERASING(void)
+{
+ unsigned long *sp, left, found, i;
+ const unsigned long check_depth =
+ STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+ /*
+ * For the details about the alignment of the poison values, see
+ * the comment in stackleak_track_stack().
+ */
+ sp = PTR_ALIGN(&i, sizeof(unsigned long));
+
+ left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long);
+ sp--;
+
+ /*
+ * One 'long int' at the bottom of the thread stack is reserved
+ * and not poisoned.
+ */
+ if (left > 1) {
+ left--;
+ } else {
+ pr_err("FAIL: not enough stack space for the test\n");
+ return;
+ }
+
+ pr_info("checking unused part of the thread stack (%lu bytes)...\n",
+ left * sizeof(unsigned long));
+
+ /*
+ * Search for 'check_depth' poison values in a row (just like
+ * stackleak_erase() does).
+ */
+ for (i = 0, found = 0; i < left && found <= check_depth; i++) {
+ if (*(sp - i) == STACKLEAK_POISON)
+ found++;
+ else
+ found = 0;
+ }
+
+ if (found <= check_depth) {
+ pr_err("FAIL: thread stack is not erased (checked %lu bytes)\n",
+ i * sizeof(unsigned long));
+ return;
+ }
+
+ pr_info("first %lu bytes are unpoisoned\n",
+ (i - found) * sizeof(unsigned long));
+
+ /* The rest of thread stack should be erased */
+ for (; i < left; i++) {
+ if (*(sp - i) != STACKLEAK_POISON) {
+ pr_err("FAIL: thread stack is NOT properly erased\n");
+ return;
+ }
+ }
+
+ pr_info("OK: the rest of the thread stack is properly erased\n");
+ return;
+}
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index bd52f29..264f4ed 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
if (!qpair || !buf)
return VMCI_ERROR_INVALID_ARGS;
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+ iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
qp_lock(qpair);
@@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
if (!qpair || !buf)
return VMCI_ERROR_INVALID_ARGS;
- iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+ iov_iter_kvec(&to, READ, &v, 1, buf_size);
qp_lock(qpair);
@@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
if (!qpair || !buf)
return VMCI_ERROR_INVALID_ARGS;
- iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+ iov_iter_kvec(&to, READ, &v, 1, buf_size);
qp_lock(qpair);
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 9697977..6b9ad86 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM,
- sizeof(bond->params.ad_actor_system),
- &bond->params.ad_actor_system))
+ ETH_ALEN, &bond->params.ad_actor_system))
goto nla_put_failure;
}
if (!bond_3ad_get_active_agg_info(bond, &info)) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e82e4ca..055b406 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -316,8 +316,8 @@ struct hnae3_ae_ops {
int (*set_loopback)(struct hnae3_handle *handle,
enum hnae3_loop loop_mode, bool en);
- void (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
- bool en_mc_pmc);
+ int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
+ bool en_mc_pmc);
int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
void (*get_pauseparam)(struct hnae3_handle *handle,
@@ -391,7 +391,7 @@ struct hnae3_ae_ops {
int vector_num,
struct hnae3_ring_chain_node *vr_chain);
- void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+ int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
u32 (*get_fw_version)(struct hnae3_handle *handle);
void (*get_mdix_mode)(struct hnae3_handle *handle,
u8 *tp_mdix_ctrl, u8 *tp_mdix);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 32f3aca8..3f96aa3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -509,16 +509,18 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev)
h->netdev_flags = new_flags;
}
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = priv->ae_handle;
if (h->ae_algo->ops->set_promisc_mode) {
- h->ae_algo->ops->set_promisc_mode(h,
- promisc_flags & HNAE3_UPE,
- promisc_flags & HNAE3_MPE);
+ return h->ae_algo->ops->set_promisc_mode(h,
+ promisc_flags & HNAE3_UPE,
+ promisc_flags & HNAE3_MPE);
}
+
+ return 0;
}
void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
@@ -1494,18 +1496,22 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
return ret;
}
-static void hns3_restore_vlan(struct net_device *netdev)
+static int hns3_restore_vlan(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
+ int ret = 0;
u16 vid;
- int ret;
for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
- if (ret)
- netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
- vid, ret);
+ if (ret) {
+ netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n",
+ vid, ret);
+ return ret;
+ }
}
+
+ return ret;
}
static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
@@ -2727,7 +2733,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
GFP_KERNEL);
if (!chain)
- return -ENOMEM;
+ goto err_free_chain;
cur_chain->next = chain;
chain->tqp_index = tx_ring->tqp->tqp_index;
@@ -2757,7 +2763,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
while (rx_ring) {
chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
if (!chain)
- return -ENOMEM;
+ goto err_free_chain;
cur_chain->next = chain;
chain->tqp_index = rx_ring->tqp->tqp_index;
@@ -2772,6 +2778,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
}
return 0;
+
+err_free_chain:
+ cur_chain = head->next; /* release every node linked after head; head itself is not freed here */
+ while (cur_chain) {
+ 	chain = cur_chain->next; /* remember the successor before the node goes away */
+ 	devm_kfree(&pdev->dev, cur_chain); /* free the current node, not its successor */
+ 	cur_chain = chain;
+ }
+ head->next = NULL; /* do not leave head pointing at freed memory */
+ return -ENOMEM;
}
static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
@@ -2821,7 +2837,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
struct hnae3_handle *h = priv->ae_handle;
struct hns3_enet_tqp_vector *tqp_vector;
int ret = 0;
- u16 i;
+ int i;
hns3_nic_set_cpumask(priv);
@@ -2868,13 +2884,19 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
if (ret)
- return ret;
+ goto map_ring_fail;
netif_napi_add(priv->netdev, &tqp_vector->napi,
hns3_nic_common_poll, NAPI_POLL_WEIGHT);
}
return 0;
+
+map_ring_fail:
+ while (i--)
+ netif_napi_del(&priv->tqp_vector[i].napi);
+
+ return ret;
}
static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
@@ -3031,8 +3053,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp,
return ret;
ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
- if (ret)
+ if (ret) {
+ devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
return ret;
+ }
return 0;
}
@@ -3059,6 +3083,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv)
return 0;
err:
+ while (i--) {
+ devm_kfree(priv->dev, priv->ring_data[i].ring);
+ devm_kfree(priv->dev,
+ priv->ring_data[i + h->kinfo.num_tqps].ring);
+ }
+
devm_kfree(&pdev->dev, priv->ring_data);
return ret;
}
@@ -3226,9 +3256,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
int i;
for (i = 0; i < h->kinfo.num_tqps; i++) {
- if (h->ae_algo->ops->reset_queue)
- h->ae_algo->ops->reset_queue(h, i);
-
hns3_fini_ring(priv->ring_data[i].ring);
hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
}
@@ -3236,11 +3263,12 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
}
/* Set mac addr if it is configured. or leave it to the AE driver */
-static void hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev, bool init)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hnae3_handle *h = priv->ae_handle;
u8 mac_addr_temp[ETH_ALEN];
+ int ret = 0;
if (h->ae_algo->ops->get_mac_addr && init) {
h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
@@ -3255,8 +3283,9 @@ static void hns3_init_mac_addr(struct net_device *netdev, bool init)
}
if (h->ae_algo->ops->set_mac_addr)
- h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
+ ret = h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
+ return ret;
}
static int hns3_restore_fd_rules(struct net_device *netdev)
@@ -3469,20 +3498,29 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
return ret;
}
-static void hns3_recover_hw_addr(struct net_device *ndev)
+static int hns3_recover_hw_addr(struct net_device *ndev)
{
struct netdev_hw_addr_list *list;
struct netdev_hw_addr *ha, *tmp;
+ int ret = 0;
/* go through and sync uc_addr entries to the device */
list = &ndev->uc;
- list_for_each_entry_safe(ha, tmp, &list->list, list)
- hns3_nic_uc_sync(ndev, ha->addr);
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ ret = hns3_nic_uc_sync(ndev, ha->addr);
+ if (ret)
+ return ret;
+ }
/* go through and sync mc_addr entries to the device */
list = &ndev->mc;
- list_for_each_entry_safe(ha, tmp, &list->list, list)
- hns3_nic_mc_sync(ndev, ha->addr);
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ ret = hns3_nic_mc_sync(ndev, ha->addr);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
}
static void hns3_remove_hw_addr(struct net_device *netdev)
@@ -3609,7 +3647,10 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
int ret;
for (i = 0; i < h->kinfo.num_tqps; i++) {
- h->ae_algo->ops->reset_queue(h, i);
+ ret = h->ae_algo->ops->reset_queue(h, i);
+ if (ret)
+ return ret;
+
hns3_init_ring_hw(priv->ring_data[i].ring);
/* We need to clear tx ring here because self test will
@@ -3701,18 +3742,30 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
bool vlan_filter_enable;
int ret;
- hns3_init_mac_addr(netdev, false);
- hns3_recover_hw_addr(netdev);
- hns3_update_promisc_mode(netdev, handle->netdev_flags);
+ ret = hns3_init_mac_addr(netdev, false);
+ if (ret)
+ return ret;
+
+ ret = hns3_recover_hw_addr(netdev);
+ if (ret)
+ return ret;
+
+ ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
+ if (ret)
+ return ret;
+
vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
hns3_enable_vlan_filter(netdev, vlan_filter_enable);
-
/* Hardware table is only clear when pf resets */
- if (!(handle->flags & HNAE3_SUPPORT_VF))
- hns3_restore_vlan(netdev);
+ if (!(handle->flags & HNAE3_SUPPORT_VF))
+ 	ret = hns3_restore_vlan(netdev); /* PF only; ret is still 0 here when this is skipped */
+ if (ret) /* return only on failure so the remaining restore steps below still run */
+ 	return ret;
- hns3_restore_fd_rules(netdev);
+ ret = hns3_restore_fd_rules(netdev);
+ if (ret)
+ return ret;
/* Carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 71cfca1..d3636d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -640,7 +640,7 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
u32 rl_value);
void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
#ifdef CONFIG_HNS3_DCB
void hns3_dcbnl_setup(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index ac13cb2..690f62e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -24,15 +24,15 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring)
return ring->desc_num - used - 1;
}
-static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head)
{
- int u = ring->next_to_use;
- int c = ring->next_to_clean;
+ int ntu = ring->next_to_use;
+ int ntc = ring->next_to_clean;
- if (unlikely(h >= ring->desc_num))
- return 0;
+ if (ntu > ntc)
+ return head >= ntc && head <= ntu;
- return u > c ? (h > c && h <= u) : (h > c || h <= u);
+ return head >= ntc || head <= ntu;
}
static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
@@ -304,6 +304,10 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev)
{
int ret;
+ /* Setup the lock for command queue */
+ spin_lock_init(&hdev->hw.cmq.csq.lock);
+ spin_lock_init(&hdev->hw.cmq.crq.lock);
+
/* Setup the queue entries for use cmd queue */
hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
@@ -337,18 +341,20 @@ int hclge_cmd_init(struct hclge_dev *hdev)
u32 version;
int ret;
+ spin_lock_bh(&hdev->hw.cmq.csq.lock);
+ spin_lock_bh(&hdev->hw.cmq.crq.lock);
+
hdev->hw.cmq.csq.next_to_clean = 0;
hdev->hw.cmq.csq.next_to_use = 0;
hdev->hw.cmq.crq.next_to_clean = 0;
hdev->hw.cmq.crq.next_to_use = 0;
- /* Setup the lock for command queue */
- spin_lock_init(&hdev->hw.cmq.csq.lock);
- spin_lock_init(&hdev->hw.cmq.crq.lock);
-
hclge_cmd_init_regs(&hdev->hw);
clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+ spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+ spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
if (ret) {
dev_err(&hdev->pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index dca6f23..123c37e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -751,7 +751,7 @@ static void hclge_process_ncsi_error(struct hclge_dev *hdev,
ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
HCLGE_NCSI_INT_CLR, 0);
if (ret)
- dev_err(dev, "failed(=%d) to clear NCSI intrerrupt status\n",
+ dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n",
ret);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 5234b53..ffdd960 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2236,7 +2236,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
}
/* clear the source of interrupt if it is not cause by reset */
- if (event_cause != HCLGE_VECTOR0_EVENT_RST) {
+ if (event_cause == HCLGE_VECTOR0_EVENT_MBX) {
hclge_clear_event_cause(hdev, event_cause, clearval);
hclge_enable_vector(&hdev->misc_vector, true);
}
@@ -2470,14 +2470,17 @@ static void hclge_reset(struct hclge_dev *hdev)
handle = &hdev->vport[0].nic;
rtnl_lock();
hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ rtnl_unlock();
if (!hclge_reset_wait(hdev)) {
+ rtnl_lock();
hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
hclge_reset_ae_dev(hdev->ae_dev);
hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
hclge_clear_reset_cause(hdev);
} else {
+ rtnl_lock();
/* schedule again to check pending resets later */
set_bit(hdev->reset_type, &hdev->reset_pending);
hclge_reset_task_schedule(hdev);
@@ -3314,8 +3317,8 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
param->vf_id = vport_id;
}
-static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
- bool en_mc_pmc)
+static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+ bool en_mc_pmc)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
@@ -3323,7 +3326,7 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, true,
vport->vport_id);
- hclge_cmd_set_promisc_mode(hdev, ¶m);
+ return hclge_cmd_set_promisc_mode(hdev, ¶m);
}
static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@ -6107,31 +6110,28 @@ static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
return tqp->index;
}
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
int reset_try_times = 0;
int reset_status;
u16 queue_gid;
- int ret;
-
- if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
- return;
+ int ret = 0;
queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
ret = hclge_tqp_enable(hdev, queue_id, 0, false);
if (ret) {
- dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
- return;
+ dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+ return ret;
}
ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
if (ret) {
- dev_warn(&hdev->pdev->dev,
- "Send reset tqp cmd fail, ret = %d\n", ret);
- return;
+ dev_err(&hdev->pdev->dev,
+ "Send reset tqp cmd fail, ret = %d\n", ret);
+ return ret;
}
reset_try_times = 0;
@@ -6144,16 +6144,16 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
}
if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
- dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
- return;
+ dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
+ return ret;
}
ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
- if (ret) {
- dev_warn(&hdev->pdev->dev,
- "Deassert the soft reset fail, ret = %d\n", ret);
- return;
- }
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Deassert the soft reset fail, ret = %d\n", ret);
+
+ return ret;
}
void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index e3dfd65..0d92154 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -778,7 +778,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev);
void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
void hclge_mbx_handler(struct hclge_dev *hdev);
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
int hclge_cfg_flowctrl(struct hclge_dev *hdev);
int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 04462a3..f890022 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -400,6 +400,12 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
/* handle all the mailbox requests in the queue */
while (!hclge_cmd_crq_empty(&hdev->hw)) {
+ if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+ dev_warn(&hdev->pdev->dev,
+ "command queue needs re-initializing\n");
+ return;
+ }
+
desc = &crq->desc[crq->next_to_use];
req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 24b1f2a..0301863 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -52,7 +52,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
struct hclge_desc desc;
int ret;
- if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
return 0;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
@@ -90,7 +90,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
struct hclge_desc desc;
int ret;
- if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
return 0;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index e0a86a5..085edb9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -925,12 +925,12 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
return status;
}
-static void hclgevf_set_promisc_mode(struct hnae3_handle *handle,
- bool en_uc_pmc, bool en_mc_pmc)
+static int hclgevf_set_promisc_mode(struct hnae3_handle *handle,
+ bool en_uc_pmc, bool en_mc_pmc)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
- hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
+ return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
}
static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
@@ -1080,7 +1080,7 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
1, false, NULL, 0);
}
-static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
u8 msg_data[2];
@@ -1091,10 +1091,10 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
/* disable vf queue before send queue reset msg to PF */
ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
if (ret)
- return;
+ return ret;
- hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
- 2, true, NULL, 0);
+ return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+ 2, true, NULL, 0);
}
static int hclgevf_notify_client(struct hclgevf_dev *hdev,
@@ -1170,6 +1170,8 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
/* bring down the nic to stop any ongoing TX/RX */
hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ rtnl_unlock();
+
/* check if VF could successfully fetch the hardware reset completion
* status from the hardware
*/
@@ -1181,12 +1183,15 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
ret);
dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+ rtnl_lock();
hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
rtnl_unlock();
return ret;
}
+ rtnl_lock();
+
/* now, re-initialize the nic client and ae device*/
ret = hclgevf_reset_stack(hdev);
if (ret)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index 967c993..bbf9bdd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -532,7 +532,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
}
void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
- enum hinic_l4_offload_type l4_type,
+ enum hinic_l4_tunnel_type l4_type,
u32 tunnel_len)
{
task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index a0dc63a..038522e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -160,7 +160,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
u32 network_len);
void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
- enum hinic_l4_offload_type l4_type,
+ enum hinic_l4_tunnel_type l4_type,
u32 tunnel_len);
void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index fd3373d..59e1bc0 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -200,6 +200,15 @@
If unsure, say N.
+config IXGBE_IPSEC
+ bool "IPSec XFRM cryptography-offload acceleration"
+ depends on IXGBE
+ depends on XFRM_OFFLOAD
+ default y
+ select XFRM_ALGO
+ ---help---
+ Enable support for IPSec offload in ixgbe.ko
+
config IXGBEVF
tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
depends on PCI_MSI
@@ -217,6 +226,15 @@
will be called ixgbevf. MSI-X interrupt support is required
for this driver to work correctly.
+config IXGBEVF_IPSEC
+ bool "IPSec XFRM cryptography-offload acceleration"
+ depends on IXGBEVF
+ depends on XFRM_OFFLOAD
+ default y
+ select XFRM_ALGO
+ ---help---
+ Enable support for IPSec offload in ixgbevf.ko
+
config I40E
tristate "Intel(R) Ethernet Controller XL710 Family support"
imply PTP_1588_CLOCK
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index e707d71..5d4f176 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -244,7 +244,8 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
}
/* guarantee we have free space in the SM mailbox */
- if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
+ if (hw->mbx.state == FM10K_STATE_OPEN &&
+ !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
/* keep track of how many times this occurs */
interface->hw_sm_mbx_full++;
@@ -302,6 +303,28 @@ void fm10k_iov_suspend(struct pci_dev *pdev)
}
}
+static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev) /* set the completer-abort bit in pdev's uncorrectable error mask */
+{
+ u32 err_mask;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); /* config-space offset of the error-reporting extended capability */
+ if (!pos) /* zero offset: capability not present, leave the device untouched */
+ return;
+
+ /* Mask the completer abort bit in the ERR_UNCOR_MASK register,
+ * preventing the device from reporting these errors to the upstream
+ * PCIe root device. This avoids bringing down platforms which upgrade
+ * non-fatal completer aborts into machine check exceptions. Completer
+ * aborts can occur whenever a VF reads a queue it doesn't own.
+ */
+ pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask);
+ err_mask |= PCI_ERR_UNC_COMP_ABORT; /* read-modify-write: other mask bits are preserved */
+ pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask);
+
+ mmiowb(); /* NOTE(review): the accesses above are config-space writes - confirm this barrier is needed */
+}
+
int fm10k_iov_resume(struct pci_dev *pdev)
{
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -317,6 +340,12 @@ int fm10k_iov_resume(struct pci_dev *pdev)
if (!iov_data)
return -ENOMEM;
+ /* Mask completer abort error reporting as
+ * the VFs can trigger this any time they read a queue
+ * that they don't own.
+ */
+ fm10k_mask_aer_comp_abort(pdev);
+
/* allocate hardware resources for the VFs */
hw->iov.ops.assign_resources(hw, num_vfs, num_vfs);
@@ -460,20 +489,6 @@ void fm10k_iov_disable(struct pci_dev *pdev)
fm10k_iov_free_data(pdev);
}
-static void fm10k_disable_aer_comp_abort(struct pci_dev *pdev)
-{
- u32 err_sev;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
- if (!pos)
- return;
-
- pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &err_sev);
- err_sev &= ~PCI_ERR_UNC_COMP_ABORT;
- pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, err_sev);
-}
-
int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
{
int current_vfs = pci_num_vf(pdev);
@@ -495,12 +510,6 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
/* allocate VFs if not already allocated */
if (num_vfs && num_vfs != current_vfs) {
- /* Disable completer abort error reporting as
- * the VFs can trigger this any time they read a queue
- * that they don't own.
- */
- fm10k_disable_aer_comp_abort(pdev);
-
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
dev_err(&pdev->dev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 503bbc0..5b2a50e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -11,7 +11,7 @@
#include "fm10k.h"
-#define DRV_VERSION "0.23.4-k"
+#define DRV_VERSION "0.26.1-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 02345d3..e49fb51 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -23,6 +23,8 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
*/
static const struct pci_device_id fm10k_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
+ { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
+ { PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
/* required last entry */
{ 0, }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 3e608e4..9fb9fca 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -15,6 +15,8 @@ struct fm10k_hw;
#define FM10K_DEV_ID_PF 0x15A4
#define FM10K_DEV_ID_VF 0x15A5
+#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2 0x15D5
#define FM10K_MAX_QUEUES 256
#define FM10K_MAX_QUEUES_PF 128
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 81b0e1f8..ac5698e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3674,7 +3674,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
local_vf_id, v_opcode, msglen);
switch (ret) {
- case VIRTCHNL_ERR_PARAM:
+ case VIRTCHNL_STATUS_ERR_PARAM:
return -EPERM;
default:
return -EINVAL;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 9f4d700..29ced6b 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -51,9 +51,15 @@
*
* The 40 bit 82580 SYSTIM overflows every
* 2^40 * 10^-9 / 60 = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter
+ * needs to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, so we aim for 8
+ * minutes to be sure the actual interval is shorter than 9.16 minutes.
*/
-#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8)
#define IGB_PTP_TX_TIMEOUT (HZ * 15)
#define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT)
#define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index ca6b0c4..4fb0d9e 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -17,4 +17,4 @@
ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
-ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
+ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index ec1b87c..143bdd5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -769,9 +769,9 @@ struct ixgbe_adapter {
#define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */
u32 *rss_key;
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
struct ixgbe_ipsec *ipsec;
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
/* AF_XDP zero-copy */
struct xdp_umem **xsk_umems;
@@ -1008,7 +1008,7 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
void ixgbe_store_reta(struct ixgbe_adapter *adapter);
s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
@@ -1036,5 +1036,5 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
u32 *mbuf, u32 vf) { return -EACCES; }
static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
u32 *mbuf, u32 vf) { return -EACCES; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
#endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0049a2b..113b38e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8694,7 +8694,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
#endif /* IXGBE_FCOE */
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
@@ -10190,7 +10190,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
* the TSO, so it's the exception.
*/
if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
if (!skb->sp)
#endif
features &= ~NETIF_F_TSO;
@@ -10883,7 +10883,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (hw->mac.type >= ixgbe_mac_82599EB)
netdev->features |= NETIF_F_SCTP_CRC;
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
#define IXGBE_ESP_FEATURES (NETIF_F_HW_ESP | \
NETIF_F_HW_ESP_TX_CSUM | \
NETIF_F_GSO_ESP)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index af25a8f..5dacfc8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -722,8 +722,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
adapter->default_up, vf);
- if (vfinfo->spoofchk_enabled)
+ if (vfinfo->spoofchk_enabled) {
hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
+ hw->mac.ops.set_mac_anti_spoofing(hw, true, vf);
+ }
}
/* reset multicast table array for vf */
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index 297d0f0..186a4bb 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -10,5 +10,5 @@
mbx.o \
ethtool.o \
ixgbevf_main.o
-ixgbevf-$(CONFIG_XFRM_OFFLOAD) += ipsec.o
+ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index e399e1c..ecab686 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -459,7 +459,7 @@ int ethtool_ioctl(struct ifreq *ifr);
extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
@@ -482,7 +482,7 @@ static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
struct ixgbevf_tx_buffer *first,
struct ixgbevf_ipsec_tx_data *itd)
{ return 0; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBEVF_IPSEC */
void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 98707ee..5e47ede 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4150,7 +4150,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
first->tx_flags = tx_flags;
first->protocol = vlan_get_protocol(skb);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
goto out_drop;
#endif
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 176c6b5..398328f 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -796,6 +796,7 @@ struct mvpp2_queue_vector {
int nrxqs;
u32 pending_cause_rx;
struct mvpp2_port *port;
+ struct cpumask *mask;
};
struct mvpp2_port {
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 14f9679..7a37a37 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3298,24 +3298,30 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
for (i = 0; i < port->nqvecs; i++) {
struct mvpp2_queue_vector *qv = port->qvecs + i;
- if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+ if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
+ qv->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!qv->mask) {
+ err = -ENOMEM;
+ goto err;
+ }
+
irq_set_status_flags(qv->irq, IRQ_NO_BALANCING);
+ }
err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
if (err)
goto err;
if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
- unsigned long mask = 0;
unsigned int cpu;
for_each_present_cpu(cpu) {
if (mvpp2_cpu_to_thread(port->priv, cpu) ==
qv->sw_thread_id)
- mask |= BIT(cpu);
+ cpumask_set_cpu(cpu, qv->mask);
}
- irq_set_affinity_hint(qv->irq, to_cpumask(&mask));
+ irq_set_affinity_hint(qv->irq, qv->mask);
}
}
@@ -3325,6 +3331,8 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
struct mvpp2_queue_vector *qv = port->qvecs + i;
irq_set_affinity_hint(qv->irq, NULL);
+ kfree(qv->mask);
+ qv->mask = NULL;
free_irq(qv->irq, qv);
}
@@ -3339,6 +3347,8 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
struct mvpp2_queue_vector *qv = port->qvecs + i;
irq_set_affinity_hint(qv->irq, NULL);
+ kfree(qv->mask);
+ qv->mask = NULL;
irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING);
free_irq(qv->irq, qv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5a6d091..db00bf1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -43,6 +43,7 @@
#include <linux/vmalloc.h>
#include <linux/irq.h>
+#include <net/ip.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_checksum.h>
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 94224c22..79638dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -713,43 +713,15 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
rq->stats->ecn_mark += !!rc;
}
-static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
{
- int last_frag_sz, bytes_in_prev, nr_frags;
- u8 *fcs_p1, *fcs_p2;
- skb_frag_t *last_frag;
- __be32 fcs_bytes;
+ const void *fcs_bytes;
+ u32 _fcs_bytes;
- if (!skb_is_nonlinear(skb))
- return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+ fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+ ETH_FCS_LEN, &_fcs_bytes);
- nr_frags = skb_shinfo(skb)->nr_frags;
- last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
- last_frag_sz = skb_frag_size(last_frag);
-
- /* If all FCS data is in last frag */
- if (last_frag_sz >= ETH_FCS_LEN)
- return *(__be32 *)(skb_frag_address(last_frag) +
- last_frag_sz - ETH_FCS_LEN);
-
- fcs_p2 = (u8 *)skb_frag_address(last_frag);
- bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
-
- /* Find where the other part of the FCS is - Linear or another frag */
- if (nr_frags == 1) {
- fcs_p1 = skb_tail_pointer(skb);
- } else {
- skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
-
- fcs_p1 = skb_frag_address(prev_frag) +
- skb_frag_size(prev_frag);
- }
- fcs_p1 -= bytes_in_prev;
-
- memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
- memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
-
- return fcs_bytes;
+ return __get_unaligned_cpu32(fcs_bytes);
}
static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
@@ -797,8 +769,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
network_depth - ETH_HLEN,
skb->csum);
if (unlikely(netdev->features & NETIF_F_RXFCS))
- skb->csum = csum_add(skb->csum,
- (__force __wsum)mlx5e_get_fcs(skb));
+ skb->csum = csum_block_add(skb->csum,
+ (__force __wsum)mlx5e_get_fcs(skb),
+ skb->len - ETH_FCS_LEN);
stats->csum_complete++;
return;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 937d0ac..30f751e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -943,8 +943,8 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
mlxsw_core->bus,
mlxsw_core->bus_priv, true,
devlink);
- if (err)
- mlxsw_core->reload_fail = true;
+ mlxsw_core->reload_fail = !!err;
+
return err;
}
@@ -1083,8 +1083,15 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
- if (mlxsw_core->reload_fail)
- goto reload_fail;
+ if (mlxsw_core->reload_fail) {
+ if (!reload)
+ /* Only the parts that were not de-initialized in the
+ * failed reload attempt need to be de-initialized.
+ */
+ goto reload_fail_deinit;
+ else
+ return;
+ }
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
@@ -1098,9 +1105,12 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (!reload)
devlink_resources_unregister(devlink, NULL);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
- if (reload)
- return;
-reload_fail:
+
+ return;
+
+reload_fail_deinit:
+ devlink_unregister(devlink);
+ devlink_resources_unregister(devlink, NULL);
devlink_free(devlink);
}
EXPORT_SYMBOL(mlxsw_core_bus_device_unregister);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 32cb671..db3d279 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
* Configures the ETS elements.
*/
#define MLXSW_REG_QEEC_ID 0x400D
-#define MLXSW_REG_QEEC_LEN 0x1C
+#define MLXSW_REG_QEEC_LEN 0x20
MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
@@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element.
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3342,6 +3351,17 @@ enum {
*/
MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
/* reg_qeec_mase
* Max shaper configuration enable. Enables configuration of the max
* shaper on this ETS element.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8a4983a..a2df12b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 minrate)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN]; /* QEEC register payload buffer */
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_mise_set(qeec_pl, true); /* mise = 1: enable min shaper configuration */
+ mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate); /* in bytes mode the rate is in units of 1000bps */
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); /* propagate the register write result */
+}
+
int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
u8 switch_prio, u8 tclass)
{
@@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
+ /* Configure the min shaper for multicast TCs. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ MLXSW_REG_QEEC_MIS_MIN);
+ if (err)
+ return err;
+ }
+
/* Map all priorities to traffic class 0. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index bc60d7a..739a51f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2661,8 +2661,6 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
- if (!fdb_info->added_by_user)
- break;
mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
break;
case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index b72ef17..bdd3515 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -243,7 +243,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
*/
int stmmac_mdio_reset(struct mii_bus *bus)
{
-#if defined(CONFIG_STMMAC_PLATFORM)
+#if IS_ENABLED(CONFIG_STMMAC_PLATFORM)
struct net_device *ndev = bus->priv;
struct stmmac_priv *priv = netdev_priv(ndev);
unsigned int mii_address = priv->hw->mii.addr;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e52b9d3..0b70c8b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
op->fcp_req.rspaddr = &op->rsp_iu;
op->fcp_req.rsplen = sizeof(op->rsp_iu);
op->fcp_req.done = nvme_fc_fcpio_done;
- op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
op->ctrl = ctrl;
op->queue = queue;
op->rq = rq;
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
if (res)
return res;
op->op.fcp_req.first_sgl = &op->sgl[0];
+ op->op.fcp_req.private = &op->priv[0];
return res;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f300319..c33bb20 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
struct pci_dev *pdev = to_pci_dev(dev->dev);
int bar;
+ if (dev->cmb_size)
+ return;
+
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!dev->cmbsz)
return;
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
- nvme_release_cmb(dev);
pci_free_irq_vectors(pdev);
if (pci_is_enabled(pdev)) {
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true);
+ nvme_release_cmb(dev);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 39d972e..01feebe 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
rw = READ;
}
- iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+ iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
iocb->ki_pos = pos;
iocb->ki_filp = req->ns->file;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index d023cf3..09692c9 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -777,8 +777,6 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev)
if (!(of_node_name_eq(next, "cpu") ||
(next->type && !of_node_cmp(next->type, "cpu"))))
continue;
- if (!__of_device_is_available(next))
- continue;
if (of_node_get(next))
break;
}
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index bdac939..54f6a40 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -60,7 +60,10 @@
After loading this driver the BIOS is still in control of the fan.
To let the kernel handle the fan, do:
- echo -n enabled > /sys/class/thermal/thermal_zone0/mode
+ echo -n enabled > /sys/class/thermal/thermal_zoneN/mode
+ where N=0,1,2... depending on the number of thermal nodes and the
+ detection order of your particular system. The "type" parameter
+ in the same node directory will tell you if it is "acerhdf".
For more information about this driver see
<http://piie.net/files/acerhdf_README.txt>
@@ -105,6 +108,22 @@
If you have an ACPI-compatible ASUS laptop, say Y or M here.
+config DCDBAS
+ tristate "Dell Systems Management Base Driver"
+ depends on X86
+ help
+ The Dell Systems Management Base Driver provides a sysfs interface
+ for systems management software to perform System Management
+ Interrupts (SMIs) and Host Control Actions (system power cycle or
+ power off after OS shutdown) on certain Dell systems.
+
+ See <file:Documentation/dcdbas.txt> for more details on the driver
+ and the Dell systems on which Dell systems management software makes
+ use of this driver.
+
+ Say Y or M here to enable the driver for use by Dell systems
+ management software such as Dell OpenManage.
+
#
# The DELL_SMBIOS driver depends on ACPI_WMI and/or DCDBAS if those
# backends are selected. The "depends" line prevents a configuration
@@ -227,6 +246,18 @@
To compile this driver as a module, choose M here: the module will
be called dell-rbtn.
+config DELL_RBU
+ tristate "BIOS update support for DELL systems via sysfs"
+ depends on X86
+ select FW_LOADER
+ select FW_LOADER_USER_HELPER
+ help
+ Say m if you want to have the option of updating the BIOS for your
+ DELL system. Note you need a Dell OpenManage or Dell Update package (DUP)
+ supporting application to communicate with the BIOS regarding the new
+ image for the image update to take effect.
+ See <file:Documentation/dell_rbu.txt> for more details on the driver.
+
config FUJITSU_LAPTOP
tristate "Fujitsu Laptop Extras"
@@ -336,6 +367,20 @@
To compile this driver as a module, choose M here: the module will
be called hp-wmi.
+config LG_LAPTOP
+ tristate "LG Laptop Extras"
+ depends on ACPI
+ depends on ACPI_WMI
+ depends on INPUT
+ select INPUT_SPARSEKMAP
+ select LEDS_CLASS
+ help
+ This driver adds support for hotkeys as well as control of keyboard
+ backlight, battery maximum charge level and various other ACPI
+ features.
+
+ If you have an LG Gram laptop, say Y or M here.
+
config MSI_LAPTOP
tristate "MSI Laptop Extras"
depends on ACPI
@@ -1231,6 +1276,18 @@
To compile this driver as a module, choose M here: the module
will be called i2c-multi-instantiate.
+config INTEL_ATOMISP2_PM
+ tristate "Intel AtomISP2 dummy / power-management driver"
+ depends on PCI && IOSF_MBI && PM
+ help
+ Power-management driver for Intel's Image Signal Processor found on
+ Bay and Cherry Trail devices. This dummy driver's sole purpose is to
+ turn the ISP off (put it in D3) to save power and to allow entering
+ of S0ix modes.
+
+ To compile this driver as a module, choose M here: the module
+ will be called intel_atomisp2_pm.
+
endif # X86_PLATFORM_DEVICES
config PMC_ATOM
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index e6d1bec..39ae941 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -9,9 +9,11 @@
obj-$(CONFIG_ASUS_WIRELESS) += asus-wireless.o
obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o
obj-$(CONFIG_EEEPC_WMI) += eeepc-wmi.o
+obj-$(CONFIG_LG_LAPTOP) += lg-laptop.o
obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o
obj-$(CONFIG_ACPI_CMPC) += classmate-laptop.o
obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o
+obj-$(CONFIG_DCDBAS) += dcdbas.o
obj-$(CONFIG_DELL_SMBIOS) += dell-smbios.o
dell-smbios-objs := dell-smbios-base.o
dell-smbios-$(CONFIG_DELL_SMBIOS_WMI) += dell-smbios-wmi.o
@@ -23,6 +25,7 @@
obj-$(CONFIG_DELL_WMI_LED) += dell-wmi-led.o
obj-$(CONFIG_DELL_SMO8800) += dell-smo8800.o
obj-$(CONFIG_DELL_RBTN) += dell-rbtn.o
+obj-$(CONFIG_DELL_RBU) += dell_rbu.o
obj-$(CONFIG_ACER_WMI) += acer-wmi.o
obj-$(CONFIG_ACER_WIRELESS) += acer-wireless.o
obj-$(CONFIG_ACERHDF) += acerhdf.o
@@ -92,3 +95,4 @@
obj-$(CONFIG_INTEL_TURBO_MAX_3) += intel_turbo_max_3.o
obj-$(CONFIG_INTEL_CHTDC_TI_PWRBTN) += intel_chtdc_ti_pwrbtn.o
obj-$(CONFIG_I2C_MULTI_INSTANTIATE) += i2c-multi-instantiate.o
+obj-$(CONFIG_INTEL_ATOMISP2_PM) += intel_atomisp2_pm.o
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index ea22591..5052242 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -86,6 +86,7 @@ static unsigned int interval = 10;
static unsigned int fanon = 60000;
static unsigned int fanoff = 53000;
static unsigned int verbose;
+static unsigned int list_supported;
static unsigned int fanstate = ACERHDF_FAN_AUTO;
static char force_bios[16];
static char force_product[16];
@@ -104,10 +105,12 @@ module_param(fanoff, uint, 0600);
MODULE_PARM_DESC(fanoff, "Turn the fan off below this temperature");
module_param(verbose, uint, 0600);
MODULE_PARM_DESC(verbose, "Enable verbose dmesg output");
+module_param(list_supported, uint, 0600);
+MODULE_PARM_DESC(list_supported, "List supported models and BIOS versions");
module_param_string(force_bios, force_bios, 16, 0);
-MODULE_PARM_DESC(force_bios, "Force BIOS version and omit BIOS check");
+MODULE_PARM_DESC(force_bios, "Pretend system has this known supported BIOS version");
module_param_string(force_product, force_product, 16, 0);
-MODULE_PARM_DESC(force_product, "Force BIOS product and omit BIOS check");
+MODULE_PARM_DESC(force_product, "Pretend system is this known supported model");
/*
* cmd_off: to switch the fan completely off and check if the fan is off
@@ -130,7 +133,7 @@ static const struct manualcmd mcmd = {
.moff = 0xff,
};
-/* BIOS settings */
+/* BIOS settings - only used during probe */
struct bios_settings {
const char *vendor;
const char *product;
@@ -141,8 +144,18 @@ struct bios_settings {
int mcmd_enable;
};
+/* Control fields copied out of the matching bios_settings entry; kept as a flat struct, a nested one is not worth the indirection */
+struct ctrl_settings {
+ u8 fanreg; /* EC register read and written for the fan state */
+ u8 tempreg; /* EC register read for the temperature */
+ struct fancmd cmd; /* cmd_off / cmd_auto values written to fanreg */
+ int mcmd_enable; /* non-zero: also write mcmd.moff to mcmd.mreg when switching the fan off */
+};
+
+static struct ctrl_settings ctrl_cfg __read_mostly;
+
/* Register addresses and values for different BIOS versions */
-static const struct bios_settings bios_tbl[] = {
+static const struct bios_settings bios_tbl[] __initconst = {
/* AOA110 */
{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00}, 0},
{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
@@ -233,6 +246,7 @@ static const struct bios_settings bios_tbl[] = {
{"Gateway", "LT31", "v1.3201", 0x55, 0x58, {0x9e, 0x00}, 0},
{"Gateway", "LT31", "v1.3302", 0x55, 0x58, {0x9e, 0x00}, 0},
{"Gateway", "LT31", "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
+ {"Gateway", "LT31", "v1.3307", 0x55, 0x58, {0x9e, 0x00}, 0},
/* Packard Bell */
{"Packard Bell", "DOA150", "v0.3104", 0x55, 0x58, {0x21, 0x00}, 0},
{"Packard Bell", "DOA150", "v0.3105", 0x55, 0x58, {0x20, 0x00}, 0},
@@ -256,8 +270,6 @@ static const struct bios_settings bios_tbl[] = {
{"", "", "", 0, 0, {0, 0}, 0}
};
-static const struct bios_settings *bios_cfg __read_mostly;
-
/*
* this struct is used to instruct thermal layer to use bang_bang instead of
* default governor for acerhdf
@@ -270,7 +282,7 @@ static int acerhdf_get_temp(int *temp)
{
u8 read_temp;
- if (ec_read(bios_cfg->tempreg, &read_temp))
+ if (ec_read(ctrl_cfg.tempreg, &read_temp))
return -EINVAL;
*temp = read_temp * 1000;
@@ -282,10 +294,10 @@ static int acerhdf_get_fanstate(int *state)
{
u8 fan;
- if (ec_read(bios_cfg->fanreg, &fan))
+ if (ec_read(ctrl_cfg.fanreg, &fan))
return -EINVAL;
- if (fan != bios_cfg->cmd.cmd_off)
+ if (fan != ctrl_cfg.cmd.cmd_off)
*state = ACERHDF_FAN_AUTO;
else
*state = ACERHDF_FAN_OFF;
@@ -306,13 +318,13 @@ static void acerhdf_change_fanstate(int state)
state = ACERHDF_FAN_AUTO;
}
- cmd = (state == ACERHDF_FAN_OFF) ? bios_cfg->cmd.cmd_off
- : bios_cfg->cmd.cmd_auto;
+ cmd = (state == ACERHDF_FAN_OFF) ? ctrl_cfg.cmd.cmd_off
+ : ctrl_cfg.cmd.cmd_auto;
fanstate = state;
- ec_write(bios_cfg->fanreg, cmd);
+ ec_write(ctrl_cfg.fanreg, cmd);
- if (bios_cfg->mcmd_enable && state == ACERHDF_FAN_OFF) {
+ if (ctrl_cfg.mcmd_enable && state == ACERHDF_FAN_OFF) {
if (verbose)
pr_notice("turning off fan manually\n");
ec_write(mcmd.mreg, mcmd.moff);
@@ -615,10 +627,11 @@ static int str_starts_with(const char *str, const char *start)
}
/* check hardware */
-static int acerhdf_check_hardware(void)
+static int __init acerhdf_check_hardware(void)
{
char const *vendor, *version, *product;
const struct bios_settings *bt = NULL;
+ int found = 0;
/* get BIOS data */
vendor = dmi_get_system_info(DMI_SYS_VENDOR);
@@ -632,6 +645,17 @@ static int acerhdf_check_hardware(void)
pr_info("Acer Aspire One Fan driver, v.%s\n", DRV_VER);
+ if (list_supported) {
+ pr_info("List of supported Manufacturer/Model/BIOS:\n");
+ pr_info("---------------------------------------------------\n");
+ for (bt = bios_tbl; bt->vendor[0]; bt++) {
+ pr_info("%-13s | %-17s | %-10s\n", bt->vendor,
+ bt->product, bt->version);
+ }
+ pr_info("---------------------------------------------------\n");
+ return -ECANCELED;
+ }
+
if (force_bios[0]) {
version = force_bios;
pr_info("forcing BIOS version: %s\n", version);
@@ -657,30 +681,36 @@ static int acerhdf_check_hardware(void)
if (str_starts_with(vendor, bt->vendor) &&
str_starts_with(product, bt->product) &&
str_starts_with(version, bt->version)) {
- bios_cfg = bt;
+ found = 1;
break;
}
}
- if (!bios_cfg) {
+ if (!found) {
pr_err("unknown (unsupported) BIOS version %s/%s/%s, please report, aborting!\n",
vendor, product, version);
return -EINVAL;
}
+ /* Copy control settings from BIOS table before we free it. */
+ ctrl_cfg.fanreg = bt->fanreg;
+ ctrl_cfg.tempreg = bt->tempreg;
+ memcpy(&ctrl_cfg.cmd, &bt->cmd, sizeof(struct fancmd));
+ ctrl_cfg.mcmd_enable = bt->mcmd_enable;
+
/*
* if started with kernel mode off, prevent the kernel from switching
* off the fan
*/
if (!kernelmode) {
pr_notice("Fan control off, to enable do:\n");
- pr_notice("echo -n \"enabled\" > /sys/class/thermal/thermal_zone0/mode\n");
+ pr_notice("echo -n \"enabled\" > /sys/class/thermal/thermal_zoneN/mode # N=0,1,2...\n");
}
return 0;
}
-static int acerhdf_register_platform(void)
+static int __init acerhdf_register_platform(void)
{
int err = 0;
@@ -712,7 +742,7 @@ static void acerhdf_unregister_platform(void)
platform_driver_unregister(&acerhdf_driver);
}
-static int acerhdf_register_thermal(void)
+static int __init acerhdf_register_thermal(void)
{
cl_dev = thermal_cooling_device_register("acerhdf-fan", NULL,
&acerhdf_cooling_ops);
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 93ee2d5..c285a16 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -43,6 +43,7 @@
#include <linux/hwmon-sysfs.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/platform_data/x86/asus-wmi.h>
#include <linux/platform_device.h>
#include <linux/thermal.h>
#include <linux/acpi.h>
@@ -69,89 +70,6 @@ MODULE_LICENSE("GPL");
#define NOTIFY_KBD_BRTDWN 0xc5
#define NOTIFY_KBD_BRTTOGGLE 0xc7
-/* WMI Methods */
-#define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */
-#define ASUS_WMI_METHODID_SFBD 0x44424653 /* Set First Boot Device */
-#define ASUS_WMI_METHODID_GLCD 0x44434C47 /* Get LCD status */
-#define ASUS_WMI_METHODID_GPID 0x44495047 /* Get Panel ID?? (Resol) */
-#define ASUS_WMI_METHODID_QMOD 0x444F4D51 /* Quiet MODe */
-#define ASUS_WMI_METHODID_SPLV 0x4C425053 /* Set Panel Light Value */
-#define ASUS_WMI_METHODID_AGFN 0x4E464741 /* FaN? */
-#define ASUS_WMI_METHODID_SFUN 0x4E554653 /* FUNCtionalities */
-#define ASUS_WMI_METHODID_SDSP 0x50534453 /* Set DiSPlay output */
-#define ASUS_WMI_METHODID_GDSP 0x50534447 /* Get DiSPlay output */
-#define ASUS_WMI_METHODID_DEVP 0x50564544 /* DEVice Policy */
-#define ASUS_WMI_METHODID_OSVR 0x5256534F /* OS VeRsion */
-#define ASUS_WMI_METHODID_DSTS 0x53544344 /* Device STatuS */
-#define ASUS_WMI_METHODID_DSTS2 0x53545344 /* Device STatuS #2*/
-#define ASUS_WMI_METHODID_BSTS 0x53545342 /* Bios STatuS ? */
-#define ASUS_WMI_METHODID_DEVS 0x53564544 /* DEVice Set */
-#define ASUS_WMI_METHODID_CFVS 0x53564643 /* CPU Frequency Volt Set */
-#define ASUS_WMI_METHODID_KBFT 0x5446424B /* KeyBoard FilTer */
-#define ASUS_WMI_METHODID_INIT 0x54494E49 /* INITialize */
-#define ASUS_WMI_METHODID_HKEY 0x59454B48 /* Hot KEY ?? */
-
-#define ASUS_WMI_UNSUPPORTED_METHOD 0xFFFFFFFE
-
-/* Wireless */
-#define ASUS_WMI_DEVID_HW_SWITCH 0x00010001
-#define ASUS_WMI_DEVID_WIRELESS_LED 0x00010002
-#define ASUS_WMI_DEVID_CWAP 0x00010003
-#define ASUS_WMI_DEVID_WLAN 0x00010011
-#define ASUS_WMI_DEVID_WLAN_LED 0x00010012
-#define ASUS_WMI_DEVID_BLUETOOTH 0x00010013
-#define ASUS_WMI_DEVID_GPS 0x00010015
-#define ASUS_WMI_DEVID_WIMAX 0x00010017
-#define ASUS_WMI_DEVID_WWAN3G 0x00010019
-#define ASUS_WMI_DEVID_UWB 0x00010021
-
-/* Leds */
-/* 0x000200XX and 0x000400XX */
-#define ASUS_WMI_DEVID_LED1 0x00020011
-#define ASUS_WMI_DEVID_LED2 0x00020012
-#define ASUS_WMI_DEVID_LED3 0x00020013
-#define ASUS_WMI_DEVID_LED4 0x00020014
-#define ASUS_WMI_DEVID_LED5 0x00020015
-#define ASUS_WMI_DEVID_LED6 0x00020016
-
-/* Backlight and Brightness */
-#define ASUS_WMI_DEVID_ALS_ENABLE 0x00050001 /* Ambient Light Sensor */
-#define ASUS_WMI_DEVID_BACKLIGHT 0x00050011
-#define ASUS_WMI_DEVID_BRIGHTNESS 0x00050012
-#define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021
-#define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */
-#define ASUS_WMI_DEVID_LIGHTBAR 0x00050025
-
-/* Misc */
-#define ASUS_WMI_DEVID_CAMERA 0x00060013
-
-/* Storage */
-#define ASUS_WMI_DEVID_CARDREADER 0x00080013
-
-/* Input */
-#define ASUS_WMI_DEVID_TOUCHPAD 0x00100011
-#define ASUS_WMI_DEVID_TOUCHPAD_LED 0x00100012
-
-/* Fan, Thermal */
-#define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011
-#define ASUS_WMI_DEVID_FAN_CTRL 0x00110012
-
-/* Power */
-#define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012
-
-/* Deep S3 / Resume on LID open */
-#define ASUS_WMI_DEVID_LID_RESUME 0x00120031
-
-/* DSTS masks */
-#define ASUS_WMI_DSTS_STATUS_BIT 0x00000001
-#define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002
-#define ASUS_WMI_DSTS_PRESENCE_BIT 0x00010000
-#define ASUS_WMI_DSTS_USER_BIT 0x00020000
-#define ASUS_WMI_DSTS_BIOS_BIT 0x00040000
-#define ASUS_WMI_DSTS_BRIGHTNESS_MASK 0x000000FF
-#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK 0x0000FF00
-#define ASUS_WMI_DSTS_LIGHTBAR_MASK 0x0000000F
-
#define ASUS_FAN_DESC "cpu_fan"
#define ASUS_FAN_MFUN 0x13
#define ASUS_FAN_SFUN_READ 0x06
@@ -239,7 +157,6 @@ struct asus_wmi {
int lightbar_led_wk;
struct workqueue_struct *led_workqueue;
struct work_struct tpd_led_work;
- struct work_struct kbd_led_work;
struct work_struct wlan_led_work;
struct work_struct lightbar_led_work;
@@ -302,8 +219,7 @@ static void asus_wmi_input_exit(struct asus_wmi *asus)
asus->inputdev = NULL;
}
-static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
- u32 *retval)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval)
{
struct bios_args args = {
.arg0 = arg0,
@@ -339,6 +255,7 @@ static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
return 0;
}
+EXPORT_SYMBOL_GPL(asus_wmi_evaluate_method);
static int asus_wmi_evaluate_method_agfn(const struct acpi_buffer args)
{
@@ -456,12 +373,9 @@ static enum led_brightness tpd_led_get(struct led_classdev *led_cdev)
return read_tpd_led_state(asus);
}
-static void kbd_led_update(struct work_struct *work)
+static void kbd_led_update(struct asus_wmi *asus)
{
int ctrl_param = 0;
- struct asus_wmi *asus;
-
- asus = container_of(work, struct asus_wmi, kbd_led_work);
/*
* bits 0-2: level
@@ -471,7 +385,6 @@ static void kbd_led_update(struct work_struct *work)
ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F);
asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL);
- led_classdev_notify_brightness_hw_changed(&asus->kbd_led, asus->kbd_led_wk);
}
static int kbd_led_read(struct asus_wmi *asus, int *level, int *env)
@@ -516,7 +429,7 @@ static void do_kbd_led_set(struct led_classdev *led_cdev, int value)
value = 0;
asus->kbd_led_wk = value;
- queue_work(asus->led_workqueue, &asus->kbd_led_work);
+ kbd_led_update(asus);
}
static void kbd_led_set(struct led_classdev *led_cdev,
@@ -525,6 +438,14 @@ static void kbd_led_set(struct led_classdev *led_cdev,
do_kbd_led_set(led_cdev, value);
}
+static void kbd_led_set_by_kbd(struct asus_wmi *asus, enum led_brightness value)
+{
+ struct led_classdev *led_cdev = &asus->kbd_led;
+
+ do_kbd_led_set(led_cdev, value); /* stores the level in asus->kbd_led_wk and calls kbd_led_update() */
+ led_classdev_notify_brightness_hw_changed(led_cdev, asus->kbd_led_wk); /* report the stored level rather than the raw request */
+}
+
static enum led_brightness kbd_led_get(struct led_classdev *led_cdev)
{
struct asus_wmi *asus;
@@ -671,8 +592,6 @@ static int asus_wmi_led_init(struct asus_wmi *asus)
led_val = kbd_led_read(asus, NULL, NULL);
if (led_val >= 0) {
- INIT_WORK(&asus->kbd_led_work, kbd_led_update);
-
asus->kbd_led_wk = led_val;
asus->kbd_led.name = "asus::kbd_backlight";
asus->kbd_led.flags = LED_BRIGHT_HW_CHANGED;
@@ -1746,18 +1665,18 @@ static void asus_wmi_notify(u32 value, void *context)
}
if (code == NOTIFY_KBD_BRTUP) {
- do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk + 1);
+ kbd_led_set_by_kbd(asus, asus->kbd_led_wk + 1);
goto exit;
}
if (code == NOTIFY_KBD_BRTDWN) {
- do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk - 1);
+ kbd_led_set_by_kbd(asus, asus->kbd_led_wk - 1);
goto exit;
}
if (code == NOTIFY_KBD_BRTTOGGLE) {
if (asus->kbd_led_wk == asus->kbd_led.max_brightness)
- do_kbd_led_set(&asus->kbd_led, 0);
+ kbd_led_set_by_kbd(asus, 0);
else
- do_kbd_led_set(&asus->kbd_led, asus->kbd_led_wk + 1);
+ kbd_led_set_by_kbd(asus, asus->kbd_led_wk + 1);
goto exit;
}
@@ -2291,7 +2210,7 @@ static int asus_hotk_resume(struct device *device)
struct asus_wmi *asus = dev_get_drvdata(device);
if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
- queue_work(asus->led_workqueue, &asus->kbd_led_work);
+ kbd_led_update(asus);
return 0;
}
@@ -2327,7 +2246,7 @@ static int asus_hotk_restore(struct device *device)
rfkill_set_sw_state(asus->uwb.rfkill, bl);
}
if (!IS_ERR_OR_NULL(asus->kbd_led.dev))
- queue_work(asus->led_workqueue, &asus->kbd_led_work);
+ kbd_led_update(asus);
return 0;
}
diff --git a/drivers/firmware/dcdbas.c b/drivers/platform/x86/dcdbas.c
similarity index 82%
rename from drivers/firmware/dcdbas.c
rename to drivers/platform/x86/dcdbas.c
index 0bdea60..88bd7ef 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/platform/x86/dcdbas.c
@@ -21,11 +21,13 @@
*/
#include <linux/platform_device.h>
+#include <linux/acpi.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
@@ -36,12 +38,11 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mutex.h>
-#include <asm/io.h>
#include "dcdbas.h"
#define DRIVER_NAME "dcdbas"
-#define DRIVER_VERSION "5.6.0-3.2"
+#define DRIVER_VERSION "5.6.0-3.3"
#define DRIVER_DESCRIPTION "Dell Systems Management Base Driver"
static struct platform_device *dcdbas_pdev;
@@ -49,19 +50,23 @@ static struct platform_device *dcdbas_pdev;
static u8 *smi_data_buf;
static dma_addr_t smi_data_buf_handle;
static unsigned long smi_data_buf_size;
+static unsigned long max_smi_data_buf_size = MAX_SMI_DATA_BUF_SIZE;
static u32 smi_data_buf_phys_addr;
static DEFINE_MUTEX(smi_data_lock);
+static u8 *eps_buffer;
static unsigned int host_control_action;
static unsigned int host_control_smi_type;
static unsigned int host_control_on_shutdown;
+static bool wsmt_enabled;
+
/**
* smi_data_buf_free: free SMI data buffer
*/
static void smi_data_buf_free(void)
{
- if (!smi_data_buf)
+ if (!smi_data_buf || wsmt_enabled)
return;
dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n",
@@ -86,7 +91,7 @@ static int smi_data_buf_realloc(unsigned long size)
if (smi_data_buf_size >= size)
return 0;
- if (size > MAX_SMI_DATA_BUF_SIZE)
+ if (size > max_smi_data_buf_size)
return -EINVAL;
/* new buffer is needed */
@@ -169,7 +174,7 @@ static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
{
ssize_t ret;
- if ((pos + count) > MAX_SMI_DATA_BUF_SIZE)
+ if ((pos + count) > max_smi_data_buf_size)
return -EINVAL;
mutex_lock(&smi_data_lock);
@@ -322,8 +327,20 @@ static ssize_t smi_request_store(struct device *dev,
ret = count;
break;
case 1:
- /* Calling Interface SMI */
- smi_cmd->ebx = (u32) virt_to_phys(smi_cmd->command_buffer);
+ /*
+ * Calling Interface SMI
+ *
+ * Provide physical address of command buffer field within
+ * the struct smi_cmd to BIOS.
+ *
+ * Because the address that smi_cmd (smi_data_buf) points to
+ * will be from memremap() of a non-memory address if WSMT
+ * is present, we can't use virt_to_phys() on smi_cmd, so
+ * we have to use the physical address that was saved when
+ * the virtual address for smi_cmd was received.
+ */
+ smi_cmd->ebx = smi_data_buf_phys_addr +
+ offsetof(struct smi_cmd, command_buffer);
ret = dcdbas_smi_request(smi_cmd);
if (!ret)
ret = count;
@@ -482,6 +499,93 @@ static void dcdbas_host_control(void)
}
}
+/* WSMT */
+
+static u8 checksum(u8 *buffer, u8 length)
+{
+ u8 sum = 0;
+ u8 *end = buffer + length;
+
+ while (buffer < end)
+ sum += *buffer++;
+ return sum;
+}
+
+static inline struct smm_eps_table *check_eps_table(u8 *addr)
+{
+ struct smm_eps_table *eps = (struct smm_eps_table *)addr;
+
+ if (strncmp(eps->smm_comm_buff_anchor, SMM_EPS_SIG, 4) != 0)
+ return NULL;
+
+ if (checksum(addr, eps->length) != 0)
+ return NULL;
+
+ return eps;
+}
+
+static int dcdbas_check_wsmt(void)
+{
+ struct acpi_table_wsmt *wsmt = NULL;
+ struct smm_eps_table *eps = NULL;
+ u64 remap_size;
+ u8 *addr;
+
+ acpi_get_table(ACPI_SIG_WSMT, 0, (struct acpi_table_header **)&wsmt);
+ if (!wsmt)
+ return 0;
+
+ /* Check if WSMT ACPI table shows that protection is enabled */
+ if (!(wsmt->protection_flags & ACPI_WSMT_FIXED_COMM_BUFFERS) ||
+ !(wsmt->protection_flags & ACPI_WSMT_COMM_BUFFER_NESTED_PTR_PROTECTION))
+ return 0;
+
+ /* Scan for EPS (entry point structure) */
+ for (addr = (u8 *)__va(0xf0000);
+ addr < (u8 *)__va(0x100000 - sizeof(struct smm_eps_table));
+ addr += 16) {
+ eps = check_eps_table(addr);
+ if (eps)
+ break;
+ }
+
+ if (!eps) {
+ dev_dbg(&dcdbas_pdev->dev, "found WSMT, but no EPS found\n");
+ return -ENODEV;
+ }
+
+ /*
+ * Get physical address of buffer and map to virtual address.
+ * Table gives size in 4K pages, regardless of actual system page size.
+ */
+ if (upper_32_bits(eps->smm_comm_buff_addr + 8)) {
+ dev_warn(&dcdbas_pdev->dev, "found WSMT, but EPS buffer address is above 4GB\n");
+ return -EINVAL;
+ }
+ /*
+ * Limit remap size to MAX_SMI_DATA_BUF_SIZE + 8 (since the first 8
+ * bytes are used for a semaphore, not the data buffer itself).
+ */
+ remap_size = eps->num_of_4k_pages * PAGE_SIZE;
+ if (remap_size > MAX_SMI_DATA_BUF_SIZE + 8)
+ remap_size = MAX_SMI_DATA_BUF_SIZE + 8;
+ eps_buffer = memremap(eps->smm_comm_buff_addr, remap_size, MEMREMAP_WB);
+ if (!eps_buffer) {
+ dev_warn(&dcdbas_pdev->dev, "found WSMT, but failed to map EPS buffer\n");
+ return -ENOMEM;
+ }
+
+ /* The first 8 bytes are for a semaphore, not part of the smi_data_buf */
+ smi_data_buf_phys_addr = eps->smm_comm_buff_addr + 8;
+ smi_data_buf = eps_buffer + 8;
+ smi_data_buf_size = remap_size - 8;
+ max_smi_data_buf_size = smi_data_buf_size;
+ wsmt_enabled = true;
+ dev_info(&dcdbas_pdev->dev,
+ "WSMT found, using firmware-provided SMI buffer.\n");
+ return 1;
+}
+
/**
* dcdbas_reboot_notify: handle reboot notification for host control
*/
@@ -548,6 +652,11 @@ static int dcdbas_probe(struct platform_device *dev)
dcdbas_pdev = dev;
+ /* Check if ACPI WSMT table specifies protected SMI buffer address */
+ error = dcdbas_check_wsmt();
+ if (error < 0)
+ return error;
+
/*
* BIOS SMI calls require buffer addresses be in 32-bit address space.
* This is done by setting the DMA mask below.
@@ -635,6 +744,8 @@ static void __exit dcdbas_exit(void)
*/
if (dcdbas_pdev)
smi_data_buf_free();
+ if (eps_buffer)
+ memunmap(eps_buffer);
platform_device_unregister(dcdbas_pdev_reg);
platform_driver_unregister(&dcdbas_driver);
}
diff --git a/drivers/firmware/dcdbas.h b/drivers/platform/x86/dcdbas.h
similarity index 93%
rename from drivers/firmware/dcdbas.h
rename to drivers/platform/x86/dcdbas.h
index ca3cb0a..52729a4 100644
--- a/drivers/firmware/dcdbas.h
+++ b/drivers/platform/x86/dcdbas.h
@@ -53,6 +53,7 @@
#define EXPIRED_TIMER (0)
#define SMI_CMD_MAGIC (0x534D4931)
+#define SMM_EPS_SIG "$SCB"
#define DCDBAS_DEV_ATTR_RW(_name) \
DEVICE_ATTR(_name,0600,_name##_show,_name##_store);
@@ -103,5 +104,14 @@ struct apm_cmd {
int dcdbas_smi_request(struct smi_cmd *smi_cmd);
+struct smm_eps_table {
+ char smm_comm_buff_anchor[4];
+ u8 length;
+ u8 checksum;
+ u8 version;
+ u64 smm_comm_buff_addr;
+ u64 num_of_4k_pages;
+} __packed;
+
#endif /* _DCDBAS_H_ */
diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c
index 97a90be..ab9b822 100644
--- a/drivers/platform/x86/dell-smbios-smm.c
+++ b/drivers/platform/x86/dell-smbios-smm.c
@@ -18,7 +18,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
-#include "../../firmware/dcdbas.h"
+#include "dcdbas.h"
#include "dell-smbios.h"
static int da_command_address;
diff --git a/drivers/firmware/dell_rbu.c b/drivers/platform/x86/dell_rbu.c
similarity index 98%
rename from drivers/firmware/dell_rbu.c
rename to drivers/platform/x86/dell_rbu.c
index fb8af5c..ccefa84 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/platform/x86/dell_rbu.c
@@ -45,6 +45,7 @@
#include <linux/moduleparam.h>
#include <linux/firmware.h>
#include <linux/dma-mapping.h>
+#include <asm/set_memory.h>
MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>");
MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems");
@@ -181,6 +182,11 @@ static int create_packet(void *data, size_t length)
packet_data_temp_buf = NULL;
}
}
+ /*
+ * set to uncacheable or it may never get written back before reboot
+ */
+ set_memory_uc((unsigned long)packet_data_temp_buf, 1 << ordernum);
+
spin_lock(&rbu_data.lock);
newpacket->data = packet_data_temp_buf;
@@ -349,6 +355,8 @@ static void packet_empty_list(void)
* to make sure there are no stale RBU packets left in memory
*/
memset(newpacket->data, 0, rbu_data.packetsize);
+ set_memory_wb((unsigned long)newpacket->data,
+ 1 << newpacket->ordernum);
free_pages((unsigned long) newpacket->data,
newpacket->ordernum);
kfree(newpacket);
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d4f1259..b6489cb 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -212,7 +212,7 @@ static int read_ec_data(acpi_handle handle, int cmd, unsigned long *data)
return 0;
}
}
- pr_err("timeout in read_ec_cmd\n");
+ pr_err("timeout in %s\n", __func__);
return -1;
}
@@ -1147,6 +1147,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
},
},
{
+ .ident = "Lenovo Legion Y530-15ICH",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion Y530-15ICH"),
+ },
+ },
+ {
.ident = "Lenovo Legion Y720-15IKB",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index 6cf9b7f..e28bcf6 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Intel HID event & 5 button array driver
*
* Copyright (C) 2015 Alex Hung <alex.hung@canonical.com>
* Copyright (C) 2015 Andrew Lutomirski <luto@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/acpi.h>
diff --git a/drivers/platform/x86/intel-rst.c b/drivers/platform/x86/intel-rst.c
index 7344d84..3b81cb8 100644
--- a/drivers/platform/x86/intel-rst.c
+++ b/drivers/platform/x86/intel-rst.c
@@ -1,26 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2013 Matthew Garrett <mjg59@srcf.ucam.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-
-#include <linux/init.h>
+#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/acpi.h>
MODULE_LICENSE("GPL");
@@ -53,12 +38,10 @@ static ssize_t irst_store_wakeup_events(struct device *dev,
acpi = to_acpi_device(dev);
error = kstrtoul(buf, 0, &value);
-
if (error)
return error;
status = acpi_execute_simple_method(acpi->handle, "SFFS", value);
-
if (ACPI_FAILURE(status))
return -EINVAL;
@@ -99,12 +82,10 @@ static ssize_t irst_store_wakeup_time(struct device *dev,
acpi = to_acpi_device(dev);
error = kstrtoul(buf, 0, &value);
-
if (error)
return error;
status = acpi_execute_simple_method(acpi->handle, "SFTV", value);
-
if (ACPI_FAILURE(status))
return -EINVAL;
diff --git a/drivers/platform/x86/intel-smartconnect.c b/drivers/platform/x86/intel-smartconnect.c
index bbe4c06..64c2dc9 100644
--- a/drivers/platform/x86/intel-smartconnect.c
+++ b/drivers/platform/x86/intel-smartconnect.c
@@ -1,25 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2013 Matthew Garrett <mjg59@srcf.ucam.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-
-#include <linux/init.h>
-#include <linux/module.h>
#include <linux/acpi.h>
+#include <linux/module.h>
MODULE_LICENSE("GPL");
@@ -44,6 +29,7 @@ static const struct acpi_device_id smartconnect_ids[] = {
{"INT33A0", 0},
{"", 0}
};
+MODULE_DEVICE_TABLE(acpi, smartconnect_ids);
static struct acpi_driver smartconnect_driver = {
.owner = THIS_MODULE,
@@ -56,5 +42,3 @@ static struct acpi_driver smartconnect_driver = {
};
module_acpi_driver(smartconnect_driver);
-
-MODULE_DEVICE_TABLE(acpi, smartconnect_ids);
diff --git a/drivers/platform/x86/intel-wmi-thunderbolt.c b/drivers/platform/x86/intel-wmi-thunderbolt.c
index c2257bd..9ded8e2 100644
--- a/drivers/platform/x86/intel-wmi-thunderbolt.c
+++ b/drivers/platform/x86/intel-wmi-thunderbolt.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* WMI Thunderbolt driver
*
* Copyright (C) 2017 Dell Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -38,12 +30,16 @@ static ssize_t force_power_store(struct device *dev,
input.length = sizeof(u8);
input.pointer = &mode;
mode = hex_to_bin(buf[0]);
+ dev_dbg(dev, "force_power: storing %#x\n", mode);
if (mode == 0 || mode == 1) {
status = wmi_evaluate_method(INTEL_WMI_THUNDERBOLT_GUID, 0, 1,
&input, NULL);
- if (ACPI_FAILURE(status))
+ if (ACPI_FAILURE(status)) {
+ dev_dbg(dev, "force_power: failed to evaluate ACPI method\n");
return -ENODEV;
+ }
} else {
+ dev_dbg(dev, "force_power: unsupported mode\n");
return -EINVAL;
}
return count;
@@ -95,4 +91,4 @@ module_wmi_driver(intel_wmi_thunderbolt_driver);
MODULE_ALIAS("wmi:" INTEL_WMI_THUNDERBOLT_GUID);
MODULE_AUTHOR("Mario Limonciello <mario.limonciello@dell.com>");
MODULE_DESCRIPTION("Intel WMI Thunderbolt force power driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_atomisp2_pm.c b/drivers/platform/x86/intel_atomisp2_pm.c
new file mode 100644
index 0000000..9371603
--- /dev/null
+++ b/drivers/platform/x86/intel_atomisp2_pm.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dummy driver for Intel's Image Signal Processor found on Bay and Cherry
+ * Trail devices. The sole purpose of this driver is to allow the ISP to
+ * be put in D3.
+ *
+ * Copyright (C) 2018 Hans de Goede <hdegoede@redhat.com>
+ *
+ * Based on various non-upstream patches for ISP support:
+ * Copyright (C) 2010-2017 Intel Corporation. All rights reserved.
+ * Copyright (c) 2010 Silicon Hive www.siliconhive.com.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <asm/iosf_mbi.h>
+
+/* PCI configuration regs */
+#define PCI_INTERRUPT_CTRL 0x9c
+
+#define PCI_CSI_CONTROL 0xe8
+#define PCI_CSI_CONTROL_PORTS_OFF_MASK 0x7
+
+/* IOSF BT_MBI_UNIT_PMC regs */
+#define ISPSSPM0 0x39
+#define ISPSSPM0_ISPSSC_OFFSET 0
+#define ISPSSPM0_ISPSSC_MASK 0x00000003
+#define ISPSSPM0_ISPSSS_OFFSET 24
+#define ISPSSPM0_ISPSSS_MASK 0x03000000
+#define ISPSSPM0_IUNIT_POWER_ON 0x0
+#define ISPSSPM0_IUNIT_POWER_OFF 0x3
+
+static int isp_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ unsigned long timeout;
+ u32 val;
+
+ pci_write_config_dword(dev, PCI_INTERRUPT_CTRL, 0);
+
+ /*
+ * MRFLD IUNIT DPHY is located in an always-power-on island.
+ * MRFLD HW design needs all CSI ports to be disabled before
+ * powering down the IUNIT.
+ */
+ pci_read_config_dword(dev, PCI_CSI_CONTROL, &val);
+ val |= PCI_CSI_CONTROL_PORTS_OFF_MASK;
+ pci_write_config_dword(dev, PCI_CSI_CONTROL, val);
+
+ /* Write 0x3 to ISPSSPM0 bit[1:0] to power off the IUNIT */
+ iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, ISPSSPM0,
+ ISPSSPM0_IUNIT_POWER_OFF, ISPSSPM0_ISPSSC_MASK);
+
+ /*
+ * There should be no IUNIT access while power-down is
+ * in progress. HW sighting: 4567865
+ * Wait up to 50 ms for the IUNIT to shut down.
+ */
+ timeout = jiffies + msecs_to_jiffies(50);
+ while (1) {
+ /* Wait until ISPSSPM0 bit[25:24] shows 0x3 */
+ iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, ISPSSPM0, &val);
+ val = (val & ISPSSPM0_ISPSSS_MASK) >> ISPSSPM0_ISPSSS_OFFSET;
+ if (val == ISPSSPM0_IUNIT_POWER_OFF)
+ break;
+
+ if (time_after(jiffies, timeout)) {
+ dev_err(&dev->dev, "IUNIT power-off timeout.\n");
+ return -EBUSY;
+ }
+ usleep_range(1000, 2000);
+ }
+
+ pm_runtime_allow(&dev->dev);
+ pm_runtime_put_sync_suspend(&dev->dev);
+
+ return 0;
+}
+
+static void isp_remove(struct pci_dev *dev)
+{
+ pm_runtime_get_sync(&dev->dev);
+ pm_runtime_forbid(&dev->dev);
+}
+
+static int isp_pci_suspend(struct device *dev)
+{
+ return 0;
+}
+
+static int isp_pci_resume(struct device *dev)
+{
+ return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(isp_pm_ops, isp_pci_suspend,
+ isp_pci_resume, NULL);
+
+static const struct pci_device_id isp_id_table[] = {
+ { PCI_VDEVICE(INTEL, 0x22b8), },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, isp_id_table);
+
+static struct pci_driver isp_pci_driver = {
+ .name = "intel_atomisp2_pm",
+ .id_table = isp_id_table,
+ .probe = isp_probe,
+ .remove = isp_remove,
+ .driver.pm = &isp_pm_ops,
+};
+
+module_pci_driver(isp_pci_driver);
+
+MODULE_DESCRIPTION("Intel AtomISP2 dummy / power-management drv (for suspend)");
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_bxtwc_tmu.c b/drivers/platform/x86/intel_bxtwc_tmu.c
index 227943a..951c105 100644
--- a/drivers/platform/x86/intel_bxtwc_tmu.c
+++ b/drivers/platform/x86/intel_bxtwc_tmu.c
@@ -1,21 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_bxtwc_tmu.c - Intel BXT Whiskey Cove PMIC TMU driver
+ * Intel BXT Whiskey Cove PMIC TMU driver
*
* Copyright (C) 2016 Intel Corporation. All rights reserved.
*
* This driver adds TMU (Time Management Unit) support for Intel BXT platform.
* It enables the alarm wake-up functionality in the TMU unit of Whiskey Cove
* PMIC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/module.h>
diff --git a/drivers/platform/x86/intel_cht_int33fe.c b/drivers/platform/x86/intel_cht_int33fe.c
index f40b1c19..464fe93 100644
--- a/drivers/platform/x86/intel_cht_int33fe.c
+++ b/drivers/platform/x86/intel_cht_int33fe.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel Cherry Trail ACPI INT33FE pseudo device driver
*
* Copyright (C) 2017 Hans de Goede <hdegoede@redhat.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Some Intel Cherry Trail based device which ship with Windows 10, have
* this weird INT33FE ACPI device with a CRS table with 4 I2cSerialBusV2
* resources, for 4 different chips attached to various i2c busses:
@@ -257,4 +254,4 @@ module_platform_driver(cht_int33fe_driver);
MODULE_DESCRIPTION("Intel Cherry Trail ACPI INT33FE pseudo device driver");
MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c b/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c
index 38b8e7c..0df2e82 100644
--- a/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c
+++ b/drivers/platform/x86/intel_chtdc_ti_pwrbtn.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Power-button driver for Dollar Cove TI PMIC
* Copyright (C) 2014 Intel Corp
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index e89ad49..4b8f730 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel INT0002 "Virtual GPIO" driver
*
@@ -9,10 +10,6 @@
*
* Author: Dyut Kumar Sil <dyut.k.sil@intel.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Some peripherals on Bay Trail and Cherry Trail platforms signal a Power
* Management Event (PME) to the Power Management Controller (PMC) to wakeup
* the system. When this happens software needs to clear the PME bus 0 status
@@ -57,11 +54,7 @@
#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
static const struct x86_cpu_id int0002_cpu_ids[] = {
-/*
- * Limit ourselves to Cherry Trail for now, until testing shows we
- * need to handle the INT0002 device on Baytrail too.
- * ICPU(INTEL_FAM6_ATOM_SILVERMONT), * Valleyview, Bay Trail *
- */
+ ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */
ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */
{}
};
@@ -110,6 +103,21 @@ static void int0002_irq_mask(struct irq_data *data)
outl(gpe_en_reg, GPE0A_EN_PORT);
}
+static int int0002_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+ struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
+ struct platform_device *pdev = to_platform_device(chip->parent);
+ int irq = platform_get_irq(pdev, 0);
+
+ /* Propagate to parent irq */
+ if (on)
+ enable_irq_wake(irq);
+ else
+ disable_irq_wake(irq);
+
+ return 0;
+}
+
static irqreturn_t int0002_irq(int irq, void *data)
{
struct gpio_chip *chip = data;
@@ -132,6 +140,7 @@ static struct irq_chip int0002_irqchip = {
.irq_ack = int0002_irq_ack,
.irq_mask = int0002_irq_mask,
.irq_unmask = int0002_irq_unmask,
+ .irq_set_wake = int0002_irq_set_wake,
};
static int int0002_probe(struct platform_device *pdev)
@@ -216,4 +225,4 @@ module_platform_driver(int0002_driver);
MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
MODULE_DESCRIPTION("Intel INT0002 Virtual GPIO driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index c5ece7e..225638a 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2009-2010 Intel Corporation
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
* Authors:
* Jesse Barnes <jbarnes@virtuousgeek.org>
*/
@@ -1697,6 +1686,6 @@ static struct pci_driver ips_pci_driver = {
module_pci_driver(ips_pci_driver);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
diff --git a/drivers/platform/x86/intel_ips.h b/drivers/platform/x86/intel_ips.h
index 60f4e3d..512ad23 100644
--- a/drivers/platform/x86/intel_ips.h
+++ b/drivers/platform/x86/intel_ips.h
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
*/
void ips_link_to_i915_driver(void);
diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c
index ef9b0af..77eb870 100644
--- a/drivers/platform/x86/intel_menlow.c
+++ b/drivers/platform/x86/intel_menlow.c
@@ -1,25 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_menlow.c - Intel menlow Driver for thermal management extension
+ * Intel menlow Driver for thermal management extension
*
* Copyright (C) 2008 Intel Corp
* Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
* Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This driver creates the sys I/F for programming the sensors.
* It also implements the driver for intel menlow memory controller (hardware
@@ -29,20 +14,19 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
#include <linux/pci.h>
#include <linux/pm.h>
+#include <linux/slab.h>
#include <linux/thermal.h>
-#include <linux/acpi.h>
+#include <linux/types.h>
MODULE_AUTHOR("Thomas Sujith");
MODULE_AUTHOR("Zhang Rui");
MODULE_DESCRIPTION("Intel Menlow platform specific driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
/*
* Memory controller device control
diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
index 5ad4420..292bace 100644
--- a/drivers/platform/x86/intel_mid_powerbtn.c
+++ b/drivers/platform/x86/intel_mid_powerbtn.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Power button driver for Intel MID platforms.
*
@@ -5,18 +6,8 @@
*
* Author: Hong Liu <hong.liu@intel.com>
* Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
-#include <linux/init.h>
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/mfd/intel_msic.h>
@@ -121,12 +112,9 @@ static const struct mid_pb_ddata mrfld_ddata = {
.setup = mrfld_setup,
};
-#define ICPU(model, ddata) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
-
static const struct x86_cpu_id mid_pb_cpu_ids[] = {
- ICPU(INTEL_FAM6_ATOM_SALTWELL_MID, mfld_ddata),
- ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, mrfld_ddata),
+ INTEL_CPU_FAM6(ATOM_SALTWELL_MID, mfld_ddata),
+ INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, mrfld_ddata),
{}
};
diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c
index 008a769..f402e2e 100644
--- a/drivers/platform/x86/intel_mid_thermal.c
+++ b/drivers/platform/x86/intel_mid_thermal.c
@@ -1,39 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_mid_thermal.c - Intel MID platform thermal driver
+ * Intel MID platform thermal driver
*
* Copyright (C) 2011 Intel Corporation
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Author: Durgadoss R <durgadoss.r@intel.com>
*/
#define pr_fmt(fmt) "intel_mid_thermal: " fmt
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/param.h>
#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/pm.h>
-#include <linux/thermal.h>
+#include <linux/err.h>
#include <linux/mfd/intel_msic.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
/* Number of thermal sensors */
#define MSIC_THERMAL_SENSORS 4
@@ -567,4 +551,4 @@ module_platform_driver(mid_thermal_driver);
MODULE_AUTHOR("Durgadoss R <durgadoss.r@intel.com>");
MODULE_DESCRIPTION("Intel Medfield Platform Thermal Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
index 5747f63..3c0438b 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * intel_oaktrail.c - Intel OakTrail Platform support.
+ * Intel OakTrail Platform support
*
* Copyright (C) 2010-2011 Intel Corporation
* Author: Yin Kangkai (kangkai.yin@intel.com)
@@ -8,21 +9,6 @@
* <cezary.jackiewicz (at) gmail.com>, based on MSI driver
* Copyright (C) 2006 Lennart Poettering <mzxreary (at) 0pointer (dot) de>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
* This driver does below things:
* 1. registers itself in the Linux backlight control in
* /sys/class/backlight/intel_oaktrail/
@@ -38,18 +24,18 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
#include <linux/acpi.h>
-#include <linux/fb.h>
-#include <linux/mutex.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
#include <linux/backlight.h>
-#include <linux/platform_device.h>
#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/fb.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
#include <linux/rfkill.h>
+
#include <acpi/video.h>
#define DRIVER_NAME "intel_oaktrail"
diff --git a/drivers/platform/x86/intel_pmc_core.c b/drivers/platform/x86/intel_pmc_core.c
index 2d272a3..6b31d41 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel Core SoC Power Management Controller Driver
*
@@ -6,16 +7,6 @@
*
* Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
* Vishwanath Somayaji <vishwanath.somayaji@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/platform/x86/intel_pmc_core.h b/drivers/platform/x86/intel_pmc_core.h
index 93a7e99..be04534 100644
--- a/drivers/platform/x86/intel_pmc_core.h
+++ b/drivers/platform/x86/intel_pmc_core.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel Core SoC Power Management Controller Header File
*
@@ -6,16 +7,6 @@
*
* Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
* Vishwanath Somayaji <vishwanath.somayaji@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#ifndef PMC_CORE_H
diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
index e7edc8c..7964ba2 100644
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ b/drivers/platform/x86/intel_pmc_ipc.c
@@ -1,39 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_pmc_ipc.c: Driver for the Intel PMC IPC mechanism
+ * Driver for the Intel PMC IPC mechanism
*
* (C) Copyright 2014-2015 Intel Corporation
*
- * This driver is based on Intel SCU IPC driver(intel_scu_opc.c) by
+ * This driver is based on Intel SCU IPC driver(intel_scu_ipc.c) by
* Sreedhara DS <sreedhara.ds@intel.com>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* PMC running in ARC processor communicates with other entity running in IA
* core through IPC mechanism which in turn messaging between IA core ad PMC.
*/
-#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/init.h>
#include <linux/device.h>
-#include <linux/pm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
-#include <linux/interrupt.h>
+#include <linux/pm.h>
#include <linux/pm_qos.h>
-#include <linux/kernel.h>
-#include <linux/bitops.h>
#include <linux/sched.h>
-#include <linux/atomic.h>
-#include <linux/notifier.h>
-#include <linux/suspend.h>
-#include <linux/acpi.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <asm/intel_pmc_ipc.h>
@@ -1029,7 +1024,7 @@ static void __exit intel_pmc_ipc_exit(void)
MODULE_AUTHOR("Zha Qipeng <qipeng.zha@intel.com>");
MODULE_DESCRIPTION("Intel PMC IPC driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
/* Some modules are dependent on this, so init earlier */
fs_initcall(intel_pmc_ipc_init);
diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c
index 2efeab6..7967192 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel_punit_ipc.c
@@ -1,25 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Driver for the Intel P-Unit Mailbox IPC mechanism
*
* (C) Copyright 2015 Intel Corporation
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* The heart of the P-Unit is the Foxton microcontroller and its firmware,
* which provide mailbox interface for power management usage.
*/
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
#include <linux/acpi.h>
-#include <linux/delay.h>
#include <linux/bitops.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
#include <linux/platform_device.h>
+
#include <asm/intel_punit_ipc.h>
/* IPC Mailbox registers */
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 75c8fef..cdab916 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_scu_ipc.c: Driver for the Intel SCU IPC mechanism
+ * Driver for the Intel SCU IPC mechanism
*
* (C) Copyright 2008-2010,2015 Intel Corporation
* Author: Sreedhara DS (sreedhara.ds@intel.com)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
* SCU running in ARC processor communicates with other entity running in IA
* core through IPC mechanism which in turn messaging between IA core ad SCU.
* SCU has two IPC mechanism IPC-1 and IPC-2. IPC-1 is used between IA32 and
@@ -16,14 +12,16 @@
* IPC-1 Driver provides an API for power control unit registers (e.g. MSIC)
* along with other APIs.
*/
+
#include <linux/delay.h>
+#include <linux/device.h>
#include <linux/errno.h>
#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pm.h>
-#include <linux/pci.h>
#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/pm.h>
#include <linux/sfi.h>
+
#include <asm/intel-mid.h>
#include <asm/intel_scu_ipc.h>
diff --git a/drivers/platform/x86/intel_scu_ipcutil.c b/drivers/platform/x86/intel_scu_ipcutil.c
index aa45424..8afe6fa 100644
--- a/drivers/platform/x86/intel_scu_ipcutil.c
+++ b/drivers/platform/x86/intel_scu_ipcutil.c
@@ -1,32 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * intel_scu_ipc.c: Driver for the Intel SCU IPC mechanism
+ * Driver for the Intel SCU IPC mechanism
*
* (C) Copyright 2008-2010 Intel Corporation
* Author: Sreedhara DS (sreedhara.ds@intel.com)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * This driver provides ioctl interfaces to call intel scu ipc driver api
+ * This driver provides IOCTL interfaces to call Intel SCU IPC driver API.
*/
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/fs.h>
#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/uaccess.h>
#include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
#include <asm/intel_scu_ipc.h>
static int major;
-/* ioctl commnds */
+/* IOCTL commands */
#define INTE_SCU_IPC_REGISTER_READ 0
#define INTE_SCU_IPC_REGISTER_WRITE 1
#define INTE_SCU_IPC_REGISTER_UPDATE 2
diff --git a/drivers/platform/x86/intel_telemetry_core.c b/drivers/platform/x86/intel_telemetry_core.c
index f378621..d4040bb 100644
--- a/drivers/platform/x86/intel_telemetry_core.c
+++ b/drivers/platform/x86/intel_telemetry_core.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel SoC Core Telemetry Driver
* Copyright (C) 2015, Intel Corporation.
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* Telemetry Framework provides platform related PM and performance statistics.
* This file provides the core telemetry API implementation.
*/
@@ -460,4 +452,4 @@ module_exit(telemetry_module_exit);
MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
MODULE_DESCRIPTION("Intel SoC Telemetry Interface");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index cee08f2..40bce56 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel SOC Telemetry debugfs Driver: Currently supports APL
* Copyright (c) 2015, Intel Corporation.
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* This file provides the debugfs interfaces for telemetry.
* /sys/kernel/debug/telemetry/pss_info: Shows Primary Control Sub-Sys Counters
* /sys/kernel/debug/telemetry/ioss_info: Shows IO Sub-System Counters
@@ -72,9 +64,6 @@
#define TELEM_IOSS_DX_D0IX_EVTS 25
#define TELEM_IOSS_PG_EVTS 30
-#define TELEM_DEBUGFS_CPU(model, data) \
- { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&data}
-
#define TELEM_CHECK_AND_PARSE_EVTS(EVTID, EVTNUM, BUF, EVTLOG, EVTDAT, MASK) { \
if (evtlog[index].telem_evtid == (EVTID)) { \
for (idx = 0; idx < (EVTNUM); idx++) \
@@ -319,8 +308,8 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = {
};
static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = {
- TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf),
- TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
+ INTEL_CPU_FAM6(ATOM_GOLDMONT, telem_apl_debugfs_conf),
+ INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
{}
};
@@ -951,12 +940,16 @@ static int __init telemetry_debugfs_init(void)
debugfs_conf = (struct telemetry_debugfs_conf *)id->driver_data;
err = telemetry_pltconfig_valid();
- if (err < 0)
+ if (err < 0) {
+ pr_info("Invalid pltconfig, ensure IPC1 device is enabled in BIOS\n");
return -ENODEV;
+ }
err = telemetry_debugfs_check_evts();
- if (err < 0)
+ if (err < 0) {
+ pr_info("telemetry_debugfs_check_evts failed\n");
return -EINVAL;
+ }
register_pm_notifier(&pm_notifier);
@@ -1037,4 +1030,4 @@ module_exit(telemetry_debugfs_exit);
MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
MODULE_DESCRIPTION("Intel SoC Telemetry debugfs Interface");
MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
index fcc6bee..df8565b 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel SOC Telemetry Platform Driver: Currently supports APL
* Copyright (c) 2015, Intel Corporation.
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
* This file provides the platform specific telemetry implementation for APL.
* It used the PUNIT and PMC IPC interfaces for configuring the counters.
* The accumulated results are fetched from SRAM.
@@ -1242,4 +1234,4 @@ module_exit(telemetry_module_exit);
MODULE_AUTHOR("Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>");
MODULE_DESCRIPTION("Intel SoC Telemetry Platform Driver");
MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c
index a6d5aa0..7b9cc84 100644
--- a/drivers/platform/x86/intel_turbo_max_3.c
+++ b/drivers/platform/x86/intel_turbo_max_3.c
@@ -1,28 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel Turbo Boost Max Technology 3.0 legacy (non HWP) enumeration driver
* Copyright (c) 2017, Intel Corporation.
* All rights reserved.
*
* Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <linux/cpuhotplug.h>
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
-#include <linux/cpuhotplug.h>
-#include <linux/cpufeature.h>
+
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
new file mode 100644
index 0000000..c0bb1f8
--- /dev/null
+++ b/drivers/platform/x86/lg-laptop.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * lg-laptop.c - LG Gram ACPI features and hotkeys Driver
+ *
+ * Copyright (C) 2018 Matan Ziv-Av <matan@svgalib.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/*
+ * Define a struct led_classdev variable called _name. Its LED class name is
+ * the stringified identifier and its maximum brightness is max. The callbacks
+ * are found by token pasting, so _name##_set and _name##_get must be visible
+ * where the macro is used.
+ */
+#define LED_DEVICE(_name, max) struct led_classdev _name = { \
+ .name = __stringify(_name), \
+ .max_brightness = max, \
+ .brightness_set = _name##_set, \
+ .brightness_get = _name##_get, \
+}
+
+MODULE_AUTHOR("Matan Ziv-Av");
+MODULE_DESCRIPTION("LG WMI Hotkey Driver");
+MODULE_LICENSE("GPL");
+
+#define WMI_EVENT_GUID0 "E4FB94F9-7F2B-4173-AD1A-CD1D95086248"
+#define WMI_EVENT_GUID1 "023B133E-49D1-4E10-B313-698220140DC2"
+#define WMI_EVENT_GUID2 "37BE1AC0-C3F2-4B1F-BFBE-8FDEAF2814D6"
+#define WMI_EVENT_GUID3 "911BAD44-7DF8-4FBB-9319-BABA1C4B293B"
+#define WMI_METHOD_WMAB "C3A72B38-D3EF-42D3-8CBB-D5A57049F66D"
+#define WMI_METHOD_WMBB "2B4F501A-BD3C-4394-8DCF-00A7D2BC8210"
+#define WMI_EVENT_GUID WMI_EVENT_GUID0
+
+#define WMAB_METHOD "\\XINI.WMAB"
+#define WMBB_METHOD "\\XINI.WMBB"
+#define SB_GGOV_METHOD "\\_SB.GGOV"
+#define GOV_TLED 0x2020008
+#define WM_GET 1
+#define WM_SET 2
+#define WM_KEY_LIGHT 0x400
+#define WM_TLED 0x404
+#define WM_FN_LOCK 0x407
+#define WM_BATT_LIMIT 0x61
+#define WM_READER_MODE 0xBF
+#define WM_FAN_MODE 0x33
+#define WMBB_USB_CHARGE 0x10B
+#define WMBB_BATT_LIMIT 0x10C
+
+#define PLATFORM_NAME "lg-laptop"
+
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID0);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID1);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID2);
+MODULE_ALIAS("wmi:" WMI_EVENT_GUID3);
+MODULE_ALIAS("wmi:" WMI_METHOD_WMAB);
+MODULE_ALIAS("wmi:" WMI_METHOD_WMBB);
+MODULE_ALIAS("acpi*:LGEX0815:*");
+
+static struct platform_device *pf_device;
+static struct input_dev *wmi_input_dev;
+
+static u32 inited;
+#define INIT_INPUT_WMI_0 0x01
+#define INIT_INPUT_WMI_2 0x02
+#define INIT_INPUT_ACPI 0x04
+#define INIT_TPAD_LED 0x08
+#define INIT_KBD_LED 0x10
+#define INIT_SPARSE_KEYMAP 0x80
+
+/*
+ * Scancode to key mapping handed to sparse_keymap_setup() for the hotkey
+ * input device. Scancode 0x80 is the entry acpi_notify() looks up; the other
+ * scancodes are matched against event codes received in wmi_notify().
+ */
+static const struct key_entry wmi_keymap[] = {
+ {KE_KEY, 0x70, {KEY_F15} }, /* LG control panel (F1) */
+ {KE_KEY, 0x74, {KEY_F13} }, /* Touchpad toggle (F5) */
+ {KE_KEY, 0xf020000, {KEY_F14} }, /* Read mode (F9) */
+ {KE_KEY, 0x10000000, {KEY_F16} },/* Keyboard backlight (F8) - pressing
+ * this key both sends an event and
+ * changes backlight level.
+ */
+ {KE_KEY, 0x80, {KEY_RFKILL} },
+ {KE_END, 0}
+};
+
+/*
+ * Evaluate the ACPI method \_SB.GGOV with a single integer argument.
+ *
+ * Returns the integer produced by the method (narrowed to int), -ENODEV if
+ * the method cannot be looked up, or -EINVAL if the evaluation fails or does
+ * not yield an integer object.
+ */
+static int ggov(u32 arg0)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_object_list arg;
+	union acpi_object args[1];
+	union acpi_object *r;
+	acpi_status status;
+	acpi_handle handle;
+	int res;
+
+	args[0].type = ACPI_TYPE_INTEGER;
+	args[0].integer.value = arg0;
+
+	status = acpi_get_handle(NULL, (acpi_string) SB_GGOV_METHOD, &handle);
+	if (ACPI_FAILURE(status)) {
+		pr_err("Cannot get handle\n");
+		return -ENODEV;
+	}
+
+	arg.count = 1;
+	arg.pointer = args;
+
+	status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_handle_err(handle, "GGOV: call failed.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The evaluation may succeed without producing a return object, in
+	 * which case the pointer is still the NULL it was initialised to.
+	 */
+	r = buffer.pointer;
+	if (!r || r->type != ACPI_TYPE_INTEGER) {
+		kfree(r);	/* kfree(NULL) is a no-op */
+		return -EINVAL;
+	}
+
+	res = r->integer.value;
+	kfree(r);
+
+	return res;
+}
+
+/*
+ * Call the WMAB firmware method (\XINI.WMAB) with three integer arguments:
+ * the method selector, arg1 and arg2.
+ *
+ * Returns the object produced by the method, which the caller owns and must
+ * kfree(), or NULL if the method cannot be looked up or the call fails.
+ */
+static union acpi_object *lg_wmab(u32 method, u32 arg1, u32 arg2)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_object_list arg;
+	union acpi_object args[3];
+	acpi_status status;
+	acpi_handle handle;
+
+	args[0].type = ACPI_TYPE_INTEGER;
+	args[0].integer.value = method;
+	args[1].type = ACPI_TYPE_INTEGER;
+	args[1].integer.value = arg1;
+	args[2].type = ACPI_TYPE_INTEGER;
+	args[2].integer.value = arg2;
+
+	status = acpi_get_handle(NULL, (acpi_string) WMAB_METHOD, &handle);
+	if (ACPI_FAILURE(status)) {
+		/* Terminate the line so the next log message is not glued on. */
+		pr_err("Cannot get handle\n");
+		return NULL;
+	}
+
+	arg.count = 3;
+	arg.pointer = args;
+
+	status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		acpi_handle_err(handle, "WMAB: call failed.\n");
+		return NULL;
+	}
+
+	return buffer.pointer;
+}
+
+/*
+ * Call the WMBB firmware method (\XINI.WMBB).
+ *
+ * The request is a 32-byte buffer carrying method_id at offset 0, arg1 at
+ * offset 4 and arg2 at offset 16; every other byte is sent as zero.
+ *
+ * Returns the object produced by the method, which the caller owns and must
+ * kfree(), or NULL if the method cannot be looked up or the call fails.
+ */
+static union acpi_object *lg_wmbb(u32 method_id, u32 arg1, u32 arg2)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_object_list arg;
+	union acpi_object args[3];
+	acpi_status status;
+	acpi_handle handle;
+	/* Zero-fill so no uninitialised stack bytes are handed to firmware. */
+	u8 buf[32] = { 0 };
+
+	*(u32 *)buf = method_id;
+	*(u32 *)(buf + 4) = arg1;
+	*(u32 *)(buf + 16) = arg2;
+	args[0].type = ACPI_TYPE_INTEGER;
+	args[0].integer.value = 0; /* ignored */
+	args[1].type = ACPI_TYPE_INTEGER;
+	args[1].integer.value = 1; /* Must be 1 or 2. Does not matter which */
+	args[2].type = ACPI_TYPE_BUFFER;
+	args[2].buffer.length = sizeof(buf);
+	args[2].buffer.pointer = buf;
+
+	status = acpi_get_handle(NULL, (acpi_string)WMBB_METHOD, &handle);
+	if (ACPI_FAILURE(status)) {
+		pr_err("Cannot get handle\n");
+		return NULL;
+	}
+
+	arg.count = 3;
+	arg.pointer = args;
+
+	status = acpi_evaluate_object(handle, NULL, &arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		/* Name the method that actually failed (was "WMAB"). */
+		acpi_handle_err(handle, "WMBB: call failed.\n");
+		return NULL;
+	}
+
+	return (union acpi_object *)buffer.pointer;
+}
+
+/*
+ * WMI notification handler. context carries the small integer that was
+ * passed when the handler was installed. The event data is fetched, and an
+ * integer payload is looked up in the sparse keymap and reported as a key
+ * press when it maps to a KE_KEY entry.
+ */
+static void wmi_notify(u32 value, void *context)
+{
+	struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *evt;
+	acpi_status rc;
+
+	pr_debug("event guid %li\n", (long)context);
+
+	rc = wmi_get_event_data(value, &out);
+	if (ACPI_FAILURE(rc)) {
+		pr_err("Bad event status 0x%x\n", rc);
+		return;
+	}
+
+	evt = (union acpi_object *)out.pointer;
+	if (!evt)
+		return;
+
+	if (evt->type == ACPI_TYPE_INTEGER) {
+		int scancode = evt->integer.value;
+		struct key_entry *entry =
+			sparse_keymap_entry_from_scancode(wmi_input_dev,
+							  scancode);
+
+		if (entry && entry->type == KE_KEY)
+			sparse_keymap_report_entry(wmi_input_dev, entry, 1,
+						   true);
+	}
+
+	pr_debug("Type: %i Eventcode: 0x%llx\n", evt->type,
+		 evt->integer.value);
+	kfree(out.pointer);
+}
+
+/*
+ * Create the hotkey input device and hook up the WMI event handlers.
+ *
+ * Progress is recorded in the inited bit mask so wmi_input_destroy() undoes
+ * exactly what succeeded. All failures are non-fatal: the function logs them
+ * and returns without the corresponding bits set.
+ */
+static void wmi_input_setup(void)
+{
+	acpi_status status;
+
+	wmi_input_dev = input_allocate_device();
+	if (!wmi_input_dev) {
+		pr_info("Cannot allocate input device\n");
+		return;
+	}
+
+	wmi_input_dev->name = "LG WMI hotkeys";
+	wmi_input_dev->phys = "wmi/input0";
+	wmi_input_dev->id.bustype = BUS_HOST;
+
+	if (sparse_keymap_setup(wmi_input_dev, wmi_keymap, NULL) ||
+	    input_register_device(wmi_input_dev)) {
+		pr_info("Cannot initialize input device\n");
+		input_free_device(wmi_input_dev);
+		/* Do not keep a pointer to the device that was just freed. */
+		wmi_input_dev = NULL;
+		return;
+	}
+
+	inited |= INIT_SPARSE_KEYMAP;
+
+	/* The context value is only echoed by wmi_notify() in its debug log. */
+	status = wmi_install_notify_handler(WMI_EVENT_GUID0, wmi_notify,
+					    (void *)0);
+	if (ACPI_SUCCESS(status))
+		inited |= INIT_INPUT_WMI_0;
+
+	status = wmi_install_notify_handler(WMI_EVENT_GUID2, wmi_notify,
+					    (void *)2);
+	if (ACPI_SUCCESS(status))
+		inited |= INIT_INPUT_WMI_2;
+}
+
+/*
+ * ACPI notify callback. Whatever the event number, it reports the keymap
+ * entry for scancode 0x80 (KEY_RFKILL in wmi_keymap), provided the input
+ * device was set up.
+ */
+static void acpi_notify(struct acpi_device *device, u32 event)
+{
+	struct key_entry *rfkill_key;
+
+	acpi_handle_debug(device->handle, "notify: %d\n", event);
+
+	if (!(inited & INIT_SPARSE_KEYMAP))
+		return;
+
+	rfkill_key = sparse_keymap_entry_from_scancode(wmi_input_dev, 0x80);
+	if (!rfkill_key || rfkill_key->type != KE_KEY)
+		return;
+
+	sparse_keymap_report_entry(wmi_input_dev, rfkill_key, 1, true);
+}
+
+/*
+ * sysfs store for fan_mode. Parses a boolean, reads the current WM_FAN_MODE
+ * word and issues two WM_SET calls: one with the value placed at bit 4 (bits
+ * 4-7 cleared first) and one with the value placed at bit 0 (bits 0-3 cleared
+ * first).
+ *
+ * Returns count on success, the kstrtobool() error for unparsable input, or
+ * -EIO if the current mode cannot be read as an integer.
+ */
+static ssize_t fan_mode_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buffer, size_t count)
+{
+ bool value;
+ union acpi_object *r;
+ u32 m;
+ int ret;
+
+ ret = kstrtobool(buffer, &value);
+ if (ret)
+ return ret;
+
+ r = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+ if (!r)
+ return -EIO;
+
+ if (r->type != ACPI_TYPE_INTEGER) {
+ kfree(r);
+ return -EIO;
+ }
+
+ m = r->integer.value;
+ kfree(r);
+ /*
+ * NOTE(review): both writes are built from the originally read m, so the
+ * second one carries the old bits 4-7 again - confirm this is what the
+ * firmware expects. The results of the two WM_SET calls are only freed,
+ * never checked, so a failed write still returns count.
+ */
+ r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xffffff0f) | (value << 4));
+ kfree(r);
+ r = lg_wmab(WM_FAN_MODE, WM_SET, (m & 0xfffffff0) | value);
+ kfree(r);
+
+ return count;
+}
+
+/*
+ * sysfs show for fan_mode: prints bit 0 of the WM_FAN_MODE word.
+ * Returns the number of characters written, or -EIO if the firmware call
+ * fails or does not return an integer.
+ */
+static ssize_t fan_mode_show(struct device *dev,
+			     struct device_attribute *attr, char *buffer)
+{
+	union acpi_object *reply;
+	ssize_t len = -EIO;
+
+	reply = lg_wmab(WM_FAN_MODE, WM_GET, 0);
+	if (!reply)
+		return -EIO;
+
+	if (reply->type == ACPI_TYPE_INTEGER) {
+		unsigned int mode = reply->integer.value & 0x01;
+
+		len = snprintf(buffer, PAGE_SIZE, "%d\n", mode);
+	}
+
+	kfree(reply);
+	return len;
+}
+
+/*
+ * sysfs store for usb_charge: parses a boolean and passes it to the
+ * WMBB_USB_CHARGE setter.
+ * Returns count on success, the kstrtobool() error for unparsable input, or
+ * -EIO if the firmware call returns nothing.
+ */
+static ssize_t usb_charge_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buffer, size_t count)
+{
+	union acpi_object *reply;
+	bool enable;
+	int err;
+
+	err = kstrtobool(buffer, &enable);
+	if (err)
+		return err;
+
+	reply = lg_wmbb(WMBB_USB_CHARGE, WM_SET, enable);
+	if (reply) {
+		kfree(reply);
+		return count;
+	}
+
+	return -EIO;
+}
+
+/*
+ * sysfs show for usb_charge: prints 1 if byte 0x10 of the buffer returned by
+ * the WMBB_USB_CHARGE getter is non-zero, otherwise 0.
+ * Returns the number of characters written, or -EIO if the firmware call
+ * fails, does not return a buffer, or returns one too short to hold byte
+ * 0x10.
+ */
+static ssize_t usb_charge_show(struct device *dev,
+			       struct device_attribute *attr, char *buffer)
+{
+	unsigned int status;
+	union acpi_object *r;
+
+	r = lg_wmbb(WMBB_USB_CHARGE, WM_GET, 0);
+	if (!r)
+		return -EIO;
+
+	/* Never index past the end of a firmware-supplied buffer. */
+	if (r->type != ACPI_TYPE_BUFFER || r->buffer.length <= 0x10) {
+		kfree(r);
+		return -EIO;
+	}
+
+	status = !!r->buffer.pointer[0x10];
+
+	kfree(r);
+
+	return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+/*
+ * sysfs store for reader_mode: parses a boolean and passes it to the
+ * WM_READER_MODE setter.
+ * Returns count on success, the kstrtobool() error for unparsable input, or
+ * -EIO if the firmware call returns nothing.
+ */
+static ssize_t reader_mode_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buffer, size_t count)
+{
+	union acpi_object *reply;
+	bool enable;
+	int err;
+
+	err = kstrtobool(buffer, &enable);
+	if (err)
+		return err;
+
+	reply = lg_wmab(WM_READER_MODE, WM_SET, enable);
+	if (reply) {
+		kfree(reply);
+		return count;
+	}
+
+	return -EIO;
+}
+
+/*
+ * sysfs show for reader_mode: prints 1 if the WM_READER_MODE getter returns
+ * a non-zero integer, otherwise 0.
+ * Returns the number of characters written, or -EIO if the firmware call
+ * fails or does not return an integer.
+ */
+static ssize_t reader_mode_show(struct device *dev,
+				struct device_attribute *attr, char *buffer)
+{
+	union acpi_object *reply;
+	ssize_t len = -EIO;
+
+	reply = lg_wmab(WM_READER_MODE, WM_GET, 0);
+	if (!reply)
+		return -EIO;
+
+	if (reply->type == ACPI_TYPE_INTEGER) {
+		unsigned int on = !!reply->integer.value;
+
+		len = snprintf(buffer, PAGE_SIZE, "%d\n", on);
+	}
+
+	kfree(reply);
+	return len;
+}
+
+/*
+ * sysfs store for fn_lock: parses a boolean and passes it to the WM_FN_LOCK
+ * setter.
+ * Returns count on success, the kstrtobool() error for unparsable input, or
+ * -EIO if the firmware call returns nothing.
+ */
+static ssize_t fn_lock_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buffer, size_t count)
+{
+	union acpi_object *reply;
+	bool enable;
+	int err;
+
+	err = kstrtobool(buffer, &enable);
+	if (err)
+		return err;
+
+	reply = lg_wmab(WM_FN_LOCK, WM_SET, enable);
+	if (reply) {
+		kfree(reply);
+		return count;
+	}
+
+	return -EIO;
+}
+
+/*
+ * sysfs show for fn_lock: prints 1 if the first byte of the buffer returned
+ * by the WM_FN_LOCK getter is non-zero, otherwise 0.
+ * Returns the number of characters written, or -EIO if the firmware call
+ * fails, does not return a buffer, or returns an empty one.
+ */
+static ssize_t fn_lock_show(struct device *dev,
+			    struct device_attribute *attr, char *buffer)
+{
+	unsigned int status;
+	union acpi_object *r;
+
+	r = lg_wmab(WM_FN_LOCK, WM_GET, 0);
+	if (!r)
+		return -EIO;
+
+	/* An empty buffer has no byte 0 to read. */
+	if (r->type != ACPI_TYPE_BUFFER || r->buffer.length < 1) {
+		kfree(r);
+		return -EIO;
+	}
+
+	status = !!r->buffer.pointer[0];
+	kfree(r);
+
+	return snprintf(buffer, PAGE_SIZE, "%d\n", status);
+}
+
+/*
+ * sysfs store for battery_care_limit: accepts the decimal values 80 and 100
+ * only and passes them to the WM_BATT_LIMIT setter.
+ * Returns count on success, the kstrtoul() error for unparsable input,
+ * -EINVAL for any other number, or -EIO if the firmware call returns
+ * nothing.
+ */
+static ssize_t battery_care_limit_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buffer, size_t count)
+{
+	union acpi_object *reply;
+	unsigned long limit;
+	int err;
+
+	err = kstrtoul(buffer, 10, &limit);
+	if (err)
+		return err;
+
+	if (limit != 100 && limit != 80)
+		return -EINVAL;
+
+	reply = lg_wmab(WM_BATT_LIMIT, WM_SET, limit);
+	if (!reply)
+		return -EIO;
+
+	kfree(reply);
+	return count;
+}
+
+/*
+ * sysfs show for battery_care_limit: prints the value returned by the
+ * WM_BATT_LIMIT getter when it is 80 or 100, and 0 for anything else.
+ * Returns the number of characters written, or -EIO if the firmware call
+ * fails or does not return an integer.
+ */
+static ssize_t battery_care_limit_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buffer)
+{
+	union acpi_object *reply;
+	unsigned int limit;
+
+	reply = lg_wmab(WM_BATT_LIMIT, WM_GET, 0);
+	if (!reply)
+		return -EIO;
+
+	if (reply->type != ACPI_TYPE_INTEGER) {
+		kfree(reply);
+		return -EIO;
+	}
+
+	limit = reply->integer.value;
+	kfree(reply);
+
+	switch (limit) {
+	case 80:
+	case 100:
+		break;
+	default:
+		limit = 0;
+		break;
+	}
+
+	return snprintf(buffer, PAGE_SIZE, "%d\n", limit);
+}
+
+/*
+ * Read/write device attributes; each one is backed by the <name>_show and
+ * <name>_store functions defined above.
+ */
+static DEVICE_ATTR_RW(fan_mode);
+static DEVICE_ATTR_RW(usb_charge);
+static DEVICE_ATTR_RW(reader_mode);
+static DEVICE_ATTR_RW(fn_lock);
+static DEVICE_ATTR_RW(battery_care_limit);
+
+/* NULL-terminated list of the attributes that make up the group below. */
+static struct attribute *dev_attributes[] = {
+ &dev_attr_fan_mode.attr,
+ &dev_attr_usb_charge.attr,
+ &dev_attr_reader_mode.attr,
+ &dev_attr_fn_lock.attr,
+ &dev_attr_battery_care_limit.attr,
+ NULL
+};
+
+/* Unnamed group created on the platform device's kobject in acpi_add(). */
+static const struct attribute_group dev_attribute_group = {
+ .attrs = dev_attributes,
+};
+
+/*
+ * LED brightness_set callback for the touchpad LED: sends 1 to WM_TLED for
+ * any brightness above LED_OFF and 0 otherwise. The firmware reply is
+ * discarded.
+ */
+static void tpad_led_set(struct led_classdev *cdev,
+			 enum led_brightness brightness)
+{
+	kfree(lg_wmab(WM_TLED, WM_SET, brightness > LED_OFF));
+}
+
+/*
+ * LED brightness_get callback for the touchpad LED: LED_ON when
+ * ggov(GOV_TLED) is positive, LED_OFF for zero or an error.
+ */
+static enum led_brightness tpad_led_get(struct led_classdev *cdev)
+{
+	if (ggov(GOV_TLED) > 0)
+		return LED_ON;
+
+	return LED_OFF;
+}
+
+/* Touchpad LED class device; on/off only, hence a maximum brightness of 1. */
+static LED_DEVICE(tpad_led, 1);
+
+/*
+ * LED brightness_set callback for the keyboard backlight. The requested
+ * brightness is reduced to three firmware values: 0 at or below LED_OFF,
+ * 0x24 at or above LED_FULL and 0x22 for everything in between. The firmware
+ * reply is discarded.
+ */
+static void kbd_backlight_set(struct led_classdev *cdev,
+			      enum led_brightness brightness)
+{
+	u32 level;
+
+	if (brightness >= LED_FULL)
+		level = 0x24;
+	else if (brightness <= LED_OFF)
+		level = 0;
+	else
+		level = 0x22;
+
+	kfree(lg_wmab(WM_KEY_LIGHT, WM_SET, level));
+}
+
+/*
+ * LED brightness_get callback for the keyboard backlight.
+ *
+ * Reads WM_KEY_LIGHT and decodes byte 0 of the returned buffer (masked with
+ * 0x27): 0x24 is LED_FULL, 0x22 is LED_HALF, anything else is LED_OFF.
+ * LED_OFF is also returned when the call fails, the reply is not a buffer,
+ * the buffer is shorter than two bytes, or byte 1 is not 0x05.
+ */
+static enum led_brightness kbd_backlight_get(struct led_classdev *cdev)
+{
+	union acpi_object *r;
+	int val;
+
+	r = lg_wmab(WM_KEY_LIGHT, WM_GET, 0);
+
+	if (!r)
+		return LED_OFF;
+
+	/* Check the length before touching bytes 0 and 1 of firmware data. */
+	if (r->type != ACPI_TYPE_BUFFER || r->buffer.length < 2 ||
+	    r->buffer.pointer[1] != 0x05) {
+		kfree(r);
+		return LED_OFF;
+	}
+
+	switch (r->buffer.pointer[0] & 0x27) {
+	case 0x24:
+		val = LED_FULL;
+		break;
+	case 0x22:
+		val = LED_HALF;
+		break;
+	default:
+		val = LED_OFF;
+		break;
+	}
+
+	kfree(r);
+
+	return val;
+}
+
+static LED_DEVICE(kbd_backlight, 255);
+
+/*
+ * Undo wmi_input_setup(): remove whichever WMI handlers were installed, then
+ * unregister the input device, clearing each inited bit as it goes.
+ */
+static void wmi_input_destroy(void)
+{
+	if (inited & INIT_INPUT_WMI_2) {
+		wmi_remove_notify_handler(WMI_EVENT_GUID2);
+		inited &= ~INIT_INPUT_WMI_2;
+	}
+
+	if (inited & INIT_INPUT_WMI_0) {
+		wmi_remove_notify_handler(WMI_EVENT_GUID0);
+		inited &= ~INIT_INPUT_WMI_0;
+	}
+
+	if (inited & INIT_SPARSE_KEYMAP) {
+		input_unregister_device(wmi_input_dev);
+		inited &= ~INIT_SPARSE_KEYMAP;
+	}
+}
+
+/*
+ * Platform driver that only carries a name; it is registered in acpi_add()
+ * just before the platform device of the same name is created.
+ */
+static struct platform_driver pf_driver = {
+ .driver = {
+ .name = PLATFORM_NAME,
+ }
+};
+
+/*
+ * ACPI add callback: register the platform driver and device, create the
+ * sysfs attribute group, then register the two LEDs and the hotkey input
+ * device on a best-effort basis (their failure does not fail the add).
+ *
+ * Only the first call does the setup; while pf_device is set, later calls
+ * return 0 immediately. Returns 0 on success or a negative errno if the
+ * platform driver, the platform device or the sysfs group cannot be set up.
+ */
+static int acpi_add(struct acpi_device *device)
+{
+	int ret;
+
+	if (pf_device)
+		return 0;
+
+	ret = platform_driver_register(&pf_driver);
+	if (ret)
+		return ret;
+
+	pf_device = platform_device_register_simple(PLATFORM_NAME,
+						    PLATFORM_DEVID_NONE,
+						    NULL, 0);
+	if (IS_ERR(pf_device)) {
+		ret = PTR_ERR(pf_device);
+		pf_device = NULL;
+		pr_err("unable to register platform device\n");
+		goto out_platform_registered;
+	}
+
+	ret = sysfs_create_group(&pf_device->dev.kobj, &dev_attribute_group);
+	if (ret)
+		goto out_platform_device;
+
+	if (!led_classdev_register(&pf_device->dev, &kbd_backlight))
+		inited |= INIT_KBD_LED;
+
+	if (!led_classdev_register(&pf_device->dev, &tpad_led))
+		inited |= INIT_TPAD_LED;
+
+	wmi_input_setup();
+
+	return 0;
+
+out_platform_device:
+	platform_device_unregister(pf_device);
+	/*
+	 * Clear the pointer: a stale value would make the next add return 0
+	 * early while pointing at an unregistered device.
+	 */
+	pf_device = NULL;
+out_platform_registered:
+	platform_driver_unregister(&pf_driver);
+	return ret;
+}
+
+/*
+ * ACPI remove callback: tear down what acpi_add() set up. The sysfs group,
+ * the LEDs that were registered (per the inited bits) and the input device
+ * go first, then the platform device and finally the platform driver.
+ * pf_device is reset to NULL so that a later acpi_add() starts from scratch.
+ * Always returns 0.
+ */
+static int acpi_remove(struct acpi_device *device)
+{
+ sysfs_remove_group(&pf_device->dev.kobj, &dev_attribute_group);
+ if (inited & INIT_KBD_LED)
+ led_classdev_unregister(&kbd_backlight);
+
+ if (inited & INIT_TPAD_LED)
+ led_classdev_unregister(&tpad_led);
+
+ /* NOTE(review): the two LED bits are not cleared from inited here. */
+ wmi_input_destroy();
+ platform_device_unregister(pf_device);
+ pf_device = NULL;
+ platform_driver_unregister(&pf_driver);
+
+ return 0;
+}
+
+/*
+ * ACPI hardware IDs this driver binds to; the list ends with an empty-ID
+ * entry and is exported for module autoloading.
+ */
+static const struct acpi_device_id device_ids[] = {
+ {"LGEX0815", 0},
+ {"", 0}
+};
+MODULE_DEVICE_TABLE(acpi, device_ids);
+
+/*
+ * ACPI driver description: matches device_ids and routes add, remove and
+ * notify events to the callbacks defined in this file.
+ */
+static struct acpi_driver acpi_driver = {
+ .name = "LG Gram Laptop Support",
+ .class = "lg-laptop",
+ .ids = device_ids,
+ .ops = {
+ .add = acpi_add,
+ .remove = acpi_remove,
+ .notify = acpi_notify,
+ },
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Module entry point: register the ACPI driver.
+ * Returns 0 on success or the negative errno reported by
+ * acpi_bus_register_driver().
+ */
+static int __init acpi_init(void)
+{
+	int result;
+
+	result = acpi_bus_register_driver(&acpi_driver);
+	if (result < 0) {
+		/*
+		 * Report through the regular log and hand the real error
+		 * back to the module loader instead of a fixed -ENODEV.
+		 */
+		pr_err("Error registering driver\n");
+		return result;
+	}
+
+	return 0;
+}
+
+/* Module exit point: unregister the ACPI driver registered by acpi_init(). */
+static void __exit acpi_exit(void)
+{
+ acpi_bus_unregister_driver(&acpi_driver);
+}
+
+module_init(acpi_init);
+module_exit(acpi_exit);
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index d89936c..c2c3a1a 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -575,7 +575,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_msn201x_items[] = {
static
struct mlxreg_core_hotplug_platform_data mlxplat_mlxcpld_msn201x_data = {
- .items = mlxplat_mlxcpld_msn21xx_items,
+ .items = mlxplat_mlxcpld_msn201x_items,
.counter = ARRAY_SIZE(mlxplat_mlxcpld_msn201x_items),
.cell = MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET,
.mask = MLXPLAT_CPLD_AGGR_MASK_DEF,
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index cb204f9..5f2d7ea 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -42,10 +42,13 @@ static const struct ts_dmi_data chuwi_hi8_data = {
};
static const struct property_entry chuwi_hi8_pro_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 6),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-chuwi-hi8-pro.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
PROPERTY_ENTRY_BOOL("silead,home-button"),
{ }
};
@@ -56,6 +59,8 @@ static const struct ts_dmi_data chuwi_hi8_pro_data = {
};
static const struct property_entry chuwi_vi8_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 6),
PROPERTY_ENTRY_U32("touchscreen-size-x", 1724),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
@@ -88,9 +93,9 @@ static const struct ts_dmi_data chuwi_vi10_data = {
static const struct property_entry connect_tablet9_props[] = {
PROPERTY_ENTRY_U32("touchscreen-min-x", 9),
- PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 10),
PROPERTY_ENTRY_U32("touchscreen-size-x", 1664),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 878),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 880),
PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-connect-tablet9.fw"),
@@ -104,8 +109,10 @@ static const struct ts_dmi_data connect_tablet9_data = {
};
static const struct property_entry cube_iwork8_air_props[] = {
- PROPERTY_ENTRY_U32("touchscreen-size-x", 1660),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 900),
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 3),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1664),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 896),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-cube-iwork8-air.fw"),
PROPERTY_ENTRY_U32("silead,max-fingers", 10),
@@ -179,11 +186,14 @@ static const struct ts_dmi_data gp_electronic_t701_data = {
};
static const struct property_entry itworks_tw891_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 5),
PROPERTY_ENTRY_U32("touchscreen-size-x", 1600),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 890),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 896),
PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-itworks-tw891.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
{ }
};
@@ -207,8 +217,10 @@ static const struct ts_dmi_data jumper_ezpad_6_pro_data = {
};
static const struct property_entry jumper_ezpad_mini3_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 23),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 16),
PROPERTY_ENTRY_U32("touchscreen-size-x", 1700),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1138),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-jumper-ezpad-mini3.fw"),
PROPERTY_ENTRY_U32("silead,max-fingers", 10),
@@ -237,6 +249,24 @@ static const struct ts_dmi_data onda_obook_20_plus_data = {
.properties = onda_obook_20_plus_props,
};
+static const struct property_entry onda_v80_plus_v3_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 22),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 15),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1698),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
+ PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+ PROPERTY_ENTRY_STRING("firmware-name",
+ "gsl3676-onda-v80-plus-v3.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data onda_v80_plus_v3_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = onda_v80_plus_v3_props,
+};
+
static const struct property_entry onda_v820w_32g_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1665),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -322,11 +352,14 @@ static const struct ts_dmi_data pov_mobii_wintab_p800w_v20_data = {
};
static const struct property_entry pov_mobii_wintab_p800w_v21_props[] = {
- PROPERTY_ENTRY_U32("touchscreen-size-x", 1800),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1794),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_STRING("firmware-name",
"gsl3692-pov-mobii-wintab-p800w.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
PROPERTY_ENTRY_BOOL("silead,home-button"),
{ }
};
@@ -366,6 +399,22 @@ static const struct ts_dmi_data teclast_x98plus2_data = {
.properties = teclast_x98plus2_props,
};
+static const struct property_entry trekstor_primebook_c11_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1970),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1530),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ PROPERTY_ENTRY_STRING("firmware-name",
+ "gsl1680-trekstor-primebook-c11.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ { }
+};
+
+static const struct ts_dmi_data trekstor_primebook_c11_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = trekstor_primebook_c11_props,
+};
+
static const struct property_entry trekstor_primebook_c13_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 2624),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1920),
@@ -381,6 +430,22 @@ static const struct ts_dmi_data trekstor_primebook_c13_data = {
.properties = trekstor_primebook_c13_props,
};
+static const struct property_entry trekstor_primetab_t13b_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 2500),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1900),
+ PROPERTY_ENTRY_STRING("firmware-name",
+ "gsl1680-trekstor-primetab-t13b.fw"),
+ PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+ { }
+};
+
+static const struct ts_dmi_data trekstor_primetab_t13b_data = {
+ .acpi_name = "MSSL1680:00",
+ .properties = trekstor_primetab_t13b_props,
+};
+
static const struct property_entry trekstor_surftab_twin_10_1_props[] = {
PROPERTY_ENTRY_U32("touchscreen-size-x", 1900),
PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
@@ -397,6 +462,8 @@ static const struct ts_dmi_data trekstor_surftab_twin_10_1_data = {
};
static const struct property_entry trekstor_surftab_wintron70_props[] = {
+ PROPERTY_ENTRY_U32("touchscreen-min-x", 12),
+ PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
PROPERTY_ENTRY_U32("touchscreen-size-x", 884),
PROPERTY_ENTRY_U32("touchscreen-size-y", 632),
PROPERTY_ENTRY_STRING("firmware-name",
@@ -556,6 +623,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* ONDA V80 plus v3 (P80PSBG9V3A01501) */
+ .driver_data = (void *)&onda_v80_plus_v3_data,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONDA"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "V80 PLUS")
+ },
+ },
+ {
/* ONDA V820w DualOS */
.driver_data = (void *)&onda_v820w_32g_data,
.matches = {
@@ -641,6 +716,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Trekstor Primebook C11 */
+ .driver_data = (void *)&trekstor_primebook_c11_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Primebook C11"),
+ },
+ },
+ {
/* Trekstor Primebook C13 */
.driver_data = (void *)&trekstor_primebook_c13_data,
.matches = {
@@ -649,6 +732,14 @@ static const struct dmi_system_id touchscreen_dmi_table[] = {
},
},
{
+ /* Trekstor Primetab T13B */
+ .driver_data = (void *)&trekstor_primetab_t13b_data,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Primetab T13B"),
+ },
+ },
+ {
/* TrekStor SurfTab twin 10.1 ST10432-8 */
.driver_data = (void *)&trekstor_surftab_twin_10_1_data,
.matches = {
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 04791ea..bea35be 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -987,19 +987,19 @@ static struct bus_type wmi_bus_type = {
.remove = wmi_dev_remove,
};
-static struct device_type wmi_type_event = {
+static const struct device_type wmi_type_event = {
.name = "event",
.groups = wmi_event_groups,
.release = wmi_dev_release,
};
-static struct device_type wmi_type_method = {
+static const struct device_type wmi_type_method = {
.name = "method",
.groups = wmi_method_groups,
.release = wmi_dev_release,
};
-static struct device_type wmi_type_data = {
+static const struct device_type wmi_type_data = {
.name = "data",
.groups = wmi_data_groups,
.release = wmi_dev_release,
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 504d252..27e5dd4 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -447,10 +447,9 @@
config PWM_TIECAP
tristate "ECAP PWM support"
- depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE
+ depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3
help
- PWM driver support for the ECAP APWM controller found on AM33XX
- TI SOC
+ PWM driver support for the ECAP APWM controller found on TI SOCs.
To compile this driver as a module, choose M here: the module
will be called pwm-tiecap.
diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index 5561b9e..757230e 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -30,6 +30,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
.clk_rate = 19200000,
.npwm = 1,
.base_unit_bits = 16,
+ .other_devices_aml_touches_pwm_regs = true,
};
/* Broxton */
@@ -60,6 +61,7 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev)
platform_set_drvdata(pdev, lpwm);
+ dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
@@ -74,13 +76,29 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
return pwm_lpss_remove(lpwm);
}
-static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops,
- pwm_lpss_suspend,
- pwm_lpss_resume);
+static int pwm_lpss_prepare(struct device *dev)
+{
+ struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
+
+ /*
+ * If another device's AML code touches the PWM regs on suspend/resume,
+ * force runtime-resume the PWM controller to allow this.
+ */
+ if (lpwm->info->other_devices_aml_touches_pwm_regs)
+ return 0; /* Force runtime-resume */
+
+ return 1; /* If runtime-suspended leave as is */
+}
+
+static const struct dev_pm_ops pwm_lpss_platform_pm_ops = {
+ .prepare = pwm_lpss_prepare,
+ SET_SYSTEM_SLEEP_PM_OPS(pwm_lpss_suspend, pwm_lpss_resume)
+};
static const struct acpi_device_id pwm_lpss_acpi_match[] = {
{ "80860F09", (unsigned long)&pwm_lpss_byt_info },
{ "80862288", (unsigned long)&pwm_lpss_bsw_info },
+ { "80862289", (unsigned long)&pwm_lpss_bsw_info },
{ "80865AC8", (unsigned long)&pwm_lpss_bxt_info },
{ },
};
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 4721a26..2ac3a2a 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -32,15 +32,6 @@
/* Size of each PWM register space if multiple */
#define PWM_SIZE 0x400
-#define MAX_PWMS 4
-
-struct pwm_lpss_chip {
- struct pwm_chip chip;
- void __iomem *regs;
- const struct pwm_lpss_boardinfo *info;
- u32 saved_ctrl[MAX_PWMS];
-};
-
static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
{
return container_of(chip, struct pwm_lpss_chip, chip);
@@ -97,7 +88,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
unsigned long long on_time_div;
unsigned long c = lpwm->info->clk_rate, base_unit_range;
unsigned long long base_unit, freq = NSEC_PER_SEC;
- u32 ctrl;
+ u32 orig_ctrl, ctrl;
do_div(freq, period_ns);
@@ -114,13 +105,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
do_div(on_time_div, period_ns);
on_time_div = 255ULL - on_time_div;
- ctrl = pwm_lpss_read(pwm);
+ orig_ctrl = ctrl = pwm_lpss_read(pwm);
ctrl &= ~PWM_ON_TIME_DIV_MASK;
ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT);
base_unit &= base_unit_range;
ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT;
ctrl |= on_time_div;
- pwm_lpss_write(pwm, ctrl);
+
+ if (orig_ctrl != ctrl) {
+ pwm_lpss_write(pwm, ctrl);
+ pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE);
+ }
}
static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
@@ -144,7 +139,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
return ret;
}
pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
- pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false);
ret = pwm_lpss_wait_for_update(pwm);
if (ret) {
@@ -157,7 +151,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
if (ret)
return ret;
pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
- pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
return pwm_lpss_wait_for_update(pwm);
}
} else if (pwm_is_enabled(pwm)) {
@@ -168,8 +161,42 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
return 0;
}
+/* This function gets called once from pwmchip_add to get the initial state */
+static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct pwm_lpss_chip *lpwm = to_lpwm(chip);
+ unsigned long base_unit_range;
+ unsigned long long base_unit, freq, on_time_div;
+ u32 ctrl;
+
+ base_unit_range = BIT(lpwm->info->base_unit_bits);
+
+ ctrl = pwm_lpss_read(pwm);
+ on_time_div = 255 - (ctrl & PWM_ON_TIME_DIV_MASK);
+ base_unit = (ctrl >> PWM_BASE_UNIT_SHIFT) & (base_unit_range - 1);
+
+ freq = base_unit * lpwm->info->clk_rate;
+ do_div(freq, base_unit_range);
+ if (freq == 0)
+ state->period = NSEC_PER_SEC;
+ else
+ state->period = NSEC_PER_SEC / (unsigned long)freq;
+
+ on_time_div *= state->period;
+ do_div(on_time_div, 255);
+ state->duty_cycle = on_time_div;
+
+ state->polarity = PWM_POLARITY_NORMAL;
+ state->enabled = !!(ctrl & PWM_ENABLE);
+
+ if (state->enabled)
+ pm_runtime_get(chip->dev);
+}
+
static const struct pwm_ops pwm_lpss_ops = {
.apply = pwm_lpss_apply,
+ .get_state = pwm_lpss_get_state,
.owner = THIS_MODULE,
};
@@ -214,6 +241,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe);
int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
{
+ int i;
+
+ for (i = 0; i < lpwm->info->npwm; i++) {
+ if (pwm_is_enabled(&lpwm->chip.pwms[i]))
+ pm_runtime_put(lpwm->chip.dev);
+ }
return pwmchip_remove(&lpwm->chip);
}
EXPORT_SYMBOL_GPL(pwm_lpss_remove);
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index 7a4238a..3236be8 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -16,13 +16,25 @@
#include <linux/device.h>
#include <linux/pwm.h>
-struct pwm_lpss_chip;
+#define MAX_PWMS 4
+
+struct pwm_lpss_chip {
+ struct pwm_chip chip;
+ void __iomem *regs;
+ const struct pwm_lpss_boardinfo *info;
+ u32 saved_ctrl[MAX_PWMS];
+};
struct pwm_lpss_boardinfo {
unsigned long clk_rate;
unsigned int npwm;
unsigned long base_unit_bits;
bool bypass;
+ /*
+ * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device
+ * messes with the PWM0 controller's state.
+ */
+ bool other_devices_aml_touches_pwm_regs;
};
struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index 748f614..a41812f 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* R-Car PWM Timer driver
*
* Copyright (C) 2015 Renesas Electronics Corporation
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
*/
#include <linux/clk.h>
diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index 29267d1..4a855a2 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* R-Mobile TPU PWM driver
*
* Copyright (C) 2012 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/clk.h>
diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index f8ebbec..48c4595 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -300,7 +300,6 @@ static const struct of_device_id tegra_pwm_of_match[] = {
{ .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc },
{ }
};
-
MODULE_DEVICE_TABLE(of, tegra_pwm_of_match);
static const struct dev_pm_ops tegra_pwm_pm_ops = {
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index 7c71cdb..ceb233d 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -249,6 +249,7 @@ static void pwm_export_release(struct device *child)
static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
{
struct pwm_export *export;
+ char *pwm_prop[2];
int ret;
if (test_and_set_bit(PWMF_EXPORTED, &pwm->flags))
@@ -263,7 +264,6 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
export->pwm = pwm;
mutex_init(&export->lock);
- export->child.class = parent->class;
export->child.release = pwm_export_release;
export->child.parent = parent;
export->child.devt = MKDEV(0, 0);
@@ -277,6 +277,10 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
export = NULL;
return ret;
}
+ pwm_prop[0] = kasprintf(GFP_KERNEL, "EXPORT=pwm%u", pwm->hwpwm);
+ pwm_prop[1] = NULL;
+ kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+ kfree(pwm_prop[0]);
return 0;
}
@@ -289,6 +293,7 @@ static int pwm_unexport_match(struct device *child, void *data)
static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
{
struct device *child;
+ char *pwm_prop[2];
if (!test_and_clear_bit(PWMF_EXPORTED, &pwm->flags))
return -ENODEV;
@@ -297,6 +302,11 @@ static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
if (!child)
return -ENODEV;
+ pwm_prop[0] = kasprintf(GFP_KERNEL, "UNEXPORT=pwm%u", pwm->hwpwm);
+ pwm_prop[1] = NULL;
+ kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+ kfree(pwm_prop[0]);
+
/* for device_find_child() */
put_device(child);
device_unregister(child);
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 05293ba..2d655a9 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -143,7 +143,9 @@ static int twa_poll_status_gone(TW_Device_Extension *tw_dev, u32 flag, int secon
static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal);
static int twa_reset_device_extension(TW_Device_Extension *tw_dev);
static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset);
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg);
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+ unsigned char *cdb, int use_sg,
+ TW_SG_Entry *sglistarg);
static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id);
static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code);
@@ -278,7 +280,7 @@ static int twa_aen_complete(TW_Device_Extension *tw_dev, int request_id)
static int twa_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
{
int request_id = 0;
- char cdb[TW_MAX_CDB_LEN];
+ unsigned char cdb[TW_MAX_CDB_LEN];
TW_SG_Entry sglist[1];
int finished = 0, count = 0;
TW_Command_Full *full_command_packet;
@@ -423,7 +425,7 @@ static void twa_aen_queue_event(TW_Device_Extension *tw_dev, TW_Command_Apache_H
/* This function will read the aen queue from the isr */
static int twa_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
{
- char cdb[TW_MAX_CDB_LEN];
+ unsigned char cdb[TW_MAX_CDB_LEN];
TW_SG_Entry sglist[1];
TW_Command_Full *full_command_packet;
int retval = 1;
@@ -1798,7 +1800,9 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_
static DEF_SCSI_QCMD(twa_scsi_queue)
/* This function hands scsi cdb's to the firmware */
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg)
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+ unsigned char *cdb, int use_sg,
+ TW_SG_Entry *sglistarg)
{
TW_Command_Full *full_command_packet;
TW_Command_Apache *command_packet;
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 266bdac..480cf82 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -287,7 +287,9 @@ static int twl_post_command_packet(TW_Device_Extension *tw_dev, int request_id)
} /* End twl_post_command_packet() */
/* This function hands scsi cdb's to the firmware */
-static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry_ISO *sglistarg)
+static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+ unsigned char *cdb, int use_sg,
+ TW_SG_Entry_ISO *sglistarg)
{
TW_Command_Full *full_command_packet;
TW_Command_Apache *command_packet;
@@ -372,7 +374,7 @@ static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
/* This function will read the aen queue from the isr */
static int twl_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
{
- char cdb[TW_MAX_CDB_LEN];
+ unsigned char cdb[TW_MAX_CDB_LEN];
TW_SG_Entry_ISO sglist[1];
TW_Command_Full *full_command_packet;
int retval = 1;
@@ -554,7 +556,7 @@ static int twl_poll_response(TW_Device_Extension *tw_dev, int request_id, int se
static int twl_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
{
int request_id = 0;
- char cdb[TW_MAX_CDB_LEN];
+ unsigned char cdb[TW_MAX_CDB_LEN];
TW_SG_Entry_ISO sglist[1];
int finished = 0, count = 0;
TW_Command_Full *full_command_packet;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 70988c3..f07444d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -538,7 +538,7 @@
config SCSI_BUSLOGIC
tristate "BusLogic SCSI support"
- depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
+ depends on (PCI || ISA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
---help---
This is support for BusLogic MultiMaster and FlashPoint SCSI Host
Adapters. Consult the SCSI-HOWTO, available from
@@ -1175,12 +1175,12 @@
config SCSI_SIM710
tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
- depends on (EISA || MCA) && SCSI
+ depends on EISA && SCSI
select SCSI_SPI_ATTRS
---help---
This driver is for NCR53c710 based SCSI host adapters.
- It currently supports Compaq EISA cards and NCR MCA cards
+ It currently supports Compaq EISA cards.
config SCSI_DC395x
tristate "Tekram DC395(U/UW/F) and DC315(U) SCSI support"
diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 4d7b0e0..301b3ca 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -269,7 +269,7 @@ static LIST_HEAD(aha152x_host_list);
/* DEFINES */
/* For PCMCIA cards, always use AUTOCONF */
-#if defined(PCMCIA) || defined(MODULE)
+#if defined(AHA152X_PCMCIA) || defined(MODULE)
#if !defined(AUTOCONF)
#define AUTOCONF
#endif
@@ -297,7 +297,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
#define DELAY_DEFAULT 1000
-#if defined(PCMCIA)
+#if defined(AHA152X_PCMCIA)
#define IRQ_MIN 0
#define IRQ_MAX 16
#else
@@ -328,7 +328,7 @@ MODULE_AUTHOR("Jürgen Fischer");
MODULE_DESCRIPTION(AHA152X_REVID);
MODULE_LICENSE("GPL");
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
#if defined(MODULE)
static int io[] = {0, 0};
module_param_hw_array(io, int, ioport, NULL, 0);
@@ -391,7 +391,7 @@ static struct isapnp_device_id id_table[] = {
MODULE_DEVICE_TABLE(isapnp, id_table);
#endif /* ISAPNP */
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
static struct scsi_host_template aha152x_driver_template;
@@ -863,7 +863,7 @@ void aha152x_release(struct Scsi_Host *shpnt)
if (shpnt->irq)
free_irq(shpnt->irq, shpnt);
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
if (shpnt->io_port)
release_region(shpnt->io_port, IO_RANGE);
#endif
@@ -2924,7 +2924,7 @@ static struct scsi_host_template aha152x_driver_template = {
.slave_alloc = aha152x_adjust_queue,
};
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
static int setup_count;
static struct aha152x_setup setup[2];
@@ -3392,4 +3392,4 @@ static int __init aha152x_setup(char *str)
__setup("aha152x=", aha152x_setup);
#endif
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 3df1428..311d23c 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -790,12 +790,11 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf
slot->n_elem = n_elem;
slot->slot_tag = tag;
- slot->buf = dma_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
+ slot->buf = dma_pool_zalloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
if (!slot->buf) {
rc = -ENOMEM;
goto err_out_tag;
}
- memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
tei.task = task;
tei.hdr = &mvi->slot[tag];
@@ -1906,8 +1905,7 @@ static void mvs_work_queue(struct work_struct *work)
if (phy->phy_event & PHY_PLUG_OUT) {
u32 tmp;
- struct sas_identify_frame *id;
- id = (struct sas_identify_frame *)phy->frame_rcvd;
+
tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no);
phy->phy_event &= ~PHY_PLUG_OUT;
if (!(tmp & PHY_READY_MASK)) {
diff --git a/drivers/scsi/pcmcia/aha152x_core.c b/drivers/scsi/pcmcia/aha152x_core.c
index dba3716..24b8922 100644
--- a/drivers/scsi/pcmcia/aha152x_core.c
+++ b/drivers/scsi/pcmcia/aha152x_core.c
@@ -1,3 +1,3 @@
-#define PCMCIA 1
+#define AHA152X_PCMCIA 1
#define AHA152X_STAT 1
#include "aha152x.c"
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index b28f159..0bb9ac6 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -218,7 +218,7 @@ qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
mutex_lock(&ha->optrom_mutex);
if (qla2x00_chip_is_down(vha)) {
- mutex_unlock(&vha->hw->optrom_mutex);
+ mutex_unlock(&ha->optrom_mutex);
return -EAGAIN;
}
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index c72d801..6fe20c2 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -425,7 +425,7 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
__qla24xx_handle_gpdb_event(vha, ea);
}
-int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
{
struct qla_work_evt *e;
@@ -680,7 +680,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
fcport);
break;
}
- /* drop through */
+ /* fall through */
default:
if (fcport_is_smaller(fcport)) {
/* local adapter is bigger */
@@ -1551,7 +1551,8 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
}
-void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea)
+static void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
+ struct event_arg *ea)
{
ql_dbg(ql_dbg_disc, vha, 0x2118,
"%s %d %8phC post PRLI\n",
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 86fb8b2..0326353 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1195,8 +1195,8 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
* @sp: SRB command to process
* @cmd_pkt: Command type 3 IOCB
* @tot_dsds: Total number of segments to transfer
- * @tot_prot_dsds:
- * @fw_prot_opts:
+ * @tot_prot_dsds: Total number of segments with protection information
+ * @fw_prot_opts: Protection options to be passed to firmware
*/
inline int
qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d73b04e..30d3090 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -25,7 +25,7 @@ static int qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *,
/**
* qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
@@ -144,7 +144,7 @@ qla2x00_check_reg16_for_disconnect(scsi_qla_host_t *vha, uint16_t reg)
/**
* qla2300_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
@@ -3109,7 +3109,7 @@ qla2xxx_check_risc_status(scsi_qla_host_t *vha)
/**
* qla24xx_intr_handler() - Process interrupts for the ISP23xx and ISP24xx.
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 2f3e507..191b6b7 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3478,9 +3478,9 @@ qla8044_read_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t *data)
/**
* qla2x00_set_serdes_params() -
* @vha: HA context
- * @sw_em_1g:
- * @sw_em_2g:
- * @sw_em_4g:
+ * @sw_em_1g: serial link options
+ * @sw_em_2g: serial link options
+ * @sw_em_4g: serial link options
*
* Returns
*/
diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 521a513..60f964c 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2212,7 +2212,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct bsg_job *bsg_job;
struct fc_bsg_reply *bsg_reply;
struct srb_iocb *iocb_job;
- int res;
+ int res = 0;
struct qla_mt_iocb_rsp_fx00 fstatus;
uint8_t *fw_sts_ptr;
@@ -2624,7 +2624,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
* qlafx00_multistatus_entry() - Process Multi response queue entries.
* @vha: SCSI driver HA context
* @rsp: response queue
- * @pkt:
+ * @pkt: received packet
*/
static void
qlafx00_multistatus_entry(struct scsi_qla_host *vha,
@@ -2681,12 +2681,10 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha,
* @vha: SCSI driver HA context
* @rsp: response queue
* @pkt: Entry pointer
- * @estatus:
- * @etype:
*/
static void
qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
- struct sts_entry_fx00 *pkt, uint8_t estatus, uint8_t etype)
+ struct sts_entry_fx00 *pkt)
{
srb_t *sp;
struct qla_hw_data *ha = vha->hw;
@@ -2695,9 +2693,6 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
struct req_que *req = NULL;
int res = DID_ERROR << 16;
- ql_dbg(ql_dbg_async, vha, 0x507f,
- "type of error status in response: 0x%x\n", estatus);
-
req = ha->req_q_map[que];
sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
@@ -2745,9 +2740,11 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha,
if (pkt->entry_status != 0 &&
pkt->entry_type != IOCTL_IOSB_TYPE_FX00) {
+ ql_dbg(ql_dbg_async, vha, 0x507f,
+ "type of error status in response: 0x%x\n",
+ pkt->entry_status);
qlafx00_error_entry(vha, rsp,
- (struct sts_entry_fx00 *)pkt, pkt->entry_status,
- pkt->entry_type);
+ (struct sts_entry_fx00 *)pkt);
continue;
}
@@ -2867,7 +2864,7 @@ qlafx00_async_event(scsi_qla_host_t *vha)
/**
* qlafx00x_mbx_completion() - Process mailbox command completions.
* @vha: SCSI driver HA context
- * @mb0:
+ * @mb0: value to be written into mailbox register 0
*/
static void
qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
@@ -2893,7 +2890,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
/**
* qlafx00_intr_handler() - Process interrupts for the ISPFX00.
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 121e18b..f2f5480 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -2010,7 +2010,7 @@ qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
/**
* qla82xx_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c
index 3a2b028..fe856b6 100644
--- a/drivers/scsi/qla2xxx/qla_nx2.c
+++ b/drivers/scsi/qla2xxx/qla_nx2.c
@@ -3878,7 +3878,7 @@ qla8044_write_optrom_data(struct scsi_qla_host *vha, uint8_t *buf,
#define PF_BITS_MASK (0xF << 16)
/**
* qla8044_intr_handler() - Process interrupts for the ISP8044
- * @irq:
+ * @irq: interrupt number
* @dev_id: SCSI driver HA context
*
* Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 8794e54..518f151 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1749,7 +1749,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
static void
__qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
{
- int cnt, status;
+ int cnt;
unsigned long flags;
srb_t *sp;
scsi_qla_host_t *vha = qp->vha;
@@ -1799,8 +1799,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
if (!sp_get(sp)) {
spin_unlock_irqrestore
(qp->qp_lock_ptr, flags);
- status = qla2xxx_eh_abort(
- GET_CMD_SP(sp));
+ qla2xxx_eh_abort(
+ GET_CMD_SP(sp));
spin_lock_irqsave
(qp->qp_lock_ptr, flags);
}
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 4499c78..2a3055c 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -2229,7 +2229,7 @@ qla2x00_erase_flash_sector(struct qla_hw_data *ha, uint32_t addr,
/**
* qla2x00_get_flash_manufacturer() - Read manufacturer ID from flash chip.
- * @ha:
+ * @ha: host adapter
* @man_id: Flash manufacturer ID
* @flash_id: Flash ID
*/
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 3982820..c450474 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4540,7 +4540,7 @@ static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun,
case QLA_TGT_CLEAR_TS:
case QLA_TGT_ABORT_TS:
abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id);
- /* drop through */
+ /* fall through */
case QLA_TGT_CLEAR_ACA:
h = qlt_find_qphint(vha, mcmd->unpacked_lun);
mcmd->qpair = h->qpair;
@@ -6598,9 +6598,9 @@ static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
* qla_tgt_lport_register - register lport with external module
*
* @target_lport_ptr: pointer for tcm_qla2xxx specific lport data
- * @phys_wwpn:
- * @npiv_wwpn:
- * @npiv_wwnn:
+ * @phys_wwpn: physical port WWPN
+ * @npiv_wwpn: NPIV WWPN
+ * @npiv_wwnn: NPIV WWNN
* @callback: lport initialization callback for tcm_qla2xxx code
*/
int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn,
diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index 7c12813..4c28fa9 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -329,8 +329,8 @@ struct knav_range_ops {
};
struct knav_irq_info {
- int irq;
- u32 cpu_map;
+ int irq;
+ struct cpumask *cpu_mask;
};
struct knav_range_info {
diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c
index 316e82e..2f7fb2d 100644
--- a/drivers/soc/ti/knav_qmss_acc.c
+++ b/drivers/soc/ti/knav_qmss_acc.c
@@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
{
struct knav_device *kdev = range->kdev;
struct knav_acc_channel *acc;
- unsigned long cpu_map;
+ struct cpumask *cpu_mask;
int ret = 0, irq;
u32 old, new;
if (range->flags & RANGE_MULTI_QUEUE) {
acc = range->acc;
irq = range->irqs[0].irq;
- cpu_map = range->irqs[0].cpu_map;
+ cpu_mask = range->irqs[0].cpu_mask;
} else {
acc = range->acc + queue;
irq = range->irqs[queue].irq;
- cpu_map = range->irqs[queue].cpu_map;
+ cpu_mask = range->irqs[queue].cpu_mask;
}
old = acc->open_mask;
@@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
acc->name, acc->name);
ret = request_irq(irq, knav_acc_int_handler, 0, acc->name,
range);
- if (!ret && cpu_map) {
- ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+ if (!ret && cpu_mask) {
+ ret = irq_set_affinity_hint(irq, cpu_mask);
if (ret) {
dev_warn(range->kdev->dev,
"Failed to set IRQ affinity\n");
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index b5d5673..8b41837 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -118,19 +118,17 @@ static int knav_queue_setup_irq(struct knav_range_info *range,
struct knav_queue_inst *inst)
{
unsigned queue = inst->id - range->queue_base;
- unsigned long cpu_map;
int ret = 0, irq;
if (range->flags & RANGE_HAS_IRQ) {
irq = range->irqs[queue].irq;
- cpu_map = range->irqs[queue].cpu_map;
ret = request_irq(irq, knav_queue_int_handler, 0,
inst->irq_name, inst);
if (ret)
return ret;
disable_irq(irq);
- if (cpu_map) {
- ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+ if (range->irqs[queue].cpu_mask) {
+ ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask);
if (ret) {
dev_warn(range->kdev->dev,
"Failed to set IRQ affinity\n");
@@ -1262,9 +1260,19 @@ static int knav_setup_queue_range(struct knav_device *kdev,
range->num_irqs++;
- if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3)
- range->irqs[i].cpu_map =
- (oirq.args[2] & 0x0000ff00) >> 8;
+ if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) {
+ unsigned long mask;
+ int bit;
+
+ range->irqs[i].cpu_mask = devm_kzalloc(dev,
+ cpumask_size(), GFP_KERNEL);
+ if (!range->irqs[i].cpu_mask)
+ return -ENOMEM;
+
+ mask = (oirq.args[2] & 0x0000ff00) >> 8;
+ for_each_set_bit(bit, &mask, BITS_PER_LONG)
+ cpumask_set_cpu(bit, range->irqs[i].cpu_mask);
+ }
}
range->num_irqs = min(range->num_irqs, range->num_queues);
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 1227872..36b7429 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1245,8 +1245,7 @@ static int iscsit_do_rx_data(
return -1;
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
- count->iov, count->iov_count, data);
+ iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data);
while (msg_data_left(&msg)) {
rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL);
@@ -1302,8 +1301,7 @@ int tx_data(
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
- iov, iov_count, data);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data);
while (msg_data_left(&msg)) {
int tx_loop = sock_sendmsg(conn->sock, &msg);
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index e46ca96..4f134b0 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -268,7 +268,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
}
transport_kunmap_data_sg(cmd);
- target_complete_cmd(cmd, GOOD);
+ target_complete_cmd_with_length(cmd, GOOD, rd_len + 4);
return 0;
}
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 16751ae..49b110d 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
len += sg->length;
}
- iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+ iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
aio_cmd->cmd = cmd;
aio_cmd->len = len;
@@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
len += sg->length;
}
- iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
+ iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
if (is_write)
ret = vfs_iter_write(fd, &iter, &pos, 0);
else
@@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd)
len += se_dev->dev_attrib.block_size;
}
- iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+ iov_iter_bvec(&iter, READ, bvec, nolb, len);
ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
kfree(bvec);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4cf33e2..e31e4fc 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -205,19 +205,19 @@ void transport_subsystem_check_init(void)
if (sub_api_initialized)
return;
- ret = request_module("target_core_iblock");
+ ret = IS_ENABLED(CONFIG_TCM_IBLOCK) && request_module("target_core_iblock");
if (ret != 0)
pr_err("Unable to load target_core_iblock\n");
- ret = request_module("target_core_file");
+ ret = IS_ENABLED(CONFIG_TCM_FILEIO) && request_module("target_core_file");
if (ret != 0)
pr_err("Unable to load target_core_file\n");
- ret = request_module("target_core_pscsi");
+ ret = IS_ENABLED(CONFIG_TCM_PSCSI) && request_module("target_core_pscsi");
if (ret != 0)
pr_err("Unable to load target_core_pscsi\n");
- ret = request_module("target_core_user");
+ ret = IS_ENABLED(CONFIG_TCM_USER2) && request_module("target_core_user");
if (ret != 0)
pr_err("Unable to load target_core_user\n");
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 9756752..45da3e0 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
if (!sock || !buf || !size)
return -EINVAL;
- iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
usbip_dbg_xmit("enter\n");
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c24bb69..50dffe8 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -203,6 +203,19 @@ struct vhost_scsi {
int vs_events_nr; /* num of pending events, protected by vq->mutex */
};
+/*
+ * Context for processing request and control queue operations.
+ */
+struct vhost_scsi_ctx {
+ int head;
+ unsigned int out, in;
+ size_t req_size, rsp_size;
+ size_t out_size, in_size;
+ u8 *target, *lunp;
+ void *req;
+ struct iov_iter out_iter;
+};
+
static struct workqueue_struct *vhost_scsi_workqueue;
/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -800,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
pr_err("Faulted on virtio_scsi_cmd_resp\n");
}
+static int
+vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+ struct vhost_scsi_ctx *vc)
+{
+ int ret = -ENXIO;
+
+ vc->head = vhost_get_vq_desc(vq, vq->iov,
+ ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
+ NULL, NULL);
+
+ pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+ vc->head, vc->out, vc->in);
+
+ /* On error, stop handling until the next kick. */
+ if (unlikely(vc->head < 0))
+ goto done;
+
+ /* Nothing new? Wait for eventfd to tell us they refilled. */
+ if (vc->head == vq->num) {
+ if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+ vhost_disable_notify(&vs->dev, vq);
+ ret = -EAGAIN;
+ }
+ goto done;
+ }
+
+ /*
+ * Get the size of request and response buffers.
+ * FIXME: Not correct for BIDI operation
+ */
+ vc->out_size = iov_length(vq->iov, vc->out);
+ vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
+
+ /*
+ * Copy over the virtio-scsi request header, which for an
+ * ANY_LAYOUT-enabled guest may span multiple iovecs, or a
+ * single iovec may contain both the header + outgoing
+ * WRITE payloads.
+ *
+ * copy_from_iter() will advance out_iter, so that it will
+ * point at the start of the outgoing WRITE payload, if
+ * DMA_TO_DEVICE is set.
+ */
+ iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
+ ret = 0;
+
+done:
+ return ret;
+}
+
+static int
+vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
+{
+ if (unlikely(vc->in_size < vc->rsp_size)) {
+ vq_err(vq,
+ "Response buf too small, need min %zu bytes got %zu",
+ vc->rsp_size, vc->in_size);
+ return -EINVAL;
+ } else if (unlikely(vc->out_size < vc->req_size)) {
+ vq_err(vq,
+ "Request buf too small, need min %zu bytes got %zu",
+ vc->req_size, vc->out_size);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int
+vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
+ struct vhost_scsi_tpg **tpgp)
+{
+ int ret = -EIO;
+
+ if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
+ &vc->out_iter))) {
+ vq_err(vq, "Faulted on copy_from_iter\n");
+ } else if (unlikely(*vc->lunp != 1)) {
+ /* virtio-scsi spec requires byte 0 of the lun to be 1 */
+ vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
+ } else {
+ struct vhost_scsi_tpg **vs_tpg, *tpg;
+
+ vs_tpg = vq->private_data; /* validated at handler entry */
+
+ tpg = READ_ONCE(vs_tpg[*vc->target]);
+ if (unlikely(!tpg)) {
+ vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
+ } else {
+ if (tpgp)
+ *tpgp = tpg;
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
static void
vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
{
struct vhost_scsi_tpg **vs_tpg, *tpg;
struct virtio_scsi_cmd_req v_req;
struct virtio_scsi_cmd_req_pi v_req_pi;
+ struct vhost_scsi_ctx vc;
struct vhost_scsi_cmd *cmd;
- struct iov_iter out_iter, in_iter, prot_iter, data_iter;
+ struct iov_iter in_iter, prot_iter, data_iter;
u64 tag;
u32 exp_data_len, data_direction;
- unsigned int out = 0, in = 0;
- int head, ret, prot_bytes;
- size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
- size_t out_size, in_size;
+ int ret, prot_bytes;
u16 lun;
- u8 *target, *lunp, task_attr;
+ u8 task_attr;
bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
- void *req, *cdb;
+ void *cdb;
mutex_lock(&vq->mutex);
/*
@@ -828,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (!vs_tpg)
goto out;
+ memset(&vc, 0, sizeof(vc));
+ vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+
vhost_disable_notify(&vs->dev, vq);
for (;;) {
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov), &out, &in,
- NULL, NULL);
- pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
- head, out, in);
- /* On error, stop handling until the next kick. */
- if (unlikely(head < 0))
- break;
- /* Nothing new? Wait for eventfd to tell us they refilled. */
- if (head == vq->num) {
- if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
- vhost_disable_notify(&vs->dev, vq);
- continue;
- }
- break;
- }
- /*
- * Check for a sane response buffer so we can report early
- * errors back to the guest.
- */
- if (unlikely(vq->iov[out].iov_len < rsp_size)) {
- vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
- " size, got %zu bytes\n", vq->iov[out].iov_len);
- break;
- }
+ ret = vhost_scsi_get_desc(vs, vq, &vc);
+ if (ret)
+ goto err;
+
/*
* Setup pointers and values based upon different virtio-scsi
* request header if T10_PI is enabled in KVM guest.
*/
if (t10_pi) {
- req = &v_req_pi;
- req_size = sizeof(v_req_pi);
- lunp = &v_req_pi.lun[0];
- target = &v_req_pi.lun[1];
+ vc.req = &v_req_pi;
+ vc.req_size = sizeof(v_req_pi);
+ vc.lunp = &v_req_pi.lun[0];
+ vc.target = &v_req_pi.lun[1];
} else {
- req = &v_req;
- req_size = sizeof(v_req);
- lunp = &v_req.lun[0];
- target = &v_req.lun[1];
+ vc.req = &v_req;
+ vc.req_size = sizeof(v_req);
+ vc.lunp = &v_req.lun[0];
+ vc.target = &v_req.lun[1];
}
- /*
- * FIXME: Not correct for BIDI operation
- */
- out_size = iov_length(vq->iov, out);
- in_size = iov_length(&vq->iov[out], in);
/*
- * Copy over the virtio-scsi request header, which for a
- * ANY_LAYOUT enabled guest may span multiple iovecs, or a
- * single iovec may contain both the header + outgoing
- * WRITE payloads.
- *
- * copy_from_iter() will advance out_iter, so that it will
- * point at the start of the outgoing WRITE payload, if
- * DMA_TO_DEVICE is set.
+ * Validate the size of request and response buffers.
+ * Check for a sane response buffer so we can report
+ * early errors back to the guest.
*/
- iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
+ ret = vhost_scsi_chk_size(vq, &vc);
+ if (ret)
+ goto err;
- if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
- vq_err(vq, "Faulted on copy_from_iter\n");
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
- }
- /* virtio-scsi spec requires byte 0 of the lun to be 1 */
- if (unlikely(*lunp != 1)) {
- vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
- }
+ ret = vhost_scsi_get_req(vq, &vc, &tpg);
+ if (ret)
+ goto err;
- tpg = READ_ONCE(vs_tpg[*target]);
- if (unlikely(!tpg)) {
- /* Target does not exist, fail the request */
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
- }
+ ret = -EIO; /* bad target on any error from here on */
+
/*
* Determine data_direction by calculating the total outgoing
* iovec sizes + incoming iovec sizes vs. virtio-scsi request +
@@ -924,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
*/
prot_bytes = 0;
- if (out_size > req_size) {
+ if (vc.out_size > vc.req_size) {
data_direction = DMA_TO_DEVICE;
- exp_data_len = out_size - req_size;
- data_iter = out_iter;
- } else if (in_size > rsp_size) {
+ exp_data_len = vc.out_size - vc.req_size;
+ data_iter = vc.out_iter;
+ } else if (vc.in_size > vc.rsp_size) {
data_direction = DMA_FROM_DEVICE;
- exp_data_len = in_size - rsp_size;
+ exp_data_len = vc.in_size - vc.rsp_size;
- iov_iter_init(&in_iter, READ, &vq->iov[out], in,
- rsp_size + exp_data_len);
- iov_iter_advance(&in_iter, rsp_size);
+ iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in,
+ vc.rsp_size + exp_data_len);
+ iov_iter_advance(&in_iter, vc.rsp_size);
data_iter = in_iter;
} else {
data_direction = DMA_NONE;
@@ -950,21 +1021,20 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (data_direction != DMA_TO_DEVICE) {
vq_err(vq, "Received non zero pi_bytesout,"
" but wrong data_direction\n");
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
+ goto err;
}
prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
} else if (v_req_pi.pi_bytesin) {
if (data_direction != DMA_FROM_DEVICE) {
vq_err(vq, "Received non zero pi_bytesin,"
" but wrong data_direction\n");
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
+ goto err;
}
prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
}
/*
- * Set prot_iter to data_iter, and advance past any
+ * Set prot_iter to data_iter and truncate it to
+ * prot_bytes, and advance data_iter past any
* preceeding prot_bytes that may be present.
*
* Also fix up the exp_data_len to reflect only the
@@ -973,6 +1043,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (prot_bytes) {
exp_data_len -= prot_bytes;
prot_iter = data_iter;
+ iov_iter_truncate(&prot_iter, prot_bytes);
iov_iter_advance(&data_iter, prot_bytes);
}
tag = vhost64_to_cpu(vq, v_req_pi.tag);
@@ -996,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
vq_err(vq, "Received SCSI CDB with command_size: %d that"
" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
+ goto err;
}
cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
exp_data_len + prot_bytes,
@@ -1005,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (IS_ERR(cmd)) {
vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
PTR_ERR(cmd));
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
+ goto err;
}
cmd->tvc_vhost = vs;
cmd->tvc_vq = vq;
- cmd->tvc_resp_iov = vq->iov[out];
- cmd->tvc_in_iovs = in;
+ cmd->tvc_resp_iov = vq->iov[vc.out];
+ cmd->tvc_in_iovs = vc.in;
pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
cmd->tvc_cdb[0], cmd->tvc_lun);
@@ -1019,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
" %d\n", cmd, exp_data_len, prot_bytes, data_direction);
if (data_direction != DMA_NONE) {
- ret = vhost_scsi_mapal(cmd,
- prot_bytes, &prot_iter,
- exp_data_len, &data_iter);
- if (unlikely(ret)) {
+ if (unlikely(vhost_scsi_mapal(cmd, prot_bytes,
+ &prot_iter, exp_data_len,
+ &data_iter))) {
vq_err(vq, "Failed to map iov to sgl\n");
vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
- vhost_scsi_send_bad_target(vs, vq, head, out);
- continue;
+ goto err;
}
}
/*
@@ -1034,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
* complete the virtio-scsi request in TCM callback context via
* vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
*/
- cmd->tvc_vq_desc = head;
+ cmd->tvc_vq_desc = vc.head;
/*
* Dispatch cmd descriptor for cmwq execution in process
* context provided by vhost_scsi_workqueue. This also ensures
@@ -1043,6 +1110,166 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
*/
INIT_WORK(&cmd->work, vhost_scsi_submission_work);
queue_work(vhost_scsi_workqueue, &cmd->work);
+ ret = 0;
+err:
+ /*
+ * ENXIO: No more requests, or read error, wait for next kick
+ * EINVAL: Invalid response buffer, drop the request
+ * EIO: Respond with bad target
+ * EAGAIN: Pending request
+ */
+ if (ret == -ENXIO)
+ break;
+ else if (ret == -EIO)
+ vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+ }
+out:
+ mutex_unlock(&vq->mutex);
+}
+
+static void
+vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
+ struct vhost_virtqueue *vq,
+ struct vhost_scsi_ctx *vc)
+{
+ struct virtio_scsi_ctrl_tmf_resp __user *resp;
+ struct virtio_scsi_ctrl_tmf_resp rsp;
+ int ret;
+
+ pr_debug("%s\n", __func__);
+ memset(&rsp, 0, sizeof(rsp));
+ rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+ resp = vq->iov[vc->out].iov_base;
+ ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+ if (!ret)
+ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+ else
+ pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
+}
+
+static void
+vhost_scsi_send_an_resp(struct vhost_scsi *vs,
+ struct vhost_virtqueue *vq,
+ struct vhost_scsi_ctx *vc)
+{
+ struct virtio_scsi_ctrl_an_resp __user *resp;
+ struct virtio_scsi_ctrl_an_resp rsp;
+ int ret;
+
+ pr_debug("%s\n", __func__);
+ memset(&rsp, 0, sizeof(rsp)); /* event_actual = 0 */
+ rsp.response = VIRTIO_SCSI_S_OK;
+ resp = vq->iov[vc->out].iov_base;
+ ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+ if (!ret)
+ vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+ else
+ pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
+}
+
+static void
+vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+{
+ union {
+ __virtio32 type;
+ struct virtio_scsi_ctrl_an_req an;
+ struct virtio_scsi_ctrl_tmf_req tmf;
+ } v_req;
+ struct vhost_scsi_ctx vc;
+ size_t typ_size;
+ int ret;
+
+ mutex_lock(&vq->mutex);
+ /*
+ * We can handle the vq only after the endpoint is set up by calling the
+ * VHOST_SCSI_SET_ENDPOINT ioctl.
+ */
+ if (!vq->private_data)
+ goto out;
+
+ memset(&vc, 0, sizeof(vc));
+
+ vhost_disable_notify(&vs->dev, vq);
+
+ for (;;) {
+ ret = vhost_scsi_get_desc(vs, vq, &vc);
+ if (ret)
+ goto err;
+
+ /*
+ * Get the request type first in order to set up
+ * other parameters dependent on the type.
+ */
+ vc.req = &v_req.type;
+ typ_size = sizeof(v_req.type);
+
+ if (unlikely(!copy_from_iter_full(vc.req, typ_size,
+ &vc.out_iter))) {
+ vq_err(vq, "Faulted on copy_from_iter tmf type\n");
+ /*
+ * The size of the response buffer depends on the
+ * request type and must be validated against it.
+ * Since the request type is not known, don't send
+ * a response.
+ */
+ continue;
+ }
+
+ switch (v_req.type) {
+ case VIRTIO_SCSI_T_TMF:
+ vc.req = &v_req.tmf;
+ vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
+ vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+ vc.lunp = &v_req.tmf.lun[0];
+ vc.target = &v_req.tmf.lun[1];
+ break;
+ case VIRTIO_SCSI_T_AN_QUERY:
+ case VIRTIO_SCSI_T_AN_SUBSCRIBE:
+ vc.req = &v_req.an;
+ vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req);
+ vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+ vc.lunp = &v_req.an.lun[0];
+ vc.target = NULL;
+ break;
+ default:
+ vq_err(vq, "Unknown control request %d", v_req.type);
+ continue;
+ }
+
+ /*
+ * Validate the size of request and response buffers.
+ * Check for a sane response buffer so we can report
+ * early errors back to the guest.
+ */
+ ret = vhost_scsi_chk_size(vq, &vc);
+ if (ret)
+ goto err;
+
+ /*
+ * Get the rest of the request now that its size is known.
+ */
+ vc.req += typ_size;
+ vc.req_size -= typ_size;
+
+ ret = vhost_scsi_get_req(vq, &vc, NULL);
+ if (ret)
+ goto err;
+
+ if (v_req.type == VIRTIO_SCSI_T_TMF)
+ vhost_scsi_send_tmf_reject(vs, vq, &vc);
+ else
+ vhost_scsi_send_an_resp(vs, vq, &vc);
+err:
+ /*
+ * ENXIO: No more requests, or read error, wait for next kick
+ * EINVAL: Invalid response buffer, drop the request
+ * EIO: Respond with bad target
+ * EAGAIN: Pending request
+ */
+ if (ret == -ENXIO)
+ break;
+ else if (ret == -EIO)
+ vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
}
out:
mutex_unlock(&vq->mutex);
@@ -1050,7 +1277,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
{
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+ poll.work);
+ struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
+
pr_debug("%s: The handling func for control queue.\n", __func__);
+ vhost_scsi_ctl_handle_vq(vs, vq);
}
static void
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f52008b..3a5f81a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/interval_tree_generic.h>
+#include <linux/nospec.h>
#include "vhost.h"
@@ -1387,6 +1388,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
if (idx >= d->nvqs)
return -ENOBUFS;
+ idx = array_index_nospec(idx, d->nvqs);
vq = d->vqs[idx];
mutex_lock(&vq->mutex);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f62..728ecd1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -41,13 +41,34 @@
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+ __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+ (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
#endif
+enum virtio_balloon_vq {
+ VIRTIO_BALLOON_VQ_INFLATE,
+ VIRTIO_BALLOON_VQ_DEFLATE,
+ VIRTIO_BALLOON_VQ_STATS,
+ VIRTIO_BALLOON_VQ_FREE_PAGE,
+ VIRTIO_BALLOON_VQ_MAX
+};
+
struct virtio_balloon {
struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+ /* Balloon's own wq for cpu-intensive work items */
+ struct workqueue_struct *balloon_wq;
+ /* The free page reporting work item submitted to the balloon wq */
+ struct work_struct report_free_page_work;
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
spinlock_t stop_update_lock;
bool stop_update;
+ /* The list of allocated free pages, waiting to be given back to mm */
+ struct list_head free_page_list;
+ spinlock_t free_page_list_lock;
+ /* The number of free page blocks on the above list */
+ unsigned long num_free_page_blocks;
+ /* The cmd id received from host */
+ u32 cmd_id_received;
+ /* The cmd id that is actively in use */
+ __virtio32 cmd_id_active;
+ /* Buffer to store the stop sign */
+ __virtio32 cmd_id_stop;
+
/* Waiting for host to ack the pages we released. */
wait_queue_head_t acked;
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
virtqueue_kick(vq);
}
-static void virtballoon_changed(struct virtio_device *vdev)
-{
- struct virtio_balloon *vb = vdev->priv;
- unsigned long flags;
-
- spin_lock_irqsave(&vb->stop_update_lock, flags);
- if (!vb->stop_update)
- queue_work(system_freezable_wq, &vb->update_balloon_size_work);
- spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
static inline s64 towards_target(struct virtio_balloon *vb)
{
s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
return target - vb->num_pages;
}
+/* Gives back up to @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+ unsigned long num_to_return)
+{
+ struct page *page;
+ unsigned long num_returned;
+
+ spin_lock_irq(&vb->free_page_list_lock);
+ for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+ page = balloon_page_pop(&vb->free_page_list);
+ if (!page)
+ break;
+ free_pages((unsigned long)page_address(page),
+ VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ }
+ vb->num_free_page_blocks -= num_returned;
+ spin_unlock_irq(&vb->free_page_list_lock);
+
+ return num_returned;
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+ struct virtio_balloon *vb = vdev->priv;
+ unsigned long flags;
+ s64 diff = towards_target(vb);
+
+ if (diff) {
+ spin_lock_irqsave(&vb->stop_update_lock, flags);
+ if (!vb->stop_update)
+ queue_work(system_freezable_wq,
+ &vb->update_balloon_size_work);
+ spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+ }
+
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ virtio_cread(vdev, struct virtio_balloon_config,
+ free_page_report_cmd_id, &vb->cmd_id_received);
+ if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+ /* Pass ULONG_MAX to give back all the free pages */
+ return_free_pages_to_mm(vb, ULONG_MAX);
+ } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+ vb->cmd_id_received !=
+ virtio32_to_cpu(vdev, vb->cmd_id_active)) {
+ spin_lock_irqsave(&vb->stop_update_lock, flags);
+ if (!vb->stop_update) {
+ queue_work(vb->balloon_wq,
+ &vb->report_free_page_work);
+ }
+ spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+ }
+ }
+}
+
static void update_balloon_size(struct virtio_balloon *vb)
{
u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
static int init_vqs(struct virtio_balloon *vb)
{
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- static const char * const names[] = { "inflate", "deflate", "stats" };
- int err, nvqs;
+ struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+ vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+ const char *names[VIRTIO_BALLOON_VQ_MAX];
+ int err;
/*
- * We expect two virtqueues: inflate and deflate, and
- * optionally stat.
+ * Inflateq and deflateq are used unconditionally. The names[]
+ * will be NULL if the related feature is not enabled, which will
+ * cause no allocation for the corresponding virtqueue in find_vqs.
*/
- nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
- err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+ callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+ names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+ callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+ names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+ names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+ names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+ callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+ }
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+ callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+ }
+
+ err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+ vqs, callbacks, names, NULL, NULL);
if (err)
return err;
- vb->inflate_vq = vqs[0];
- vb->deflate_vq = vqs[1];
+ vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+ vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
unsigned int num_stats;
- vb->stats_vq = vqs[2];
+ vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
/*
* Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
}
virtqueue_kick(vb->stats_vq);
}
+
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
return 0;
}
+/*
+ * Open a free page reporting round: latch the most recently received
+ * command id as the active one and post it on the free page vq as an
+ * out buffer.
+ *
+ * Returns 0 on success or the error from virtqueue_add_outbuf().
+ */
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct scatterlist sg;
+	int len, ret;
+
+	/* Reclaim every buffer the device has already used. */
+	while (virtqueue_get_buf(vq, &len))
+		;
+
+	/* cmd_id_active is kept in virtio byte order; it is the wire buffer. */
+	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
+	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+
+	ret = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	virtqueue_kick(vq);
+	return 0;
+}
+
+/*
+ * Close a free page reporting round: post the stop id held in
+ * vb->cmd_id_stop on the free page vq as an out buffer.
+ *
+ * Returns 0 on success or the error from virtqueue_add_outbuf().
+ */
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct scatterlist sg;
+	int len, ret;
+
+	/* Reclaim every buffer the device has already used. */
+	while (virtqueue_get_buf(vq, &len))
+		;
+
+	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+
+	ret = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	virtqueue_kick(vq);
+	return 0;
+}
+
+/*
+ * Allocate one free page block and report it to the host.
+ *
+ * The block is posted on the free page vq as an in buffer and, once
+ * queued, parked on vb->free_page_list so that it can be given back to mm
+ * later.  If the ring only has the entry reserved for a command id left,
+ * the block is freed again and 0 is returned.
+ *
+ * Returns 0 on success, -EINTR when no block could be allocated (taken as
+ * "all free pages have been collected"), or the error from
+ * virtqueue_add_inbuf().
+ */
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct scatterlist sg;
+	struct page *blk;
+	void *addr;
+	int len, ret;
+
+	/* Reclaim every buffer the device has already used. */
+	while (virtqueue_get_buf(vq, &len))
+		;
+
+	blk = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+			  VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	/* A failed allocation ends the round: nothing more to hint. */
+	if (!blk)
+		return -EINTR;
+
+	addr = page_address(blk);
+	sg_init_one(&sg, addr, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+
+	/* One ring entry always stays reserved for the cmd id. */
+	if (vq->num_free <= 1) {
+		/* No room to report this block, so simply release it. */
+		free_pages((unsigned long)addr, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+		return 0;
+	}
+
+	ret = virtqueue_add_inbuf(vq, &sg, 1, addr, GFP_KERNEL);
+	if (unlikely(ret)) {
+		free_pages((unsigned long)addr, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+		return ret;
+	}
+	virtqueue_kick(vq);
+
+	spin_lock_irq(&vb->free_page_list_lock);
+	balloon_page_push(&vb->free_page_list, blk);
+	vb->num_free_page_blocks++;
+	spin_unlock_irq(&vb->free_page_list_lock);
+
+	return 0;
+}
+
+/*
+ * Report free page blocks to the host, one block per iteration.
+ *
+ * The loop ends normally (return 0) when the received command id no
+ * longer matches the active one, i.e. the host sent a stop id or a new
+ * cmd id, or when get_free_page_and_send() returns -EINTR.  Any other
+ * error from get_free_page_and_send() is returned to the caller.
+ */
+static int send_free_pages(struct virtio_balloon *vb)
+{
+	int ret;
+
+	for (;;) {
+		u32 active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+
+		/* The host moved on to another id: stop reporting. */
+		if (active != vb->cmd_id_received)
+			return 0;
+
+		ret = get_free_page_and_send(vb);
+		if (ret == -EINTR)
+			return 0;
+		if (unlikely(ret))
+			return ret;
+	}
+}
+
+/*
+ * Work item that runs one free page reporting round: start id, the free
+ * page blocks themselves, then the stop id.
+ *
+ * A failing step is logged but does not prevent the following steps from
+ * being attempted.
+ */
+static void report_free_page_func(struct work_struct *work)
+{
+	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+						 report_free_page_work);
+	struct device *dev = &vb->vdev->dev;
+	int ret;
+
+	/* Step 1: announce the round with the received cmd id. */
+	ret = send_cmd_id_start(vb);
+	if (unlikely(ret))
+		dev_err(dev, "Failed to send a start id, err = %d\n", ret);
+
+	/* Step 2: stream the free page blocks. */
+	ret = send_free_pages(vb);
+	if (unlikely(ret))
+		dev_err(dev, "Failed to send a free page, err = %d\n", ret);
+
+	/* Step 3: terminate the round with the stop id. */
+	ret = send_cmd_id_stop(vb);
+	if (unlikely(ret))
+		dev_err(dev, "Failed to send a stop id, err = %d\n", ret);
+}
+
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,6 +742,39 @@ static struct file_system_type balloon_fs = {
#endif /* CONFIG_BALLOON_COMPACTION */
+/*
+ * Release hinted free page blocks on behalf of the shrinker.
+ *
+ * @pages_to_free is rounded up to a whole number of blocks of
+ * (1 << VIRTIO_BALLOON_FREE_PAGE_ORDER) pages, because blocks can only be
+ * returned to mm as a unit.
+ *
+ * Returns the number of pages freed, always a multiple of the block size.
+ */
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+				       unsigned long pages_to_free)
+{
+	unsigned long nr_blocks;
+
+	nr_blocks = round_up(pages_to_free,
+			     1 << VIRTIO_BALLOON_FREE_PAGE_ORDER) >>
+		    VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+	return return_free_pages_to_mm(vb, nr_blocks) <<
+	       VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
+/*
+ * Deflate the balloon on behalf of the shrinker.
+ *
+ * @pages_to_free is the deflate budget handed to leak_balloon(), i.e. a
+ * count of balloon pages (the caller scales nr_to_scan by
+ * VIRTIO_BALLOON_PAGES_PER_PAGE).  The host is told about the new balloon
+ * size through update_balloon_size() before returning.
+ *
+ * Returns the number of pages released, i.e. the balloon pages leaked
+ * divided by VIRTIO_BALLOON_PAGES_PER_PAGE.
+ */
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+					  unsigned long pages_to_free)
+{
+	unsigned long leaked_total = 0;
+
+	/*
+	 * One invocation of leak_balloon can deflate at most
+	 * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
+	 * multiple times to deflate pages till reaching pages_to_free.
+	 */
+	while (vb->num_pages && pages_to_free) {
+		unsigned long leaked = leak_balloon(vb, pages_to_free);
+
+		/* No progress was made; bail out rather than spin forever. */
+		if (!leaked)
+			break;
+
+		leaked_total += leaked;
+
+		/*
+		 * Only charge what this pass released.  Subtracting the
+		 * running total would count earlier passes again and could
+		 * wrap the unsigned budget.
+		 */
+		if (leaked >= pages_to_free)
+			pages_to_free = 0;
+		else
+			pages_to_free -= leaked;
+	}
+	update_balloon_size(vb);
+
+	return leaked_total / VIRTIO_BALLOON_PAGES_PER_PAGE;
+}
+
static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
struct shrink_control *sc)
{
@@ -521,18 +784,15 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
- /*
- * One invocation of leak_balloon can deflate at most
- * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
- * multiple times to deflate pages till reaching pages_to_free.
- */
- while (vb->num_pages && pages_to_free) {
- pages_to_free -= pages_freed;
- pages_freed += leak_balloon(vb, pages_to_free);
- }
- update_balloon_size(vb);
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ pages_freed = shrink_free_pages(vb, pages_to_free);
- return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ if (pages_freed >= pages_to_free)
+ return pages_freed;
+
+ pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+ return pages_freed;
}
static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
{
struct virtio_balloon *vb = container_of(shrinker,
struct virtio_balloon, shrinker);
+ unsigned long count;
- return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+ count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+ return count;
}
static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -561,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
+ __u32 poison_val;
int err;
if (!vdev->config->get) {
@@ -604,6 +869,36 @@ static int virtballoon_probe(struct virtio_device *vdev)
}
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ /*
+ * There is always one entry reserved for cmd id, so the ring
+ * size needs to be at least two to report free page hints.
+ */
+ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+ err = -ENOSPC;
+ goto out_del_vqs;
+ }
+ vb->balloon_wq = alloc_workqueue("balloon-wq",
+ WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+ if (!vb->balloon_wq) {
+ err = -ENOMEM;
+ goto out_del_vqs;
+ }
+ INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+ vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+ vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);
+ vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);
+ vb->num_free_page_blocks = 0;
+ spin_lock_init(&vb->free_page_list_lock);
+ INIT_LIST_HEAD(&vb->free_page_list);
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+ memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+ virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+ poison_val, &poison_val);
+ }
+ }
/*
* We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
* shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +906,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
err = virtio_balloon_register_shrinker(vb);
if (err)
- goto out_del_vqs;
+ goto out_del_balloon_wq;
}
virtio_device_ready(vdev);
@@ -619,6 +914,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtballoon_changed(vdev);
return 0;
+out_del_balloon_wq:
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+ destroy_workqueue(vb->balloon_wq);
out_del_vqs:
vdev->config->del_vqs(vdev);
out_free_vb:
@@ -652,6 +950,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
cancel_work_sync(&vb->update_balloon_size_work);
cancel_work_sync(&vb->update_balloon_stats_work);
+ if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+ cancel_work_sync(&vb->report_free_page_work);
+ destroy_workqueue(vb->balloon_wq);
+ }
+
remove_common(vb);
#ifdef CONFIG_BALLOON_COMPACTION
if (vb->vb_dev_info.inode)
@@ -695,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
static int virtballoon_validate(struct virtio_device *vdev)
{
+ if (!page_poisoning_enabled())
+ __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
return 0;
}
@@ -703,6 +1009,8 @@ static unsigned int features[] = {
VIRTIO_BALLOON_F_MUST_TELL_HOST,
VIRTIO_BALLOON_F_STATS_VQ,
VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+ VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+ VIRTIO_BALLOON_F_PAGE_POISON,
};
static struct virtio_driver virtio_balloon_driver = {
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index b1092fb..2e5d845 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque)
if (masked_prod < masked_cons) {
vec[0].iov_base = data->in + masked_prod;
vec[0].iov_len = wanted;
- iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
+ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
} else {
vec[0].iov_base = data->in + masked_prod;
vec[0].iov_len = array_size - masked_prod;
vec[1].iov_base = data->in;
vec[1].iov_len = wanted - vec[0].iov_len;
- iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
+ iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
}
atomic_set(&map->read, 0);
@@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
vec[0].iov_len = size;
- iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
+ iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
} else {
vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
vec[1].iov_base = data->out;
vec[1].iov_len = size - vec[0].iov_len;
- iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
+ iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
}
atomic_set(&map->write, 0);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e1cbdfd..0bcbcc2 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
if (retval == 0)
return retval;
- iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
+ iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE);
retval = p9_client_read(fid, page_offset(page), &to, &err);
if (err) {
@@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page)
bvec.bv_page = page;
bvec.bv_offset = 0;
bvec.bv_len = len;
- iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
+ iov_iter_bvec(&from, WRITE, &bvec, 1, len);
/* We should have writeback_fid always set */
BUG_ON(!v9inode->writeback_fid);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index cb6c403..0074514 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -123,7 +123,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
if (rdir->tail == rdir->head) {
struct iov_iter to;
int n;
- iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+ iov_iter_kvec(&to, READ, &kvec, 1, buflen);
n = p9_client_read(file->private_data, ctx->pos, &to,
&err);
if (err)
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 352abc3..ac8ff8c 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
struct iov_iter to;
int err;
- iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
+ iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
struct iov_iter from;
int retval, err;
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
+ iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
name, value_len, flags);
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index ebba3b1..701aaa9 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -27,3 +27,15 @@
help
Say Y here if you want AFS data to be cached locally on disk through
the generic filesystem cache manager
+
+config AFS_DEBUG_CURSOR
+ bool "AFS server cursor debugging"
+ depends on AFS_FS
+ help
+ Say Y here to cause the contents of a server cursor to be dumped to
+ the dmesg log if the server rotation algorithm fails to successfully
+ contact a server.
+
+ See <file:Documentation/filesystems/afs.txt> for more information.
+
+ If unsure, say N.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 5468740..0738e2b 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -17,6 +17,7 @@
file.o \
flock.o \
fsclient.o \
+ fs_probe.o \
inode.o \
main.o \
misc.o \
@@ -29,9 +30,13 @@
super.o \
netdevices.o \
vlclient.o \
+ vl_list.o \
+ vl_probe.o \
+ vl_rotate.o \
volume.o \
write.o \
- xattr.o
+ xattr.o \
+ yfsclient.o
kafs-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_AFS_FS) := kafs.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 55a756c..967db33 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
/*
* Parse a text string consisting of delimited addresses.
*/
-struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
- char delim,
- unsigned short service,
- unsigned short port)
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+ const char *text, size_t len,
+ char delim,
+ unsigned short service,
+ unsigned short port)
{
+ struct afs_vlserver_list *vllist;
struct afs_addr_list *alist;
const char *p, *end = text + len;
+ const char *problem;
unsigned int nr = 0;
+ int ret = -ENOMEM;
_enter("%*.*s,%c", (int)len, (int)len, text, delim);
- if (!len)
+ if (!len) {
+ _leave(" = -EDESTADDRREQ [empty]");
return ERR_PTR(-EDESTADDRREQ);
+ }
if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
/* Count the addresses */
p = text;
do {
- if (!*p)
- return ERR_PTR(-EINVAL);
+ if (!*p) {
+ problem = "nul";
+ goto inval;
+ }
if (*p == delim)
continue;
nr++;
if (*p == '[') {
p++;
- if (p == end)
- return ERR_PTR(-EINVAL);
+ if (p == end) {
+ problem = "brace1";
+ goto inval;
+ }
p = memchr(p, ']', end - p);
- if (!p)
- return ERR_PTR(-EINVAL);
+ if (!p) {
+ problem = "brace2";
+ goto inval;
+ }
p++;
if (p >= end)
break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
- alist = afs_alloc_addrlist(nr, service, port);
- if (!alist)
+ vllist = afs_alloc_vlserver_list(1);
+ if (!vllist)
return ERR_PTR(-ENOMEM);
+ vllist->nr_servers = 1;
+ vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+ if (!vllist->servers[0].server)
+ goto error_vl;
+
+ alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+ if (!alist)
+ goto error;
+
/* Extract the addresses */
p = text;
do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
break;
}
- if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
+ if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
family = AF_INET;
- else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
+ } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
family = AF_INET6;
- else
+ } else {
+ problem = "family";
goto bad_address;
-
- if (stop != q)
- goto bad_address;
+ }
p = q;
+ if (stop != p) {
+ problem = "nostop";
+ goto bad_address;
+ }
+
if (q < end && *q == ']')
p++;
@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
/* Port number specification "+1234" */
xport = 0;
p++;
- if (p >= end || !isdigit(*p))
+ if (p >= end || !isdigit(*p)) {
+ problem = "port";
goto bad_address;
+ }
do {
xport *= 10;
xport += *p - '0';
- if (xport > 65535)
+ if (xport > 65535) {
+ problem = "pval";
goto bad_address;
+ }
p++;
} while (p < end && isdigit(*p));
} else if (*p == delim) {
p++;
} else {
+ problem = "weird";
goto bad_address;
}
}
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
} while (p < end);
+ rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
_leave(" = [nr %u]", alist->nr_addrs);
- return alist;
+ return vllist;
-bad_address:
- kfree(alist);
+inval:
+ _leave(" = -EINVAL [%s %zu %*.*s]",
+ problem, p - text, (int)len, (int)len, text);
return ERR_PTR(-EINVAL);
+bad_address:
+ _leave(" = -EINVAL [%s %zu %*.*s]",
+ problem, p - text, (int)len, (int)len, text);
+ ret = -EINVAL;
+error:
+ afs_put_addrlist(alist);
+error_vl:
+ afs_put_vlserverlist(net, vllist);
+ return ERR_PTR(ret);
}
/*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
/*
* Perform a DNS query for VL servers and build up an address list.
*/
-struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
{
- struct afs_addr_list *alist;
- char *vllist = NULL;
+ struct afs_vlserver_list *vllist;
+ char *result = NULL;
int ret;
_enter("%s", cell->name);
- ret = dns_query("afsdb", cell->name, cell->name_len,
- "", &vllist, _expiry);
- if (ret < 0)
+ ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
+ &result, _expiry);
+ if (ret < 0) {
+ _leave(" = %d [dns]", ret);
return ERR_PTR(ret);
-
- alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
- VL_SERVICE, AFS_VL_PORT);
- if (IS_ERR(alist)) {
- kfree(vllist);
- if (alist != ERR_PTR(-ENOMEM))
- pr_err("Failed to parse DNS data\n");
- return alist;
}
- kfree(vllist);
- return alist;
+ if (*_expiry == 0)
+ *_expiry = ktime_get_real_seconds() + 60;
+
+ if (ret > 1 && result[0] == 0)
+ vllist = afs_extract_vlserver_list(cell, result, ret);
+ else
+ vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+ VL_SERVICE, AFS_VL_PORT);
+ kfree(result);
+ if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+ pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+ return vllist;
}
/*
@@ -258,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = htons(port);
@@ -296,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(port);
@@ -308,25 +357,33 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
*/
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
{
- _enter("%hu+%hd", ac->start, (short)ac->index);
+ unsigned long set, failed;
+ int index;
if (!ac->alist)
return false;
- if (ac->begun) {
- ac->index++;
- if (ac->index == ac->alist->nr_addrs)
- ac->index = 0;
+ set = ac->alist->responded;
+ failed = ac->alist->failed;
+ _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
- if (ac->index == ac->start) {
- ac->error = -EDESTADDRREQ;
- return false;
- }
- }
+ ac->nr_iterations++;
- ac->begun = true;
+ set &= ~(failed | ac->tried);
+
+ if (!set)
+ return false;
+
+ index = READ_ONCE(ac->alist->preferred);
+ if (test_bit(index, &set))
+ goto selected;
+
+ index = __ffs(set);
+
+selected:
+ ac->index = index;
+ set_bit(index, &ac->tried);
ac->responded = false;
- ac->addr = &ac->alist->addrs[ac->index];
return true;
}
@@ -339,53 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
alist = ac->alist;
if (alist) {
- if (ac->responded && ac->index != ac->start)
- WRITE_ONCE(alist->index, ac->index);
+ if (ac->responded &&
+ ac->index != alist->preferred &&
+ test_bit(ac->alist->preferred, &ac->tried))
+ WRITE_ONCE(alist->preferred, ac->index);
afs_put_addrlist(alist);
+ ac->alist = NULL;
}
- ac->addr = NULL;
- ac->alist = NULL;
- ac->begun = false;
return ac->error;
}
-
-/*
- * Set the address cursor for iterating over VL servers.
- */
-int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
-{
- struct afs_addr_list *alist;
- int ret;
-
- if (!rcu_access_pointer(cell->vl_addrs)) {
- ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
- TASK_INTERRUPTIBLE);
- if (ret < 0)
- return ret;
-
- if (!rcu_access_pointer(cell->vl_addrs) &&
- ktime_get_real_seconds() < cell->dns_expiry)
- return cell->error;
- }
-
- read_lock(&cell->vl_addrs_lock);
- alist = rcu_dereference_protected(cell->vl_addrs,
- lockdep_is_held(&cell->vl_addrs_lock));
- if (alist->nr_addrs > 0)
- afs_get_addrlist(alist);
- else
- alist = NULL;
- read_unlock(&cell->vl_addrs_lock);
-
- if (!alist)
- return -EDESTADDRREQ;
-
- ac->alist = alist;
- ac->addr = NULL;
- ac->start = READ_ONCE(alist->index);
- ac->index = ac->start;
- ac->error = 0;
- ac->begun = false;
- return 0;
-}
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index b4ff1f7..d12ffb4 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -23,9 +23,9 @@
#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
-typedef unsigned afs_volid_t;
-typedef unsigned afs_vnodeid_t;
-typedef unsigned long long afs_dataversion_t;
+typedef u64 afs_volid_t;
+typedef u64 afs_vnodeid_t;
+typedef u64 afs_dataversion_t;
typedef enum {
AFSVL_RWVOL, /* read/write volume */
@@ -52,8 +52,9 @@ typedef enum {
*/
struct afs_fid {
afs_volid_t vid; /* volume ID */
- afs_vnodeid_t vnode; /* file index within volume */
- unsigned unique; /* unique ID number (file index version) */
+ afs_vnodeid_t vnode; /* Lower 64-bits of file index within volume */
+ u32 vnode_hi; /* Upper 32-bits of file index */
+ u32 unique; /* unique ID number (file index version) */
};
/*
@@ -67,14 +68,14 @@ typedef enum {
} afs_callback_type_t;
struct afs_callback {
+ time64_t expires_at; /* Time at which expires */
unsigned version; /* Callback version */
- unsigned expiry; /* Time at which expires */
afs_callback_type_t type; /* Type of callback */
};
struct afs_callback_break {
struct afs_fid fid; /* File identifier */
- struct afs_callback cb; /* Callback details */
+ //struct afs_callback cb; /* Callback details */
};
#define AFSCBMAX 50 /* maximum callbacks transferred per bulk op */
@@ -129,19 +130,18 @@ typedef u32 afs_access_t;
struct afs_file_status {
u64 size; /* file size */
afs_dataversion_t data_version; /* current data version */
- time_t mtime_client; /* last time client changed data */
- time_t mtime_server; /* last time server changed data */
- unsigned abort_code; /* Abort if bulk-fetching this failed */
-
- afs_file_type_t type; /* file type */
- unsigned nlink; /* link count */
- u32 author; /* author ID */
- u32 owner; /* owner ID */
- u32 group; /* group ID */
+ struct timespec64 mtime_client; /* Last time client changed data */
+ struct timespec64 mtime_server; /* Last time server changed data */
+ s64 author; /* author ID */
+ s64 owner; /* owner ID */
+ s64 group; /* group ID */
afs_access_t caller_access; /* access rights for authenticated caller */
afs_access_t anon_access; /* access rights for unauthenticated caller */
umode_t mode; /* UNIX mode */
+ afs_file_type_t type; /* file type */
+ u32 nlink; /* link count */
s32 lock_count; /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
+ u32 abort_code; /* Abort if bulk-fetching this failed */
};
/*
@@ -158,25 +158,27 @@ struct afs_file_status {
* AFS volume synchronisation information
*/
struct afs_volsync {
- time_t creation; /* volume creation time */
+ time64_t creation; /* volume creation time */
};
/*
* AFS volume status record
*/
struct afs_volume_status {
- u32 vid; /* volume ID */
- u32 parent_id; /* parent volume ID */
+ afs_volid_t vid; /* volume ID */
+ afs_volid_t parent_id; /* parent volume ID */
u8 online; /* true if volume currently online and available */
u8 in_service; /* true if volume currently in service */
u8 blessed; /* same as in_service */
u8 needs_salvage; /* true if consistency checking required */
u32 type; /* volume type (afs_voltype_t) */
- u32 min_quota; /* minimum space set aside (blocks) */
- u32 max_quota; /* maximum space this volume may occupy (blocks) */
- u32 blocks_in_use; /* space this volume currently occupies (blocks) */
- u32 part_blocks_avail; /* space available in volume's partition */
- u32 part_max_blocks; /* size of volume's partition */
+ u64 min_quota; /* minimum space set aside (blocks) */
+ u64 max_quota; /* maximum space this volume may occupy (blocks) */
+ u64 blocks_in_use; /* space this volume currently occupies (blocks) */
+ u64 part_blocks_avail; /* space available in volume's partition */
+ u64 part_max_blocks; /* size of volume's partition */
+ s64 vol_copy_date;
+ s64 vol_backup_date;
};
#define AFS_BLOCK_SIZE 1024
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index b1c31ec..f6d0a21e 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
struct afs_vnode *vnode = cookie_netfs_data;
struct afs_vnode_cache_aux aux;
- _enter("{%x,%x,%llx},%p,%u",
+ _enter("{%llx,%x,%llx},%p,%u",
vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
buffer, buflen);
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 5f261fb..1c7955f 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server)
/*
* actually break a callback
*/
-void afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode)
{
_enter("");
- write_seqlock(&vnode->cb_lock);
-
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
vnode->cb_break++;
@@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode)
afs_lock_may_be_available(vnode);
spin_unlock(&vnode->lock);
}
+}
+void afs_break_callback(struct afs_vnode *vnode)
+{
+ write_seqlock(&vnode->cb_lock);
+ __afs_break_callback(vnode);
write_sequnlock(&vnode->cb_lock);
}
@@ -310,14 +313,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
/* TODO: Sort the callback break list by volume ID */
for (; count > 0; callbacks++, count--) {
- _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }",
+ _debug("- Fid { vl=%08llx n=%llu u=%u }",
callbacks->fid.vid,
callbacks->fid.vnode,
- callbacks->fid.unique,
- callbacks->cb.version,
- callbacks->cb.expiry,
- callbacks->cb.type
- );
+ callbacks->fid.unique);
afs_break_one_callback(server, &callbacks->fid);
}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 6127f0f..cf445db 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,6 +20,8 @@
#include "internal.h"
static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
static void afs_manage_cell(struct work_struct *);
@@ -119,7 +121,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
*/
static struct afs_cell *afs_alloc_cell(struct afs_net *net,
const char *name, unsigned int namelen,
- const char *vllist)
+ const char *addresses)
{
struct afs_cell *cell;
int i, ret;
@@ -134,7 +136,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
return ERR_PTR(-EINVAL);
- _enter("%*.*s,%s", namelen, namelen, name, vllist);
+ _enter("%*.*s,%s", namelen, namelen, name, addresses);
cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
if (!cell) {
@@ -153,23 +155,26 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
(1 << AFS_CELL_FL_NO_LOOKUP_YET));
INIT_LIST_HEAD(&cell->proc_volumes);
rwlock_init(&cell->proc_lock);
- rwlock_init(&cell->vl_addrs_lock);
+ rwlock_init(&cell->vl_servers_lock);
/* Fill in the VL server list if we were given a list of addresses to
* use.
*/
- if (vllist) {
- struct afs_addr_list *alist;
+ if (addresses) {
+ struct afs_vlserver_list *vllist;
- alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
- VL_SERVICE, AFS_VL_PORT);
- if (IS_ERR(alist)) {
- ret = PTR_ERR(alist);
+ vllist = afs_parse_text_addrs(net,
+ addresses, strlen(addresses), ':',
+ VL_SERVICE, AFS_VL_PORT);
+ if (IS_ERR(vllist)) {
+ ret = PTR_ERR(vllist);
goto parse_failed;
}
- rcu_assign_pointer(cell->vl_addrs, alist);
+ rcu_assign_pointer(cell->vl_servers, vllist);
cell->dns_expiry = TIME64_MAX;
+ } else {
+ cell->dns_expiry = ktime_get_real_seconds();
}
_leave(" = %p", cell);
@@ -356,26 +361,40 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
*/
static void afs_update_cell(struct afs_cell *cell)
{
- struct afs_addr_list *alist, *old;
- time64_t now, expiry;
+ struct afs_vlserver_list *vllist, *old;
+ unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+ unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+ time64_t now, expiry = 0;
_enter("%s", cell->name);
- alist = afs_dns_query(cell, &expiry);
- if (IS_ERR(alist)) {
- switch (PTR_ERR(alist)) {
+ vllist = afs_dns_query(cell, &expiry);
+
+ now = ktime_get_real_seconds();
+ if (min_ttl > max_ttl)
+ max_ttl = min_ttl;
+ if (expiry < now + min_ttl)
+ expiry = now + min_ttl;
+ else if (expiry > now + max_ttl)
+ expiry = now + max_ttl;
+
+ if (IS_ERR(vllist)) {
+ switch (PTR_ERR(vllist)) {
case -ENODATA:
- /* The DNS said that the cell does not exist */
+ case -EDESTADDRREQ:
+ /* The DNS said that the cell does not exist or there
+ * weren't any addresses to be had.
+ */
set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
- cell->dns_expiry = ktime_get_real_seconds() + 61;
+ cell->dns_expiry = expiry;
break;
case -EAGAIN:
case -ECONNREFUSED:
default:
set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
- cell->dns_expiry = ktime_get_real_seconds() + 10;
+ cell->dns_expiry = now + 10;
break;
}
@@ -387,12 +406,12 @@ static void afs_update_cell(struct afs_cell *cell)
/* Exclusion on changing vl_addrs is achieved by a
* non-reentrant work item.
*/
- old = rcu_dereference_protected(cell->vl_addrs, true);
- rcu_assign_pointer(cell->vl_addrs, alist);
+ old = rcu_dereference_protected(cell->vl_servers, true);
+ rcu_assign_pointer(cell->vl_servers, vllist);
cell->dns_expiry = expiry;
if (old)
- afs_put_addrlist(old);
+ afs_put_vlserverlist(cell->net, old);
}
if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +433,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
ASSERTCMP(atomic_read(&cell->usage), ==, 0);
- afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
+ afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
key_put(cell->anonymous_key);
kfree(cell);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 9e51d6f..8ee59728 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -16,6 +16,7 @@
#include <linux/ip.h>
#include "internal.h"
#include "afs_cm.h"
+#include "protocol_yfs.h"
static int afs_deliver_cb_init_call_back_state(struct afs_call *);
static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *);
static void SRXAFSCB_ProbeUuid(struct work_struct *);
static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
+
#define CM_NAME(name) \
const char afs_SRXCB##name##_name[] __tracepoint_string = \
"CB." #name
@@ -101,12 +104,25 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
};
/*
+ * YFS CB.CallBack operation type
+ */
+static CM_NAME(YFS_CallBack);
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+ .name = afs_SRXCBYFS_CallBack_name, /* string defined by CM_NAME(YFS_CallBack) */
+ .deliver = afs_deliver_yfs_cb_callback, /* decodes the YFS-format request */
+ .destructor = afs_cm_destructor,
+ .work = SRXAFSCB_CallBack, /* processed by SRXAFSCB_CallBack */
+};
+
+/*
* route an incoming cache manager call
* - return T if supported, F if not
*/
bool afs_cm_incoming_call(struct afs_call *call)
{
- _enter("{CB.OP %u}", call->operation_ID);
+ _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
+
+ call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
switch (call->operation_ID) {
case CBCallBack:
@@ -127,12 +143,102 @@ bool afs_cm_incoming_call(struct afs_call *call)
case CBTellMeAboutYourself:
call->type = &afs_SRXCBTellMeAboutYourself;
return true;
+ case YFSCBCallBack:
+ if (call->service_id != YFS_CM_SERVICE)
+ return false;
+ call->type = &afs_SRXYFSCB_CallBack;
+ return true;
default:
return false;
}
}
/*
+ * Record a probe to the cache manager from a server.
+ */
+static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
+{
+ _enter("");
+ /* Unlocked check: an epoch is flagged as known and no probe is in progress. */
+ if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+ if (server->cm_epoch == call->epoch)
+ return 0; /* epoch unchanged; nothing to record */
+ /* Epoch differs from the recorded one: report it, but only once. */
+ if (!server->probe.said_rebooted) {
+ pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
+ server->probe.said_rebooted = true;
+ }
+ }
+
+ spin_lock(&server->probe_lock);
+ /* No epoch flagged as known: adopt this call's epoch for both fields. */
+ if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+ server->cm_epoch = call->epoch;
+ server->probe.cm_epoch = call->epoch;
+ goto out;
+ }
+ /* Already CM-probed with a different epoch: warn once about the mismatch. */
+ if (server->probe.cm_probed &&
+ call->epoch != server->probe.cm_epoch &&
+ !server->probe.said_inconsistent) {
+ pr_notice("kAFS: FS endpoints inconsistent %pU\n",
+ &server->uuid);
+ server->probe.said_inconsistent = true;
+ }
+ /* First CM probe, or epoch matches the server's: sync probe.cm_epoch. */
+ if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
+ server->probe.cm_epoch = server->cm_epoch;
+
+out:
+ server->probe.cm_probed = true;
+ spin_unlock(&server->probe_lock);
+ return 0; /* every path returns 0 */
+}
+
+/*
+ * Find the server record by peer address and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+ struct sockaddr_rxrpc srx;
+ struct afs_server *server;
+ /* srx is filled in from the call's peer and then used as the lookup key. */
+ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+
+ server = afs_find_server(call->net, &srx);
+ if (!server) {
+ trace_afs_cm_no_server(call, &srx);
+ return 0; /* unknown server: traced, but not reported as an error */
+ }
+ /* Attach the server to the call before recording the probe. */
+ call->cm_server = server;
+ return afs_record_cm_probe(call, server);
+}
+
+/*
+ * Find the server record by server UUID and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+ struct afs_uuid *uuid)
+{
+ struct afs_server *server;
+ /* NOTE(review): uuid is never referenced; call->request is the lookup key. */
+ rcu_read_lock();
+ server = afs_find_server_by_uuid(call->net, call->request);
+ rcu_read_unlock();
+ if (!server) {
+ trace_afs_cm_no_server_u(call, call->request);
+ return 0; /* unknown server: traced, but not reported as an error */
+ }
+ /* Attach the server to the call before recording the probe. */
+ call->cm_server = server;
+ return afs_record_cm_probe(call, server);
+}
+
+/*
* Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
@@ -168,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
- struct sockaddr_rxrpc srx;
__be32 *bp;
int ret, loop;
@@ -176,32 +281,32 @@ static int afs_deliver_cb_callback(struct afs_call *call)
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
/* extract the FID array and its count in two steps */
case 1:
_debug("extract FID count");
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->count = ntohl(call->tmp);
_debug("FID count: %u", call->count);
if (call->count > AFSCBMAX)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_cb_fid_count);
call->buffer = kmalloc(array3_size(call->count, 3, 4),
GFP_KERNEL);
if (!call->buffer)
return -ENOMEM;
- call->offset = 0;
+ afs_extract_to_buf(call, call->count * 3 * 4);
call->unmarshall++;
case 2:
_debug("extract FID array");
- ret = afs_extract_data(call, call->buffer,
- call->count * 3 * 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -218,59 +323,46 @@ static int afs_deliver_cb_callback(struct afs_call *call)
cb->fid.vid = ntohl(*bp++);
cb->fid.vnode = ntohl(*bp++);
cb->fid.unique = ntohl(*bp++);
- cb->cb.type = AFSCM_CB_UNTYPED;
}
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
/* extract the callback array and its count in two steps */
case 3:
_debug("extract CB count");
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->count2 = ntohl(call->tmp);
_debug("CB count: %u", call->count2);
if (call->count2 != call->count && call->count2 != 0)
- return afs_protocol_error(call, -EBADMSG);
- call->offset = 0;
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_cb_count);
+ call->_iter = &call->iter;
+ iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4);
call->unmarshall++;
case 4:
- _debug("extract CB array");
- ret = afs_extract_data(call, call->buffer,
- call->count2 * 3 * 4, false);
+ _debug("extract discard %zu/%u",
+ iov_iter_count(&call->iter), call->count2 * 3 * 4);
+
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
- _debug("unmarshall CB array");
- cb = call->request;
- bp = call->buffer;
- for (loop = call->count2; loop > 0; loop--, cb++) {
- cb->cb.version = ntohl(*bp++);
- cb->cb.expiry = ntohl(*bp++);
- cb->cb.type = ntohl(*bp++);
- }
-
- call->offset = 0;
call->unmarshall++;
case 5:
break;
}
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
- return -EIO;
+ return afs_io_error(call, afs_io_error_cm_reply);
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- call->cm_server = afs_find_server(call->net, &srx);
- if (!call->cm_server)
- trace_afs_cm_no_server(call, &srx);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -294,24 +386,18 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
*/
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
- struct sockaddr_rxrpc srx;
int ret;
_enter("");
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
- ret = afs_extract_data(call, NULL, 0, false);
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- call->cm_server = afs_find_server(call->net, &srx);
- if (!call->cm_server)
- trace_afs_cm_no_server(call, &srx);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -330,16 +416,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
switch (call->unmarshall) {
case 0:
- call->offset = 0;
call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
if (!call->buffer)
return -ENOMEM;
+ afs_extract_to_buf(call, 11 * sizeof(__be32));
call->unmarshall++;
case 1:
_debug("extract UUID");
- ret = afs_extract_data(call, call->buffer,
- 11 * sizeof(__be32), false);
+ ret = afs_extract_data(call, false);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -362,7 +447,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
for (loop = 0; loop < 6; loop++)
r->node[loop] = ntohl(b[loop + 5]);
- call->offset = 0;
call->unmarshall++;
case 2:
@@ -370,17 +454,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
}
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
- return -EIO;
+ return afs_io_error(call, afs_io_error_cm_reply);
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rcu_read_lock();
- call->cm_server = afs_find_server_by_uuid(call->net, call->request);
- rcu_read_unlock();
- if (!call->cm_server)
- trace_afs_cm_no_server_u(call, call->request);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_uuid(call, call->request);
}
/*
@@ -405,14 +483,14 @@ static int afs_deliver_cb_probe(struct afs_call *call)
_enter("");
- ret = afs_extract_data(call, NULL, 0, false);
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
- return -EIO;
-
- return afs_queue_call_work(call);
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -453,16 +531,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
switch (call->unmarshall) {
case 0:
- call->offset = 0;
call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
if (!call->buffer)
return -ENOMEM;
+ afs_extract_to_buf(call, 11 * sizeof(__be32));
call->unmarshall++;
case 1:
_debug("extract UUID");
- ret = afs_extract_data(call, call->buffer,
- 11 * sizeof(__be32), false);
+ ret = afs_extract_data(call, false);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -485,7 +562,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
for (loop = 0; loop < 6; loop++)
r->node[loop] = ntohl(b[loop + 5]);
- call->offset = 0;
call->unmarshall++;
case 2:
@@ -493,9 +569,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
}
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
- return -EIO;
-
- return afs_queue_call_work(call);
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_uuid(call, call->request);
}
/*
@@ -570,12 +645,88 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
_enter("");
- ret = afs_extract_data(call, NULL, 0, false);
+ afs_extract_discard(call, 0);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
- return -EIO;
+ return afs_io_error(call, afs_io_error_cm_reply);
+ return afs_find_cm_server_by_peer(call);
+}
- return afs_queue_call_work(call);
+/*
+ * deliver request data to a YFS CB.CallBack call
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+ struct afs_callback_break *cb;
+ struct yfs_xdr_YFSFid *bp;
+ size_t size;
+ int ret, loop;
+ /* call->unmarshall selects the decode step; each case falls into the next. */
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++; /* fall through to step 1 */
+
+ /* extract the FID array and its count in two steps */
+ case 1:
+ _debug("extract FID count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+ /* The count arrives big-endian in call->tmp and is capped at YFSCBMAX. */
+ call->count = ntohl(call->tmp);
+ _debug("FID count: %u", call->count);
+ if (call->count > YFSCBMAX)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_cb_fid_count);
+ /* Buffer sized for call->count wire-format FID records. */
+ size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+ call->buffer = kmalloc(size, GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ afs_extract_to_buf(call, size);
+ call->unmarshall++; /* fall through to step 2 */
+
+ case 2:
+ _debug("extract FID array");
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall FID array");
+ call->request = kcalloc(call->count,
+ sizeof(struct afs_callback_break),
+ GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+ /* Convert each wire FID in call->buffer into its call->request slot. */
+ cb = call->request;
+ bp = call->buffer;
+ for (loop = call->count; loop > 0; loop--, cb++) {
+ cb->fid.vid = xdr_to_u64(bp->volume);
+ cb->fid.vnode = xdr_to_u64(bp->vnode.lo);
+ cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+ cb->fid.unique = ntohl(bp->vnode.unique);
+ bp++;
+ }
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++; /* fall through to step 3 */
+
+ case 3:
+ break;
+ }
+
+ if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+ return afs_io_error(call, afs_io_error_cm_reply);
+
+ /* We'll need the file server record as that tells us which set of
+ * vnodes to operate upon.
+ */
+ return afs_find_cm_server_by_peer(call);
+}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 855bf2b..43dea3b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
ntohs(dbuf->blocks[tmp].hdr.magic));
trace_afs_dir_check_failed(dvnode, off, i_size);
kunmap(page);
+ trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
goto error;
}
@@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
retry:
i_size = i_size_read(&dvnode->vfs_inode);
if (i_size < 2048)
- return ERR_PTR(-EIO);
- if (i_size > 2048 * 1024)
+ return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+ if (i_size > 2048 * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
return ERR_PTR(-EFBIG);
+ }
_enter("%llu", i_size);
@@ -315,7 +318,8 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
/*
* deal with one block in an AFS directory
*/
-static int afs_dir_iterate_block(struct dir_context *ctx,
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+ struct dir_context *ctx,
union afs_xdr_dir_block *block,
unsigned blkoff)
{
@@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
" (len %u/%zu)",
blkoff / sizeof(union afs_xdr_dir_block),
offset, next, tmp, nlen);
- return -EIO;
+ return afs_bad(dvnode, afs_file_error_dir_over_end);
}
if (!(block->hdr.bitmap[next / 8] &
(1 << (next % 8)))) {
@@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
" %u unmarked extension (len %u/%zu)",
blkoff / sizeof(union afs_xdr_dir_block),
offset, next, tmp, nlen);
- return -EIO;
+ return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
}
_debug("ENT[%zu.%u]: ext %u/%zu",
@@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
*/
page = req->pages[blkoff / PAGE_SIZE];
if (!page) {
- ret = -EIO;
+ ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
break;
}
mark_page_accessed(page);
@@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
do {
dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
sizeof(union afs_xdr_dir_block)];
- ret = afs_dir_iterate_block(ctx, dblock, blkoff);
+ ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
if (ret != 1) {
kunmap(page);
goto out;
@@ -548,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
}
*fid = cookie.fid;
- _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+ _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
return 0;
}
@@ -826,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
struct key *key;
int ret;
- _enter("{%x:%u},%p{%pd},",
+ _enter("{%llx:%llu},%p{%pd},",
dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -896,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (d_really_is_positive(dentry)) {
vnode = AFS_FS_I(d_inode(dentry));
- _enter("{v={%x:%u} n=%pd fl=%lx},",
+ _enter("{v={%llx:%llu} n=%pd fl=%lx},",
vnode->fid.vid, vnode->fid.vnode, dentry,
vnode->flags);
} else {
@@ -965,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
/* if the vnode ID has changed, then the dirent points to a
* different file */
if (fid.vnode != vnode->fid.vnode) {
- _debug("%pd: dirent changed [%u != %u]",
+ _debug("%pd: dirent changed [%llu != %llu]",
dentry, fid.vnode,
vnode->fid.vnode);
goto not_found;
@@ -1085,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ afs_vnode_commit_status(fc, vnode, 0);
d_add(new_dentry, inode);
}
@@ -1104,7 +1109,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
mode |= S_IFDIR;
- _enter("{%x:%u},{%pd},%ho",
+ _enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
key = afs_request_key(dvnode->volume->cell);
@@ -1169,12 +1174,12 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
static int afs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
u64 data_version = dvnode->status.data_version;
int ret;
- _enter("{%x:%u},{%pd}",
+ _enter("{%llx:%llu},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
key = afs_request_key(dvnode->volume->cell);
@@ -1183,11 +1188,19 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
goto error;
}
+ /* Try to make sure we have a callback promise on the victim. */
+ if (d_really_is_positive(dentry)) {
+ vnode = AFS_FS_I(d_inode(dentry));
+ ret = afs_validate(vnode, key);
+ if (ret < 0)
+ goto error_key;
+ }
+
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_remove(&fc, dentry->d_name.name, true,
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
data_version);
}
@@ -1201,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
}
}
+error_key:
key_put(key);
error:
return ret;
@@ -1231,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
if (d_really_is_positive(dentry)) {
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
- if (dir_valid) {
+ if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ /* Already done */
+ } else if (dir_valid) {
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -1260,13 +1276,13 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
unsigned long d_version = (unsigned long)dentry->d_fsdata;
u64 data_version = dvnode->status.data_version;
int ret;
- _enter("{%x:%u},{%pd}",
+ _enter("{%llx:%llu},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
if (dentry->d_name.len >= AFSNAMEMAX)
@@ -1290,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (afs_begin_vnode_operation(&fc, dvnode, key)) {
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_remove(&fc, dentry->d_name.name, false,
+
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+ !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+ yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+ data_version);
+ if (fc.ac.error != -ECONNABORTED ||
+ fc.ac.abort_code != RXGEN_OPCODE)
+ continue;
+ set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+ }
+
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
data_version);
}
@@ -1330,7 +1357,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
mode |= S_IFREG;
- _enter("{%x:%u},{%pd},%ho,",
+ _enter("{%llx:%llu},{%pd},%ho,",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
ret = -ENAMETOOLONG;
@@ -1393,7 +1420,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
dvnode = AFS_FS_I(dir);
data_version = dvnode->status.data_version;
- _enter("{%x:%u},{%x:%u},{%pd}",
+ _enter("{%llx:%llu},{%llx:%llu},{%pd}",
vnode->fid.vid, vnode->fid.vnode,
dvnode->fid.vid, dvnode->fid.vnode,
dentry);
@@ -1464,7 +1491,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
u64 data_version = dvnode->status.data_version;
int ret;
- _enter("{%x:%u},{%pd},%s",
+ _enter("{%llx:%llu},{%pd},%s",
dvnode->fid.vid, dvnode->fid.vnode, dentry,
content);
@@ -1540,7 +1567,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
orig_data_version = orig_dvnode->status.data_version;
new_data_version = new_dvnode->status.data_version;
- _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
+ _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
vnode->fid.vid, vnode->fid.vnode,
new_dvnode->fid.vid, new_dvnode->fid.vnode,
@@ -1607,7 +1634,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
{
struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
- _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+ _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
set_page_private(page, 0);
ClearPagePrivate(page);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index f29c6da..a9ba81d 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
return 0;
}
- ret = dns_query("afsdb", name, len, "", NULL, NULL);
+ ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
if (ret == -ENODATA)
ret = -EDESTADDRREQ;
return ret;
@@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
struct inode *inode;
int ret = -ENOENT;
- _enter("%p{%pd}, {%x:%u}",
+ _enter("%p{%pd}, {%llx:%llu}",
dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7d4f261..d6bc3f5 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file)
struct key *key;
int ret;
- _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
@@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file)
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = file->private_data;
- _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
if ((file->f_mode & FMODE_WRITE))
return vfs_fsync(file, 0);
@@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
struct afs_fs_cursor fc;
int ret;
- _enter("%s{%x:%u.%u},%x,,,",
+ _enter("%s{%llx:%llu.%u},%x,,,",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
unsigned long priv;
- _enter("{{%x:%u}[%lu],%lx},%x",
+ _enter("{{%llx:%llu}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
gfp_flags);
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index dc62d15..0568fd9 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = {
*/
void afs_lock_may_be_available(struct afs_vnode *vnode)
{
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
}
@@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
struct afs_fs_cursor fc;
int ret;
- _enter("%s{%x:%u.%u},%x,%u",
+ _enter("%s{%llx:%llu.%u},%x,%u",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
struct afs_fs_cursor fc;
int ret;
- _enter("%s{%x:%u.%u},%x",
+ _enter("%s{%llx:%llu.%u},%x",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
struct afs_fs_cursor fc;
int ret;
- _enter("%s{%x:%u.%u},%x",
+ _enter("%s{%llx:%llu.%u},%x",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work)
struct key *key;
int ret;
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
spin_lock(&vnode->lock);
@@ -192,7 +192,7 @@ void afs_lock_work(struct work_struct *work)
ret = afs_release_lock(vnode, vnode->lock_key);
if (ret < 0)
printk(KERN_WARNING "AFS:"
- " Failed to release lock on {%x:%x} error %d\n",
+ " Failed to release lock on {%llx:%llx} error %d\n",
vnode->fid.vid, vnode->fid.vnode, ret);
spin_lock(&vnode->lock);
@@ -229,7 +229,7 @@ void afs_lock_work(struct work_struct *work)
key_put(key);
if (ret < 0)
- pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+ pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
vnode->fid.vid, vnode->fid.vnode, ret);
spin_lock(&vnode->lock);
@@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
struct key *key = afs_file_key(file);
int ret;
- _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+ _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
/* only whole-file locks are supported */
if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
@@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
int ret;
- _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+ _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
/* Flush all pending writes before doing anything with locks. */
vfs_fsync(file, 0);
@@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
{
struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
- _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+ _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
vnode->fid.vid, vnode->fid.vnode, cmd,
fl->fl_type, fl->fl_flags,
(long long) fl->fl_start, (long long) fl->fl_end);
@@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
{
struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
- _enter("{%x:%u},%d,{t=%x,fl=%x}",
+ _enter("{%llx:%llu},%d,{t=%x,fl=%x}",
vnode->fid.vid, vnode->fid.vnode, cmd,
fl->fl_type, fl->fl_flags);
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 0000000..d049cb4
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,270 @@
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+/* Note one probe finishing; returns true if it was the last one outstanding. */
+static bool afs_fs_probe_done(struct afs_server *server)
+{
+ if (!atomic_dec_and_test(&server->probe_outstanding))
+ return false;
+ /* Count hit zero: wake counter waiters, release the PROBING bit, wake bit waiters. */
+ wake_up_var(&server->probe_outstanding);
+ clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
+ wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
+ return true;
+}
+
+/*
+ * Process the result of a fileserver probe, called after successful or failed
+ * delivery of FS.GetCapabilities; updates server->probe and the address bits.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+ struct afs_addr_list *alist = call->alist;
+ struct afs_server *server = call->reply[0];
+ unsigned int server_index = (long)call->reply[1]; /* Integer carried in a pointer slot. */
+ unsigned int index = call->addr_ix; /* Which entry of alist was probed. */
+ unsigned int rtt = UINT_MAX; /* Stays UINT_MAX if no RTT is measured below. */
+ bool have_result = false;
+ u64 _rtt;
+ int ret = call->error;
+
+ _enter("%pU,%u", &server->uuid, index);
+
+ spin_lock(&server->probe_lock);
+
+ switch (ret) {
+ case 0:
+ server->probe.error = 0;
+ goto responded;
+ case -ECONNABORTED: /* Still handled as a response from the server. */
+ if (!server->probe.responded) {
+ server->probe.abort_code = call->abort_code;
+ server->probe.error = ret;
+ }
+ goto responded;
+ case -ENOMEM: /* Local failure: the address bits are left untouched. */
+ case -ENONET:
+ server->probe.local_failure = true;
+ afs_io_error(call, afs_io_error_fs_probe_fail);
+ goto out;
+ case -ECONNRESET: /* Responded, but call expired. */
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ default: /* Every other error marks this address as failed. */
+ clear_bit(index, &alist->responded);
+ set_bit(index, &alist->failed);
+ if (!server->probe.responded &&
+ (server->probe.error == 0 ||
+ server->probe.error == -ETIMEDOUT ||
+ server->probe.error == -ETIME))
+ server->probe.error = ret; /* Only replaces "no error" or a timeout. */
+ afs_io_error(call, afs_io_error_fs_probe_fail);
+ goto out;
+ }
+
+responded:
+ set_bit(index, &alist->responded);
+ clear_bit(index, &alist->failed);
+ /* Note which service answered; the YFS flag is not cleared once a YFS answer is seen. */
+ if (call->service_id == YFS_FS_SERVICE) {
+ server->probe.is_yfs = true;
+ set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ } else {
+ server->probe.not_yfs = true;
+ if (!server->probe.is_yfs) {
+ clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ }
+ }
+
+ /* Get the RTT and scale it to fit into a 32-bit value that represents
+ * over a minute of time so that we can access it with one instruction
+ * on a 32-bit system.
+ */
+ _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+ _rtt /= 64;
+ rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; /* Saturate rather than truncate. */
+ if (rtt < server->probe.rtt) { /* Lowest RTT seen so far for this server. */
+ server->probe.rtt = rtt;
+ alist->preferred = index;
+ have_result = true;
+ }
+
+ smp_wmb(); /* Set rtt before responded. */
+ server->probe.responded = true;
+ set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+out:
+ spin_unlock(&server->probe_lock);
+
+ _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+ server_index, index, &alist->addrs[index].transport,
+ (unsigned int)rtt, ret);
+ /* Wake waiters on a new lowest RTT or when the last outstanding probe completes. */
+ have_result |= afs_fs_probe_done(server);
+ if (have_result) {
+ server->probe.have_result = true;
+ wake_up_var(&server->probe.have_result);
+ wake_up_all(&server->probe_wq);
+ }
+}
+
+/*
+ * Probe all of a fileserver's addresses to find the best route and query its
+ * capabilities. Returns 0 once all are issued, else the first other result.
+ */
+static int afs_do_probe_fileserver(struct afs_net *net,
+ struct afs_server *server,
+ struct key *key,
+ unsigned int server_index)
+{
+ struct afs_addr_cursor ac = {
+ .index = 0,
+ };
+ int ret;
+
+ _enter("%pU", &server->uuid);
+
+ read_lock(&server->fs_lock);
+ ac.alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->fs_lock));
+ read_unlock(&server->fs_lock);
+ /* One outstanding count per address; start from a zeroed probe record. */
+ atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ memset(&server->probe, 0, sizeof(server->probe));
+ server->probe.rtt = UINT_MAX;
+ /* Issue a capabilities call to each address; -EINPROGRESS means it was started. */
+ for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+ ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+ true);
+ if (ret != -EINPROGRESS) {
+ afs_fs_probe_done(server); /* NOTE(review): drops one count only; confirm unsent probes are accounted for. */
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers, stopping at the first nonzero result.
+ */
+int afs_probe_fileservers(struct afs_net *net, struct key *key,
+ struct afs_server_list *list)
+{
+ struct afs_server *server;
+ int i, ret;
+
+ for (i = 0; i < list->nr_servers; i++) {
+ server = list->servers[i].server;
+ if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
+ continue;
+ /* Whoever sets the PROBING bit starts the probes; others skip this server. */
+ if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
+ ret = afs_do_probe_fileserver(net, server, key, i);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond (untried is a bitmask).
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+ struct wait_queue_entry *waits;
+ struct afs_server *server;
+ unsigned int rtt = UINT_MAX;
+ bool have_responders = false;
+ int pref = -1, i;
+
+ _enter("%u,%lx", slist->nr_servers, untried);
+
+ /* Only wait for servers that have a probe outstanding. */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ __clear_bit(i, &untried);
+ if (server->probe.responded)
+ have_responders = true;
+ }
+ }
+ if (have_responders || !untried)
+ return 0; /* Someone already responded, or nothing is left to wait for. */
+ /* One wait queue entry per server slot, indexed the same way as slist->servers. */
+ waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+ if (!waits)
+ return -ENOMEM;
+ /* Queue ourselves on the probe_wq of every server still being waited for. */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ init_waitqueue_entry(&waits[i], current);
+ add_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+ /* Sleep until some server responds, no probe is in progress, or a signal is pending. */
+ for (;;) {
+ bool still_probing = false;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (server->probe.responded)
+ goto stop;
+ if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ still_probing = true;
+ }
+ }
+
+ if (!still_probing || unlikely(signal_pending(current)))
+ goto stop;
+ schedule();
+ }
+
+stop:
+ set_current_state(TASK_RUNNING);
+ /* Pick the responder with the lowest RTT and take ourselves off each wait queue. */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (server->probe.responded &&
+ server->probe.rtt < rtt) {
+ pref = i;
+ rtt = server->probe.rtt;
+ }
+
+ remove_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ kfree(waits);
+ /* A pending signal is only reported if no server responded. */
+ if (pref == -1 && signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (pref >= 0)
+ slist->preferred = pref;
+ return 0;
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 50929cb..ca08c83 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -17,15 +17,10 @@
#include "internal.h"
#include "afs_fs.h"
#include "xdr_fs.h"
+#include "protocol_yfs.h"
static const struct afs_fid afs_zero_fid;
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
{
call->cbi = afs_get_cb_interest(cbi);
@@ -75,8 +70,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
struct timespec64 t;
umode_t mode;
- t.tv_sec = status->mtime_client;
- t.tv_nsec = 0;
+ t = status->mtime_client;
vnode->vfs_inode.i_ctime = t;
vnode->vfs_inode.i_mtime = t;
vnode->vfs_inode.i_atime = t;
@@ -96,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
if (!(flags & AFS_VNODE_NOT_YET_SET)) {
if (expected_version &&
*expected_version != status->data_version) {
- _debug("vnode modified %llx on {%x:%u} [exp %llx]",
+ _debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
(unsigned long long) status->data_version,
vnode->fid.vid, vnode->fid.vnode,
(unsigned long long) *expected_version);
@@ -170,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
if (type != status->type &&
vnode &&
!test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
- pr_warning("Vnode %x:%x:%x changed type %u to %u\n",
+ pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
@@ -200,8 +194,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
EXTRACT_M(mode);
EXTRACT_M(group);
- status->mtime_client = ntohl(xdr->mtime_client);
- status->mtime_server = ntohl(xdr->mtime_server);
+ status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
+ status->mtime_client.tv_nsec = 0;
+ status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
+ status->mtime_server.tv_nsec = 0;
status->lock_count = ntohl(xdr->lock_count);
size = (u64)ntohl(xdr->size_lo);
@@ -233,7 +229,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
bad:
xdr_dump_bad(*_bp);
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
}
/*
@@ -273,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
write_seqlock(&vnode->cb_lock);
- if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+ if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
vnode->cb_version = ntohl(*bp++);
cb_expiry = ntohl(*bp++);
vnode->cb_type = ntohl(*bp++);
@@ -293,13 +289,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
*_bp = bp;
}
-static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
+static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+{ /* Convert an expiry given in seconds after the reply time into an absolute ktime. */
+ return ktime_add_ns(call->reply_time, (u64)expiry * NSEC_PER_SEC); /* Widen first: u32 * long wraps where long is 32-bit. */
+}
+
+static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
+ const __be32 **_bp,
struct afs_callback *cb)
{
const __be32 *bp = *_bp;
cb->version = ntohl(*bp++);
- cb->expiry = ntohl(*bp++);
+ cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++));
cb->type = ntohl(*bp++);
*_bp = bp;
}
@@ -311,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
struct afs_volsync *volsync)
{
const __be32 *bp = *_bp;
+ u32 creation;
- volsync->creation = ntohl(*bp++);
+ creation = ntohl(*bp++);
bp++; /* spare2 */
bp++; /* spare3 */
bp++; /* spare4 */
bp++; /* spare5 */
bp++; /* spare6 */
*_bp = bp;
+
+ if (volsync)
+ volsync->creation = creation;
}
/*
@@ -379,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
vs->blocks_in_use = ntohl(*bp++);
vs->part_blocks_avail = ntohl(*bp++);
vs->part_max_blocks = ntohl(*bp++);
+ vs->vol_copy_date = 0;
+ vs->vol_backup_date = 0;
*_bp = bp;
}
@@ -395,16 +403,16 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
if (ret < 0)
return ret;
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
xdr_decode_AFSCallBack(call, vnode, &bp);
- if (call->reply[1])
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSVolSync(&bp, call->reply[1]);
_leave(" = 0 [done]");
return 0;
@@ -431,7 +439,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode,
@@ -445,6 +456,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
call->reply[0] = vnode;
call->reply[1] = volsync;
call->expected_version = new_inode ? 1 : vnode->status.data_version;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -468,139 +480,117 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
struct afs_read *req = call->reply[2];
const __be32 *bp;
unsigned int size;
- void *buffer;
int ret;
- _enter("{%u,%zu/%u;%llu/%llu}",
- call->unmarshall, call->offset, call->count,
- req->remain, req->actual_len);
+ _enter("{%u,%zu/%llu}",
+ call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
switch (call->unmarshall) {
case 0:
req->actual_len = 0;
- call->offset = 0;
+ req->index = 0;
+ req->offset = req->pos & (PAGE_SIZE - 1);
call->unmarshall++;
- if (call->operation_ID != FSFETCHDATA64) {
- call->unmarshall++;
- goto no_msw;
+ if (call->operation_ID == FSFETCHDATA64) {
+ afs_extract_to_tmp64(call);
+ } else {
+ call->tmp_u = htonl(0);
+ afs_extract_to_tmp(call);
}
- /* extract the upper part of the returned data length of an
- * FSFETCHDATA64 op (which should always be 0 using this
- * client) */
- case 1:
- _debug("extract data length (MSW)");
- ret = afs_extract_data(call, &call->tmp, 4, true);
- if (ret < 0)
- return ret;
-
- req->actual_len = ntohl(call->tmp);
- req->actual_len <<= 32;
- call->offset = 0;
- call->unmarshall++;
-
- no_msw:
/* extract the returned data length */
- case 2:
+ case 1:
_debug("extract data length");
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
- req->actual_len |= ntohl(call->tmp);
+ req->actual_len = be64_to_cpu(call->tmp64);
_debug("DATA length: %llu", req->actual_len);
-
- req->remain = req->actual_len;
- call->offset = req->pos & (PAGE_SIZE - 1);
- req->index = 0;
- if (req->actual_len == 0)
+ req->remain = min(req->len, req->actual_len);
+ if (req->remain == 0)
goto no_more_data;
+
call->unmarshall++;
begin_page:
ASSERTCMP(req->index, <, req->nr_pages);
- if (req->remain > PAGE_SIZE - call->offset)
- size = PAGE_SIZE - call->offset;
+ if (req->remain > PAGE_SIZE - req->offset)
+ size = PAGE_SIZE - req->offset;
else
size = req->remain;
- call->count = call->offset + size;
- ASSERTCMP(call->count, <=, PAGE_SIZE);
- req->remain -= size;
+ call->bvec[0].bv_len = size;
+ call->bvec[0].bv_offset = req->offset;
+ call->bvec[0].bv_page = req->pages[req->index];
+ iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+ ASSERTCMP(size, <=, PAGE_SIZE);
/* extract the returned data */
- case 3:
- _debug("extract data %llu/%llu %zu/%u",
- req->remain, req->actual_len, call->offset, call->count);
+ case 2:
+ _debug("extract data %zu/%llu",
+ iov_iter_count(&call->iter), req->remain);
- buffer = kmap(req->pages[req->index]);
- ret = afs_extract_data(call, buffer, call->count, true);
- kunmap(req->pages[req->index]);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
- if (call->offset == PAGE_SIZE) {
+ req->remain -= call->bvec[0].bv_len;
+ req->offset += call->bvec[0].bv_len;
+ ASSERTCMP(req->offset, <=, PAGE_SIZE);
+ if (req->offset == PAGE_SIZE) {
+ req->offset = 0;
if (req->page_done)
req->page_done(call, req);
req->index++;
- if (req->remain > 0) {
- call->offset = 0;
- if (req->index >= req->nr_pages) {
- call->unmarshall = 4;
- goto begin_discard;
- }
+ if (req->remain > 0)
goto begin_page;
- }
}
- goto no_more_data;
+
+ ASSERTCMP(req->remain, ==, 0);
+ if (req->actual_len <= req->len)
+ goto no_more_data;
/* Discard any excess data the server gave us */
- begin_discard:
- case 4:
- size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
- call->count = size;
- _debug("extract discard %llu/%llu %zu/%u",
- req->remain, req->actual_len, call->offset, call->count);
+ iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+ call->unmarshall = 3;
+ case 3:
+ _debug("extract discard %zu/%llu",
+ iov_iter_count(&call->iter), req->actual_len - req->len);
- call->offset = 0;
- ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
- req->remain -= call->offset;
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
- if (req->remain > 0)
- goto begin_discard;
no_more_data:
- call->offset = 0;
- call->unmarshall = 5;
+ call->unmarshall = 4;
+ afs_extract_to_buf(call, (21 + 3 + 6) * 4);
/* extract the metadata */
- case 5:
- ret = afs_extract_data(call, call->buffer,
- (21 + 3 + 6) * 4, false);
+ case 4:
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, req) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &vnode->status.data_version, req);
+ if (ret < 0)
+ return ret;
xdr_decode_AFSCallBack(call, vnode, &bp);
- if (call->reply[1])
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSVolSync(&bp, call->reply[1]);
- call->offset = 0;
call->unmarshall++;
- case 6:
+ case 5:
break;
}
for (; req->index < req->nr_pages; req->index++) {
- if (call->count < PAGE_SIZE)
+ if (req->offset < PAGE_SIZE)
zero_user_segment(req->pages[req->index],
- call->count, PAGE_SIZE);
+ req->offset, PAGE_SIZE);
if (req->page_done)
req->page_done(call, req);
- call->count = 0;
+ req->offset = 0;
}
_leave(" = 0 [done]");
@@ -653,6 +643,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
call->reply[1] = NULL; /* volsync */
call->reply[2] = req;
call->expected_version = vnode->status.data_version;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -682,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_fetch_data(fc, req);
+
if (upper_32_bits(req->pos) ||
upper_32_bits(req->len) ||
upper_32_bits(req->pos + req->len))
@@ -698,6 +692,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
call->reply[1] = NULL; /* volsync */
call->reply[2] = req;
call->expected_version = vnode->status.data_version;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -733,11 +728,14 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->reply[1]);
- if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
- afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
- xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+ ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ if (ret < 0)
+ return ret;
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -778,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc,
size_t namesz, reqsz, padsz;
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
+ if (S_ISDIR(mode))
+ return yfs_fs_make_dir(fc, name, mode, current_data_version,
+ newfid, newstatus, newcb);
+ else
+ return yfs_fs_create_file(fc, name, mode, current_data_version,
+ newfid, newstatus, newcb);
+ }
+
_enter("");
namesz = strlen(name);
@@ -796,6 +803,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
call->reply[2] = newstatus;
call->reply[3] = newcb;
call->expected_version = current_data_version + 1;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -839,9 +847,10 @@ static int afs_deliver_fs_remove(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -868,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
/*
* remove a file or directory
*/
-int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
- u64 current_data_version)
+int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+ const char *name, bool isdir, u64 current_data_version)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+
_enter("");
namesz = strlen(name);
@@ -890,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
+ call->reply[0] = dvnode;
+ call->reply[1] = vnode;
call->expected_version = current_data_version + 1;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
- *bp++ = htonl(vnode->fid.vid);
- *bp++ = htonl(vnode->fid.vnode);
- *bp++ = htonl(vnode->fid.unique);
+ *bp++ = htonl(dvnode->fid.vid);
+ *bp++ = htonl(dvnode->fid.vnode);
+ *bp++ = htonl(dvnode->fid.unique);
*bp++ = htonl(namesz);
memcpy(bp, name, namesz);
bp = (void *) bp + namesz;
@@ -908,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
}
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call(call, &vnode->fid);
+ trace_afs_make_fs_call(call, &dvnode->fid);
return afs_make_call(&fc->ac, call, GFP_NOFS, false);
}
@@ -929,10 +942,13 @@ static int afs_deliver_fs_link(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 ||
- afs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ if (ret < 0)
+ return ret;
+ ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -961,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
size_t namesz, reqsz, padsz;
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_link(fc, vnode, name, current_data_version);
+
_enter("");
namesz = strlen(name);
@@ -1016,10 +1035,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->reply[1]);
- if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) ||
- afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ if (ret < 0)
+ return ret;
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -1052,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
size_t namesz, reqsz, padsz, c_namesz, c_padsz;
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_symlink(fc, name, contents, current_data_version,
+ newfid, newstatus);
+
_enter("");
namesz = strlen(name);
@@ -1122,13 +1148,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
- if (new_dvnode != orig_dvnode &&
- afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
- &call->expected_version_2, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ if (new_dvnode != orig_dvnode) {
+ ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+ &call->expected_version_2, NULL);
+ if (ret < 0)
+ return ret;
+ }
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -1161,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_rename(fc, orig_name,
+ new_dvnode, new_name,
+ current_orig_data_version,
+ current_new_data_version);
+
_enter("");
o_namesz = strlen(orig_name);
@@ -1231,9 +1266,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
afs_pages_written_back(vnode, call);
@@ -1273,7 +1309,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
@@ -1330,7 +1366,10 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
loff_t size, pos, i_size;
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
size = (loff_t)to - (loff_t)offset;
@@ -1407,9 +1446,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- if (afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
_leave(" = 0 [done]");
@@ -1451,7 +1491,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1498,7 +1538,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1544,10 +1584,13 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_setattr(fc, attr);
+
if (attr->ia_valid & ATTR_SIZE)
return afs_fs_setattr_size(fc, attr);
- _enter(",%x,{%x:%u},,",
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
@@ -1581,164 +1624,114 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
{
const __be32 *bp;
char *p;
+ u32 size;
int ret;
_enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
- call->offset = 0;
call->unmarshall++;
+ afs_extract_to_buf(call, 12 * 4);
/* extract the returned status record */
case 1:
_debug("extract status");
- ret = afs_extract_data(call, call->buffer,
- 12 * 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
bp = call->buffer;
xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
- call->offset = 0;
call->unmarshall++;
+ afs_extract_to_tmp(call);
/* extract the volume name length */
case 2:
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->count = ntohl(call->tmp);
_debug("volname length: %u", call->count);
if (call->count >= AFSNAMEMAX)
- return afs_protocol_error(call, -EBADMSG);
- call->offset = 0;
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_volname_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
call->unmarshall++;
/* extract the volume name */
case 3:
_debug("extract volname");
- if (call->count > 0) {
- ret = afs_extract_data(call, call->reply[2],
- call->count, true);
- if (ret < 0)
- return ret;
- }
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
p = call->reply[2];
p[call->count] = 0;
_debug("volname '%s'", p);
-
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
- /* extract the volume name padding */
- if ((call->count & 3) == 0) {
- call->unmarshall++;
- goto no_volname_padding;
- }
- call->count = 4 - (call->count & 3);
-
- case 4:
- ret = afs_extract_data(call, call->buffer,
- call->count, true);
- if (ret < 0)
- return ret;
-
- call->offset = 0;
- call->unmarshall++;
- no_volname_padding:
-
/* extract the offline message length */
- case 5:
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ case 4:
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->count = ntohl(call->tmp);
_debug("offline msg length: %u", call->count);
if (call->count >= AFSNAMEMAX)
- return afs_protocol_error(call, -EBADMSG);
- call->offset = 0;
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_offline_msg_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
call->unmarshall++;
/* extract the offline message */
- case 6:
+ case 5:
_debug("extract offline");
- if (call->count > 0) {
- ret = afs_extract_data(call, call->reply[2],
- call->count, true);
- if (ret < 0)
- return ret;
- }
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
p = call->reply[2];
p[call->count] = 0;
_debug("offline '%s'", p);
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
- /* extract the offline message padding */
- if ((call->count & 3) == 0) {
- call->unmarshall++;
- goto no_offline_padding;
- }
- call->count = 4 - (call->count & 3);
-
- case 7:
- ret = afs_extract_data(call, call->buffer,
- call->count, true);
- if (ret < 0)
- return ret;
-
- call->offset = 0;
- call->unmarshall++;
- no_offline_padding:
-
/* extract the message of the day length */
- case 8:
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ case 6:
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->count = ntohl(call->tmp);
_debug("motd length: %u", call->count);
if (call->count >= AFSNAMEMAX)
- return afs_protocol_error(call, -EBADMSG);
- call->offset = 0;
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_motd_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
call->unmarshall++;
/* extract the message of the day */
- case 9:
+ case 7:
_debug("extract motd");
- if (call->count > 0) {
- ret = afs_extract_data(call, call->reply[2],
- call->count, true);
- if (ret < 0)
- return ret;
- }
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
p = call->reply[2];
p[call->count] = 0;
_debug("motd '%s'", p);
- call->offset = 0;
call->unmarshall++;
- /* extract the message of the day padding */
- call->count = (4 - (call->count & 3)) & 3;
-
- case 10:
- ret = afs_extract_data(call, call->buffer,
- call->count, false);
- if (ret < 0)
- return ret;
-
- call->offset = 0;
- call->unmarshall++;
- case 11:
+ case 8:
break;
}
@@ -1778,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
__be32 *bp;
void *tmpbuf;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_get_volume_status(fc, vs);
+
_enter("");
tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
@@ -1867,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_set_lock(fc, type);
+
_enter("");
call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
@@ -1899,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_extend_lock(fc);
+
_enter("");
call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
@@ -1930,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_release_lock(fc);
+
_enter("");
call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
@@ -2004,19 +2009,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
u32 count;
int ret;
- _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+ _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
-again:
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
/* Extract the capabilities word count */
case 1:
- ret = afs_extract_data(call, &call->tmp,
- 1 * sizeof(__be32),
- true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -2024,24 +2026,17 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
call->count = count;
call->count2 = count;
- call->offset = 0;
+ iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
call->unmarshall++;
/* Extract capabilities words */
case 2:
- count = min(call->count, 16U);
- ret = afs_extract_data(call, call->buffer,
- count * sizeof(__be32),
- call->count > 16);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
/* TODO: Examine capabilities */
- call->count -= count;
- if (call->count > 0)
- goto again;
- call->offset = 0;
call->unmarshall++;
break;
}
@@ -2050,6 +2045,14 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
return 0;
}
+static void afs_destroy_fs_get_capabilities(struct afs_call *call)
+{
+ struct afs_server *server = call->reply[0];
+
+ afs_put_server(call->net, server);
+ afs_flat_call_destructor(call);
+}
+
/*
* FS.GetCapabilities operation type
*/
@@ -2057,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.name = "FS.GetCapabilities",
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
- .destructor = afs_flat_call_destructor,
+ .done = afs_fileserver_probe_result,
+ .destructor = afs_destroy_fs_get_capabilities,
};
/*
@@ -2067,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
int afs_fs_get_capabilities(struct afs_net *net,
struct afs_server *server,
struct afs_addr_cursor *ac,
- struct key *key)
+ struct key *key,
+ unsigned int server_index,
+ bool async)
{
struct afs_call *call;
__be32 *bp;
@@ -2079,6 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
return -ENOMEM;
call->key = key;
+ call->reply[0] = afs_get_server(server);
+ call->reply[1] = (void *)(long)server_index;
+ call->upgrade = true;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -2086,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
/* Can't take a ref on server */
trace_afs_make_fs_call(call, NULL);
- return afs_make_call(ac, call, GFP_NOFS, false);
+ return afs_make_call(ac, call, GFP_NOFS, async);
}
/*
@@ -2097,7 +2107,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
struct afs_file_status *status = call->reply[1];
struct afs_callback *callback = call->reply[2];
struct afs_volsync *volsync = call->reply[3];
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_fid *fid = call->reply[0];
const __be32 *bp;
int ret;
@@ -2105,21 +2115,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
if (ret < 0)
return ret;
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu}", fid->vid, fid->vnode);
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- afs_decode_status(call, &bp, status, vnode,
- &call->expected_version, NULL);
- callback[call->count].version = ntohl(bp[0]);
- callback[call->count].expiry = ntohl(bp[1]);
- callback[call->count].type = ntohl(bp[2]);
- if (vnode)
- xdr_decode_AFSCallBack(call, vnode, &bp);
- else
- bp += 3;
- if (volsync)
- xdr_decode_AFSVolSync(&bp, volsync);
+ ret = afs_decode_status(call, &bp, status, NULL,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_AFSCallBack_raw(call, &bp, callback);
+ xdr_decode_AFSVolSync(&bp, volsync);
_leave(" = 0 [done]");
return 0;
@@ -2148,7 +2153,10 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
struct afs_call *call;
__be32 *bp;
- _enter(",%x,{%x:%u},,",
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+
+ _enter(",%x,{%llx:%llu},,",
key_serial(fc->key), fid->vid, fid->vnode);
call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -2158,11 +2166,12 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
+ call->reply[0] = fid;
call->reply[1] = status;
call->reply[2] = callback;
call->reply[3] = volsync;
call->expected_version = 1; /* vnode->status.data_version */
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -2193,38 +2202,40 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
/* Extract the file status count and array in two steps */
case 1:
_debug("extract status count");
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
tmp = ntohl(call->tmp);
_debug("status count: %u/%u", tmp, call->count2);
if (tmp != call->count2)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_ibulkst_count);
call->count = 0;
call->unmarshall++;
more_counts:
- call->offset = 0;
+ afs_extract_to_buf(call, 21 * sizeof(__be32));
case 2:
_debug("extract status array %u", call->count);
- ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
bp = call->buffer;
statuses = call->reply[1];
- if (afs_decode_status(call, &bp, &statuses[call->count],
- call->count == 0 ? vnode : NULL,
- NULL, NULL) < 0)
- return afs_protocol_error(call, -EBADMSG);
+ ret = afs_decode_status(call, &bp, &statuses[call->count],
+ call->count == 0 ? vnode : NULL,
+ NULL, NULL);
+ if (ret < 0)
+ return ret;
call->count++;
if (call->count < call->count2)
@@ -2232,27 +2243,28 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
call->count = 0;
call->unmarshall++;
- call->offset = 0;
+ afs_extract_to_tmp(call);
/* Extract the callback count and array in two steps */
case 3:
_debug("extract CB count");
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
tmp = ntohl(call->tmp);
_debug("CB count: %u", tmp);
if (tmp != call->count2)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_ibulkst_cb_count);
call->count = 0;
call->unmarshall++;
more_cbs:
- call->offset = 0;
+ afs_extract_to_buf(call, 3 * sizeof(__be32));
case 4:
_debug("extract CB array");
- ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -2260,7 +2272,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
bp = call->buffer;
callbacks = call->reply[2];
callbacks[call->count].version = ntohl(bp[0]);
- callbacks[call->count].expiry = ntohl(bp[1]);
+ callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
callbacks[call->count].type = ntohl(bp[2]);
statuses = call->reply[1];
if (call->count == 0 && vnode && statuses[0].abort_code == 0)
@@ -2269,19 +2281,17 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
if (call->count < call->count2)
goto more_cbs;
- call->offset = 0;
+ afs_extract_to_buf(call, 6 * sizeof(__be32));
call->unmarshall++;
case 5:
- ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
bp = call->buffer;
- if (call->reply[3])
- xdr_decode_AFSVolSync(&bp, call->reply[3]);
+ xdr_decode_AFSVolSync(&bp, call->reply[3]);
- call->offset = 0;
call->unmarshall++;
case 6:
@@ -2317,7 +2327,11 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
__be32 *bp;
int i;
- _enter(",%x,{%x:%u},%u",
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+ return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+ nr_fids, volsync);
+
+ _enter(",%x,{%llx:%llu},%u",
key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus,
@@ -2334,6 +2348,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
call->reply[2] = callbacks;
call->reply[3] = volsync;
call->count2 = nr_fids;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 479b7fd..4c6d8e1 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
default:
printk("kAFS: AFS vnode with undefined type\n");
read_sequnlock_excl(&vnode->cb_lock);
- return afs_protocol_error(NULL, -EBADMSG);
+ return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
inode->i_blocks = 0;
@@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
struct afs_fs_cursor fc;
int ret;
- _enter("%s,{%x:%u.%u,S=%lx}",
+ _enter("%s,{%llx:%llu.%u,S=%lx}",
vnode->volume->name,
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
vnode->flags);
@@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
int afs_iget5_test(struct inode *inode, void *opaque)
{
struct afs_iget_data *data = opaque;
+ struct afs_vnode *vnode = AFS_FS_I(inode);
- return inode->i_ino == data->fid.vnode &&
- inode->i_generation == data->fid.unique;
+ return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
}
/*
@@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
struct afs_iget_data *data = opaque;
struct afs_vnode *vnode = AFS_FS_I(inode);
- inode->i_ino = data->fid.vnode;
- inode->i_generation = data->fid.unique;
vnode->fid = data->fid;
vnode->volume = data->volume;
+ /* YFS supports 96-bit vnode IDs, but Linux only supports
+ * 64-bit inode numbers.
+ */
+ inode->i_ino = data->fid.vnode;
+ inode->i_generation = data->fid.unique;
return 0;
}
@@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
return ERR_PTR(-ENOMEM);
}
- _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
+ _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
inode, inode->i_ino, data.fid.vid, data.fid.vnode,
data.fid.unique);
@@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
key.vnode_id = vnode->fid.vnode;
key.unique = vnode->fid.unique;
- key.vnode_id_ext[0] = 0;
- key.vnode_id_ext[1] = 0;
+ key.vnode_id_ext[0] = vnode->fid.vnode >> 32;
+ key.vnode_id_ext[1] = vnode->fid.vnode_hi;
aux.data_version = vnode->status.data_version;
vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
@@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
struct inode *inode;
int ret;
- _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
+ _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
as = sb->s_fs_info;
data.volume = as->volume;
@@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
return ERR_PTR(-ENOMEM);
}
- _debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+ _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
inode, fid->vid, fid->vnode, fid->unique);
vnode = AFS_FS_I(inode);
@@ -314,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
* didn't give us a callback) */
vnode->cb_version = 0;
vnode->cb_type = 0;
- vnode->cb_expires_at = 0;
+ vnode->cb_expires_at = ktime_get();
} else {
vnode->cb_version = cb->version;
vnode->cb_type = cb->type;
- vnode->cb_expires_at = cb->expiry;
+ vnode->cb_expires_at = cb->expires_at;
vnode->cb_interest = afs_get_cb_interest(cbi);
set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
@@ -352,7 +355,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
*/
void afs_zap_data(struct afs_vnode *vnode)
{
- _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
#ifdef CONFIG_AFS_FSCACHE
fscache_invalidate(vnode->cache);
@@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
bool valid = false;
int ret;
- _enter("{v={%x:%u} fl=%lx},%x",
+ _enter("{v={%llx:%llu} fl=%lx},%x",
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
@@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode)
vnode = AFS_FS_I(inode);
- _enter("{%x:%u.%d}",
+ _enter("{%llx:%llu.%d}",
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique);
@@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
struct key *key;
int ret;
- _enter("{%x:%u},{n=%pd},%x",
+ _enter("{%llx:%llu},{n=%pd},%x",
vnode->fid.vid, vnode->fid.vnode, dentry,
attr->ia_valid);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72de1f1..5da3b09 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -22,6 +22,7 @@
#include <linux/backing-dev.h>
#include <linux/uuid.h>
#include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -75,10 +76,13 @@ struct afs_addr_list {
u32 version; /* Version */
unsigned char max_addrs;
unsigned char nr_addrs;
- unsigned char index; /* Address currently in use */
+ unsigned char preferred; /* Preferred address */
unsigned char nr_ipv4; /* Number of IPv4 addresses */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
unsigned long probed; /* Mask of servers that have been probed */
- unsigned long yfs; /* Mask of servers that are YFS */
+ unsigned long failed; /* Mask of addrs that failed locally/ICMP */
+ unsigned long responded; /* Mask of addrs that responded */
struct sockaddr_rxrpc addrs[];
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
@@ -88,6 +92,7 @@ struct afs_addr_list {
*/
struct afs_call {
const struct afs_call_type *type; /* type of call */
+ struct afs_addr_list *alist; /* Address is alist->addrs[addr_ix] */
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
@@ -98,16 +103,22 @@ struct afs_call {
struct afs_cb_interest *cbi; /* Callback interest for server used */
void *request; /* request data (first part) */
struct address_space *mapping; /* Pages being written from */
+ struct iov_iter iter; /* Buffer iterator */
+ struct iov_iter *_iter; /* Iterator currently in use */
+ union { /* Convenience for ->iter */
+ struct kvec kvec[1];
+ struct bio_vec bvec[1];
+ };
void *buffer; /* reply receive buffer */
void *reply[4]; /* Where to put the reply */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
- size_t offset; /* offset into received data store */
atomic_t usage;
enum afs_call_state state;
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
+ u32 epoch;
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
@@ -117,19 +128,28 @@ struct afs_call {
unsigned count2; /* count used in unmarshalling */
};
unsigned char unmarshall; /* unmarshalling phase */
+ unsigned char addr_ix; /* Address in ->alist */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
+ bool want_reply_time; /* T if want reply_time */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
- __be32 tmp; /* place to extract temporary data */
+ union { /* place to extract temporary data */
+ struct {
+ __be32 tmp_u;
+ __be32 tmp;
+ } __attribute__((packed));
+ __be64 tmp64;
+ };
afs_dataversion_t expected_version; /* Updated version expected from store */
afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */
+ ktime_t reply_time; /* Time of first reply packet */
};
struct afs_call_type {
@@ -146,6 +166,9 @@ struct afs_call_type {
/* Work function */
void (*work)(struct work_struct *work);
+
+ /* Call done function (gets called immediately on success or failure) */
+ void (*done)(struct afs_call *call);
};
/*
@@ -185,6 +208,7 @@ struct afs_read {
refcount_t usage;
unsigned int index; /* Which page we're reading into */
unsigned int nr_pages;
+ unsigned int offset; /* offset into current page */
void (*page_done)(struct afs_call *, struct afs_read *);
struct page **pages;
struct page *array[];
@@ -343,13 +367,70 @@ struct afs_cell {
rwlock_t proc_lock;
/* VL server list. */
- rwlock_t vl_addrs_lock; /* Lock on vl_addrs */
- struct afs_addr_list __rcu *vl_addrs; /* List of VL servers */
+ rwlock_t vl_servers_lock; /* Lock on vl_servers */
+ struct afs_vlserver_list __rcu *vl_servers;
+
u8 name_len; /* Length of name */
char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */
};
/*
+ * Volume Location server record.
+ */
+struct afs_vlserver {
+ struct rcu_head rcu;
+ struct afs_addr_list __rcu *addresses; /* List of addresses for this VL server */
+ unsigned long flags;
+#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
+ rwlock_t lock; /* Lock on addresses */
+ atomic_t usage;
+
+ /* Probe state */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT as ktime/64 */
+ u32 abort_code;
+ short error;
+ bool have_result;
+ bool responded:1;
+ bool is_yfs:1;
+ bool not_yfs:1;
+ bool local_failure:1;
+ } probe;
+
+ u16 port;
+ u16 name_len; /* Length of name */
+ char name[]; /* Server name, case-flattened */
+};
+
+/*
+ * Weighted list of Volume Location servers.
+ */
+struct afs_vlserver_entry {
+ u16 priority; /* Preference (as SRV) */
+ u16 weight; /* Weight (as SRV) */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
+ struct afs_vlserver *server;
+};
+
+struct afs_vlserver_list {
+ struct rcu_head rcu;
+ atomic_t usage;
+ u8 nr_servers;
+ u8 index; /* Server currently in use */
+ u8 preferred; /* Preferred server */
+ enum dns_record_source source:8;
+ enum dns_lookup_status status:8;
+ rwlock_t lock;
+ struct afs_vlserver_entry servers[];
+};
+
+/*
* Cached VLDB entry.
*
* This is pointed to by cell->vldb_entries, indexed by name.
@@ -403,8 +484,12 @@ struct afs_server {
#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */
#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */
#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->cm_epoch is valid */
atomic_t usage;
u32 addr_version; /* Address list version */
+ u32 cm_epoch; /* Server RxRPC epoch */
/* file service access */
rwlock_t fs_lock; /* access lock */
@@ -413,6 +498,26 @@ struct afs_server {
struct hlist_head cb_volumes; /* List of volume interests on this server */
unsigned cb_s_break; /* Break-everything counter. */
rwlock_t cb_break_lock; /* Volume finding lock */
+
+ /* Probe state */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT as ktime/64 */
+ u32 abort_code;
+ u32 cm_epoch;
+ short error;
+ bool have_result;
+ bool responded:1;
+ bool is_yfs:1;
+ bool not_yfs:1;
+ bool local_failure:1;
+ bool no_epoch:1;
+ bool cm_probed:1;
+ bool said_rebooted:1;
+ bool said_inconsistent:1;
+ } probe;
};
/*
@@ -447,8 +552,8 @@ struct afs_server_entry {
struct afs_server_list {
refcount_t usage;
- unsigned short nr_servers;
- unsigned short index; /* Server currently in use */
+ unsigned char nr_servers;
+ unsigned char preferred; /* Preferred server */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
rwlock_t lock;
@@ -550,6 +655,15 @@ struct afs_vnode {
afs_callback_type_t cb_type; /* type of callback */
};
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+ return vnode->cache;
+#else
+ return NULL;
+#endif
+}
+
/*
* cached security record for one user's attempt to access a vnode
*/
@@ -586,13 +700,31 @@ struct afs_interface {
*/
struct afs_addr_cursor {
struct afs_addr_list *alist; /* Current address list (pins ref) */
- struct sockaddr_rxrpc *addr;
- u32 abort_code;
- unsigned short start; /* Starting point in alist->addrs[] */
- unsigned short index; /* Wrapping offset from start to current addr */
- short error;
- bool begun; /* T if we've begun iteration */
+ unsigned long tried; /* Tried addresses */
+ signed char index; /* Current address */
bool responded; /* T if the current address responded */
+ unsigned short nr_iterations; /* Number of address iterations */
+ short error;
+ u32 abort_code;
+};
+
+/*
+ * Cursor for iterating over a set of volume location servers.
+ */
+struct afs_vl_cursor {
+ struct afs_addr_cursor ac;
+ struct afs_cell *cell; /* The cell we're querying */
+ struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
+ struct afs_vlserver *server; /* Server on which this resides */
+ struct key *key; /* Key for the server */
+ unsigned long untried; /* Bitmask of untried servers */
+ short index; /* Current server */
+ short error;
+ unsigned short flags;
+#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY 0x0002 /* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED 0x0004 /* Set if started a retry */
+ unsigned short nr_iterations; /* Number of server iterations */
};
/*
@@ -604,10 +736,11 @@ struct afs_fs_cursor {
struct afs_server_list *server_list; /* Current server list (pins ref) */
struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
struct key *key; /* Key for the server */
+ unsigned long untried; /* Bitmask of untried servers */
unsigned int cb_break; /* cb_break + cb_s_break before the call */
unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
- unsigned char start; /* Initial index in server list */
- unsigned char index; /* Number of servers tried beyond start */
+ short index; /* Current server */
+ short error;
unsigned short flags;
#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */
@@ -615,6 +748,7 @@ struct afs_fs_cursor {
#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */
#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+ unsigned short nr_iterations; /* Number of server iterations */
};
/*
@@ -640,12 +774,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
unsigned short,
unsigned short);
extern void afs_put_addrlist(struct afs_addr_list *);
-extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
- unsigned short, unsigned short);
-extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+ const char *, size_t, char,
+ unsigned short, unsigned short);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
extern bool afs_iterate_addresses(struct afs_addr_cursor *);
extern int afs_end_cursor(struct afs_addr_cursor *);
-extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -668,6 +802,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
* callback.c
*/
extern void afs_init_callback_state(struct afs_server *);
+extern void __afs_break_callback(struct afs_vnode *);
extern void afs_break_callback(struct afs_vnode *);
extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
@@ -688,10 +823,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
}
-static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
- struct afs_cb_interest *cbi)
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+ const struct afs_vnode *vnode,
+ const struct afs_cb_interest *cbi)
{
- return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+ return !cbi || cb_break != (vnode->cb_break +
+ cbi->server->cb_s_break +
+ vnode->volume->cb_v_break);
}
/*
@@ -781,7 +919,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
struct afs_fid *, struct afs_file_status *);
@@ -797,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *);
extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
+ struct afs_addr_cursor *, struct key *, unsigned int, bool);
extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
struct afs_fid *, struct afs_file_status *,
struct afs_callback *, unsigned int,
@@ -807,6 +945,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
struct afs_callback *, struct afs_volsync *);
/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+
+/*
* inode.c
*/
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
@@ -922,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
-extern int afs_queue_call_work(struct afs_call *);
extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
@@ -930,12 +1074,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
extern void afs_flat_call_destructor(struct afs_call *);
extern void afs_send_empty_reply(struct afs_call *);
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-extern int afs_protocol_error(struct afs_call *, int);
+extern int afs_extract_data(struct afs_call *, bool);
+extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
+
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+ call->kvec[0].iov_base = buf;
+ call->kvec[0].iov_len = size;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+ afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+ afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+ iov_iter_discard(&call->iter, READ, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+ afs_extract_begin(call, call->buffer, size);
+}
static inline int afs_transfer_reply(struct afs_call *call)
{
- return afs_extract_data(call, call->buffer, call->reply_max, false);
+ return afs_extract_data(call, false);
}
static inline bool afs_check_call_state(struct afs_call *call,
@@ -1012,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_probe_fileserver(struct afs_fs_cursor *);
extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
/*
@@ -1039,14 +1209,51 @@ extern void afs_fs_exit(void);
/*
* vlclient.c
*/
-extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
- struct afs_addr_cursor *,
- struct key *, const char *, int);
-extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
- struct key *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
-extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
- struct key *, const uuid_t *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+ const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
+ struct afs_vlserver *, unsigned int, bool);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
+/*
+ * vl_rotate.c
+ */
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+ struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vlserver_list.c
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
+{
+ atomic_inc(&vlserver->usage);
+ return vlserver;
+}
+
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
+{
+ if (vllist)
+ atomic_inc(&vllist->usage);
+ return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+ const void *, size_t);
/*
* volume.c
@@ -1089,6 +1296,36 @@ extern int afs_launder_page(struct page *);
extern const struct xattr_handler *afs_xattr_handlers[];
extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
+/*
+ * yfsclient.c
+ */
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
+ struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
+ struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
+ struct afs_fid *, struct afs_file_status *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
+ struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
+ pgoff_t, pgoff_t, unsigned, unsigned);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
+ struct afs_fid *, struct afs_file_status *,
+ struct afs_callback *, struct afs_volsync *);
+extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
+ struct afs_fid *, struct afs_file_status *,
+ struct afs_callback *, unsigned int,
+ struct afs_volsync *);
/*
* Miscellaneous inline functions.
@@ -1120,6 +1357,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
}
}
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+ trace_afs_io_error(call->debug_id, -EIO, where); /* pass the call's debug ID, the error and the detection site to the tracepoint */
+ return -EIO; /* always -EIO, whatever 'where' is */
+}
+
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+ trace_afs_file_error(vnode, -EIO, where); /* pass the vnode, the error and the detection site to the tracepoint */
+ return -EIO; /* always -EIO, so the result can be assigned straight to a return code */
+}
/*****************************************************************************/
/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 99fd135..2e51c69 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
goto error_no_page;
}
- ret = -EIO;
- if (PageError(page))
+ if (PageError(page)) {
+ ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
goto error;
+ }
buf = kmap_atomic(page);
memcpy(devname, buf, size);
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9101f62..be2ee3b 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,6 +17,11 @@
#include <linux/uaccess.h>
#include "internal.h"
+struct afs_vl_seq_net_private { /* per-open private data for the "vlservers" proc file */
+ struct seq_net_private seq; /* Must be first */
+ struct afs_vlserver_list *vllist; /* list pointer stored by the start op and read back by the next and show ops */
+};
+
static inline struct afs_net *afs_seq2net(struct seq_file *m)
{
return afs_net(seq_file_net(m));
@@ -32,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
*/
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
- struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+ struct afs_vlserver_list *vllist;
+ struct afs_cell *cell;
 if (v == SEQ_START_TOKEN) {
 /* display header on line 1 */
- seq_puts(m, "USE NAME\n");
+ seq_puts(m, "USE TTL SV NAME\n");
 return 0;
 }
+ cell = list_entry(v, struct afs_cell, proc_link); /* only converted once v is known not to be the header token */
+ vllist = rcu_dereference(cell->vl_servers); /* may be NULL; checked where it is used below */
+
 /* display one cell per line on subsequent lines */
- seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
+ seq_printf(m, "%3u %6lld %2u %s\n",
+ atomic_read(&cell->usage),
+ cell->dns_expiry - ktime_get_real_seconds(), /* TTL column: expiry minus current time, so negative once past expiry */
+ vllist ? vllist->nr_servers : 0, /* SV column: server count, 0 when there is no list */
+ cell->name);
 return 0;
 }
@@ -208,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
return 0;
}
- seq_printf(m, "%3d %08x %s\n",
+ seq_printf(m, "%3d %08llx %s\n",
atomic_read(&vol->usage), vol->vid,
afs_vol_types[vol->type]);
@@ -247,61 +260,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
.show = afs_proc_cell_volumes_show,
};
+static const char *const dns_record_sources[NR__dns_record_source + 1] = { /* printable name for each DNS record source, indexed by the source value */
+ [DNS_RECORD_UNAVAILABLE] = "unav",
+ [DNS_RECORD_FROM_CONFIG] = "cfg",
+ [DNS_RECORD_FROM_DNS_A] = "A",
+ [DNS_RECORD_FROM_DNS_AFSDB] = "AFSDB",
+ [DNS_RECORD_FROM_DNS_SRV] = "SRV",
+ [DNS_RECORD_FROM_NSS] = "nss",
+ [NR__dns_record_source] = "[weird]" /* extra slot (array is NR + 1 long); NOTE(review): the lookups visible in this file index with the raw value and do not clamp to this slot */
+};
+
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = { /* printable name for each DNS lookup status, indexed by the status value */
+ [DNS_LOOKUP_NOT_DONE] = "no-lookup",
+ [DNS_LOOKUP_GOOD] = "good",
+ [DNS_LOOKUP_GOOD_WITH_BAD] = "good/bad",
+ [DNS_LOOKUP_BAD] = "bad",
+ [DNS_LOOKUP_GOT_NOT_FOUND] = "not-found",
+ [DNS_LOOKUP_GOT_LOCAL_FAILURE] = "local-failure",
+ [DNS_LOOKUP_GOT_TEMP_FAILURE] = "temp-failure",
+ [DNS_LOOKUP_GOT_NS_FAILURE] = "ns-failure",
+ [NR__dns_lookup_status] = "[weird]" /* extra slot (array is NR + 1 long); NOTE(review): the lookups visible in this file index with the raw value and do not clamp to this slot */
+};
+
/*
* Display the list of Volume Location servers we're using for a cell.
*/
static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
{
- struct sockaddr_rxrpc *addr = v;
+ const struct afs_vl_seq_net_private *priv = m->private;
+ const struct afs_vlserver_list *vllist = priv->vllist;
+ const struct afs_vlserver_entry *entry;
+ const struct afs_vlserver *vlserver;
+ const struct afs_addr_list *alist;
+ int i;
- /* display header on line 1 */
- if (v == (void *)1) {
- seq_puts(m, "ADDRESS\n");
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(m, "# source %s, status %s\n",
+ dns_record_sources[vllist->source],
+ dns_lookup_statuses[vllist->status]);
return 0;
}
- /* display one cell per line on subsequent lines */
- seq_printf(m, "%pISp\n", &addr->transport);
+ entry = v;
+ vlserver = entry->server;
+ alist = rcu_dereference(vlserver->addresses);
+
+ seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+ vlserver->name, entry->priority, entry->weight,
+ dns_record_sources[alist ? alist->source : entry->source],
+ dns_lookup_statuses[alist ? alist->status : entry->status]);
+ if (alist) {
+ for (i = 0; i < alist->nr_addrs; i++)
+ seq_printf(m, " %c %pISpc\n",
+ alist->preferred == i ? '>' : '-',
+ &alist->addrs[i].transport);
+ }
return 0;
}
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 __acquires(rcu)
 {
- struct afs_addr_list *alist;
+ struct afs_vl_seq_net_private *priv = m->private;
+ struct afs_vlserver_list *vllist;
 struct afs_cell *cell = PDE_DATA(file_inode(m->file));
 loff_t pos = *_pos;
 rcu_read_lock();
- alist = rcu_dereference(cell->vl_addrs);
+ vllist = rcu_dereference(cell->vl_servers);
+ priv->vllist = vllist; /* stored (possibly NULL) so the next and show ops work from this same pointer */
- /* allow for the header line */
- if (!pos)
- return (void *) 1;
- pos--;
+ if (pos < 0) /* a negative position is reset to 0, both locally and in *_pos */
+ *_pos = pos = 0;
+ if (pos == 0)
+ return SEQ_START_TOKEN; /* header token; returned before vllist is tested, so also when vllist is NULL */
- if (!alist || pos >= alist->nr_addrs)
+ if (!vllist || pos - 1 >= vllist->nr_servers) /* position N (N >= 1) maps to servers[N - 1] */
 return NULL;
- return alist->addrs + pos;
+ return &vllist->servers[pos - 1];
 }
 static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
 loff_t *_pos)
 {
- struct afs_addr_list *alist;
- struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+ struct afs_vl_seq_net_private *priv = m->private;
+ struct afs_vlserver_list *vllist = priv->vllist; /* pointer stored by afs_proc_cell_vlservers_start(); cell->vl_servers is not re-read here */
 loff_t pos;
- alist = rcu_dereference(cell->vl_addrs);
-
 pos = *_pos;
- (*_pos)++;
- if (!alist || pos >= alist->nr_addrs)
+ pos++; /* advance first, then publish the new position */
+ *_pos = pos;
+ if (!vllist || pos - 1 >= vllist->nr_servers) /* same position-to-index mapping as the start op: N maps to servers[N - 1] */
 return NULL;
- return alist->addrs + pos;
+ return &vllist->servers[pos - 1];
 }
static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
@@ -337,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
&server->uuid,
atomic_read(&server->usage),
&alist->addrs[0].transport,
- alist->index == 0 ? "*" : "");
+ alist->preferred == 0 ? "*" : "");
for (i = 1; i < alist->nr_addrs; i++)
seq_printf(m, " %pISpc%s\n",
&alist->addrs[i].transport,
- alist->index == i ? "*" : "");
+ alist->preferred == i ? "*" : "");
return 0;
}
@@ -562,7 +616,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
if (!proc_create_net_data("vlservers", 0444, dir,
&afs_proc_cell_vlservers_ops,
- sizeof(struct seq_net_private),
+ sizeof(struct afs_vl_seq_net_private),
cell) ||
!proc_create_net_data("volumes", 0444, dir,
&afs_proc_cell_volumes_ops,
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644
index 0000000..07bc10f
--- /dev/null
+++ b/fs/afs/protocol_yfs.h
@@ -0,0 +1,163 @@
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define YFS_FS_SERVICE 2500
+#define YFS_CM_SERVICE 2501
+
+#define YFSCBMAX 1024
+
+enum YFS_CM_Operations {
+ YFSCBProbe = 206, /* probe client */
+ YFSCBGetLock = 207, /* get contents of CM lock table */
+ YFSCBXStatsVersion = 209, /* get version of extended statistics */
+ YFSCBGetXStats = 210, /* get contents of extended statistics data */
+ YFSCBInitCallBackState3 = 213, /* initialise callback state, version 3 */
+ YFSCBProbeUuid = 214, /* check the client hasn't rebooted */
+ YFSCBGetServerPrefs = 215,
+ YFSCBGetCellServDV = 216,
+ YFSCBGetLocalCell = 217,
+ YFSCBGetCacheConfig = 218,
+ YFSCBGetCellByNum = 65537,
+ YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+ YFSCBCallBack = 64204,
+};
+
+enum YFS_FS_Operations {
+ YFSFETCHACL = 64131, /* YFS Fetch file ACL */
+ YFSFETCHSTATUS = 64132, /* YFS Fetch file status */
+ YFSSTOREACL = 64134, /* YFS Store file ACL */
+ YFSSTORESTATUS = 64135, /* YFS Store file status */
+ YFSREMOVEFILE = 64136, /* YFS Remove a file */
+ YFSCREATEFILE = 64137, /* YFS Create a file */
+ YFSRENAME = 64138, /* YFS Rename or move a file or directory */
+ YFSSYMLINK = 64139, /* YFS Create a symbolic link */
+ YFSLINK = 64140, /* YFS Create a hard link */
+ YFSMAKEDIR = 64141, /* YFS Create a directory */
+ YFSREMOVEDIR = 64142, /* YFS Remove a directory */
+ YFSGETVOLUMESTATUS = 64149, /* YFS Get volume status information */
+ YFSSETVOLUMESTATUS = 64150, /* YFS Set volume status information */
+ YFSSETLOCK = 64156, /* YFS Request a file lock */
+ YFSEXTENDLOCK = 64157, /* YFS Extend a file lock */
+ YFSRELEASELOCK = 64158, /* YFS Release a file lock */
+ YFSLOOKUP = 64161, /* YFS lookup file in directory */
+ YFSFLUSHCPS = 64165,
+ YFSFETCHOPAQUEACL = 64168,
+ YFSWHOAMI = 64170,
+ YFSREMOVEACL = 64171,
+ YFSREMOVEFILE2 = 64173,
+ YFSSTOREOPAQUEACL2 = 64174,
+ YFSINLINEBULKSTATUS = 64536, /* YFS Fetch multiple file statuses with errors */
+ YFSFETCHDATA64 = 64537, /* YFS Fetch file data */
+ YFSSTOREDATA64 = 64538, /* YFS Store file data */
+ YFSUPDATESYMLINK = 64540,
+};
+
+struct yfs_xdr_u64 { /* 64-bit value carried as two big-endian 32-bit words */
+ __be32 msw; /* upper 32 bits (see xdr_to_u64()) */
+ __be32 lsw; /* lower 32 bits */
+} __packed;
+
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+ return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw); /* msw becomes bits 63..32, lsw bits 31..0; msw is widened to u64 before the shift */
+}
+
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+ return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) }; /* inverse of xdr_to_u64(): high half to .msw, low half to .lsw (x is narrowed to 32 bits by htonl()'s parameter) */
+}
+
+struct yfs_xdr_vnode {
+ struct yfs_xdr_u64 lo;
+ __be32 hi;
+ __be32 unique;
+} __packed;
+
+struct yfs_xdr_YFSFid {
+ struct yfs_xdr_u64 volume;
+ struct yfs_xdr_vnode vnode;
+} __packed;
+
+
+struct yfs_xdr_YFSFetchStatus {
+ __be32 type;
+ __be32 nlink;
+ struct yfs_xdr_u64 size;
+ struct yfs_xdr_u64 data_version;
+ struct yfs_xdr_u64 author;
+ struct yfs_xdr_u64 owner;
+ struct yfs_xdr_u64 group;
+ __be32 mode;
+ __be32 caller_access;
+ __be32 anon_access;
+ struct yfs_xdr_vnode parent;
+ __be32 data_access_protocol;
+ struct yfs_xdr_u64 mtime_client;
+ struct yfs_xdr_u64 mtime_server;
+ __be32 lock_count;
+ __be32 abort_code;
+} __packed;
+
+struct yfs_xdr_YFSCallBack {
+ __be32 version;
+ struct yfs_xdr_u64 expiration_time;
+ __be32 type;
+} __packed;
+
+struct yfs_xdr_YFSStoreStatus {
+ __be32 mask;
+ __be32 mode;
+ struct yfs_xdr_u64 mtime_client;
+ struct yfs_xdr_u64 owner;
+ struct yfs_xdr_u64 group;
+} __packed;
+
+struct yfs_xdr_RPCFlags {
+ __be32 rpc_flags;
+} __packed;
+
+struct yfs_xdr_YFSVolSync {
+ struct yfs_xdr_u64 vol_creation_date;
+ struct yfs_xdr_u64 vol_update_date;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 blocks_in_use;
+ struct yfs_xdr_u64 blocks_avail;
+} __packed;
+
+enum yfs_volume_type {
+ yfs_volume_type_ro = 0,
+ yfs_volume_type_rw = 1,
+};
+
+#define yfs_FVSOnline 0x1
+#define yfs_FVSInservice 0x2
+#define yfs_FVSBlessed 0x4
+#define yfs_FVSNeedsSalvage 0x8
+
+struct yfs_xdr_YFSFetchVolumeStatus {
+ struct yfs_xdr_u64 vid;
+ struct yfs_xdr_u64 parent_id;
+ __be32 flags;
+ __be32 type;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 blocks_in_use;
+ struct yfs_xdr_u64 part_blocks_avail;
+ struct yfs_xdr_u64 part_max_blocks;
+ struct yfs_xdr_u64 vol_copy_date;
+ struct yfs_xdr_u64 vol_backup_date;
+} __packed;
+
+struct yfs_xdr_YFSStoreVolumeStatus {
+ __be32 mask;
+ struct yfs_xdr_u64 min_quota;
+ struct yfs_xdr_u64 max_quota;
+ struct yfs_xdr_u64 file_quota;
+} __packed;
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 1faef56..0050425 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -19,14 +19,6 @@
#include "afs_fs.h"
/*
- * Initialise a filesystem server cursor for iterating over FS servers.
- */
-static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
- memset(fc, 0, sizeof(*fc));
-}
-
-/*
* Begin an operation on the fileserver.
*
* Fileserver operations are serialised on the server by vnode, so we serialise
@@ -35,13 +27,14 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
struct key *key)
{
- afs_init_fs_cursor(fc, vnode);
+ memset(fc, 0, sizeof(*fc));
fc->vnode = vnode;
fc->key = key;
fc->ac.error = SHRT_MAX;
+ fc->error = -EDESTADDRREQ;
if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- fc->ac.error = -EINTR;
+ fc->error = -EINTR;
fc->flags |= AFS_FS_CURSOR_STOP;
return false;
}
@@ -65,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
fc->server_list = afs_get_serverlist(vnode->volume->servers);
read_unlock(&vnode->volume->servers_lock);
+ fc->untried = (1UL << fc->server_list->nr_servers) - 1;
+ fc->index = READ_ONCE(fc->server_list->preferred);
+
cbi = vnode->cb_interest;
if (cbi) {
/* See if the vnode's preferred record is still available */
for (i = 0; i < fc->server_list->nr_servers; i++) {
if (fc->server_list->servers[i].cb_interest == cbi) {
- fc->start = i;
+ fc->index = i;
goto found_interest;
}
}
@@ -80,7 +76,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
* and have to return an error.
*/
if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
- fc->ac.error = -ESTALE;
+ fc->error = -ESTALE;
return false;
}
@@ -94,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
afs_put_cb_interest(afs_v2net(vnode), cbi);
cbi = NULL;
- } else {
- fc->start = READ_ONCE(fc->server_list->index);
}
found_interest:
- fc->index = fc->start;
return true;
}
@@ -117,7 +110,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
default: m = "busy"; break;
}
- pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+ pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
}
/*
@@ -127,7 +120,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
{
msleep_interruptible(1000);
if (signal_pending(current)) {
- fc->ac.error = -ERESTARTSYS;
+ fc->error = -ERESTARTSYS;
return false;
}
@@ -143,27 +136,32 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
struct afs_addr_list *alist;
struct afs_server *server;
struct afs_vnode *vnode = fc->vnode;
+ u32 rtt, abort_code;
+ int error = fc->ac.error, i;
- _enter("%u/%u,%u/%u,%d,%d",
- fc->index, fc->start,
- fc->ac.index, fc->ac.start,
- fc->ac.error, fc->ac.abort_code);
+ _enter("%lx[%d],%lx[%d],%d,%d",
+ fc->untried, fc->index,
+ fc->ac.tried, fc->ac.index,
+ error, fc->ac.abort_code);
if (fc->flags & AFS_FS_CURSOR_STOP) {
_leave(" = f [stopped]");
return false;
}
+ fc->nr_iterations++;
+
/* Evaluate the result of the previous operation, if there was one. */
- switch (fc->ac.error) {
+ switch (error) {
case SHRT_MAX:
goto start;
case 0:
default:
/* Success or local failure. Stop. */
+ fc->error = error;
fc->flags |= AFS_FS_CURSOR_STOP;
- _leave(" = f [okay/local %d]", fc->ac.error);
+ _leave(" = f [okay/local %d]", error);
return false;
case -ECONNABORTED:
@@ -178,7 +176,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* - May indicate that the fileserver couldn't attach to the vol.
*/
if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
- fc->ac.error = -EREMOTEIO;
+ fc->error = -EREMOTEIO;
goto next_server;
}
@@ -187,12 +185,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
write_unlock(&vnode->volume->servers_lock);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
- fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
- if (fc->ac.error < 0)
- goto failed;
+ error = afs_check_volume_status(vnode->volume, fc->key);
+ if (error < 0)
+ goto failed_set_error;
if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
- fc->ac.error = -ENOMEDIUM;
+ fc->error = -ENOMEDIUM;
goto failed;
}
@@ -200,7 +198,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* it's the fileserver having trouble.
*/
if (vnode->volume->servers == fc->server_list) {
- fc->ac.error = -EREMOTEIO;
+ fc->error = -EREMOTEIO;
goto next_server;
}
@@ -215,7 +213,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
case VONLINE:
case VDISKFULL:
case VOVERQUOTA:
- fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+ fc->error = afs_abort_to_error(fc->ac.abort_code);
goto next_server;
case VOFFLINE:
@@ -224,11 +222,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
}
if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
- fc->ac.error = -EADV;
+ fc->error = -EADV;
goto failed;
}
if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
- fc->ac.error = -ESTALE;
+ fc->error = -ESTALE;
goto failed;
}
goto busy;
@@ -240,7 +238,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* have a file lock we need to maintain.
*/
if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
- fc->ac.error = -EBUSY;
+ fc->error = -EBUSY;
goto failed;
}
if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
@@ -269,16 +267,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* honour, just in case someone sets up a loop.
*/
if (fc->flags & AFS_FS_CURSOR_VMOVED) {
- fc->ac.error = -EREMOTEIO;
+ fc->error = -EREMOTEIO;
goto failed;
}
fc->flags |= AFS_FS_CURSOR_VMOVED;
set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
- fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
- if (fc->ac.error < 0)
- goto failed;
+ error = afs_check_volume_status(vnode->volume, fc->key);
+ if (error < 0)
+ goto failed_set_error;
/* If the server list didn't change, then the VLDB is
* out of sync with the fileservers. This is hopefully
@@ -290,7 +288,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* TODO: Retry a few times with sleeps.
*/
if (vnode->volume->servers == fc->server_list) {
- fc->ac.error = -ENOMEDIUM;
+ fc->error = -ENOMEDIUM;
goto failed;
}
@@ -299,20 +297,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
default:
clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
- fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+ fc->error = afs_abort_to_error(fc->ac.abort_code);
goto failed;
}
+ case -ETIMEDOUT:
+ case -ETIME:
+ if (fc->error != -EDESTADDRREQ)
+ goto iterate_address;
+ /* Fall through */
case -ENETUNREACH:
case -EHOSTUNREACH:
case -ECONNREFUSED:
- case -ETIMEDOUT:
- case -ETIME:
_debug("no conn");
+ fc->error = error;
goto iterate_address;
case -ECONNRESET:
_debug("call reset");
+ fc->error = error;
goto failed;
}
@@ -328,15 +331,57 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
/* See if we need to do an update of the volume record. Note that the
* volume may have moved or even have been deleted.
*/
- fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
- if (fc->ac.error < 0)
- goto failed;
+ error = afs_check_volume_status(vnode->volume, fc->key);
+ if (error < 0)
+ goto failed_set_error;
if (!afs_start_fs_iteration(fc, vnode))
goto failed;
-use_server:
- _debug("use");
+ _debug("__ VOL %llx __", vnode->volume->vid);
+ error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
+ if (error < 0)
+ goto failed_set_error;
+
+pick_server:
+ _debug("pick [%lx]", fc->untried);
+
+ error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
+ if (error < 0)
+ goto failed_set_error;
+
+ /* Pick the untried server with the lowest RTT. If we have outstanding
+ * callbacks, we stick with the server we're already using if we can.
+ */
+ if (fc->cbi) {
+ _debug("cbi %u", fc->index);
+ if (test_bit(fc->index, &fc->untried))
+ goto selected_server;
+ afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+ fc->cbi = NULL;
+ _debug("nocbi");
+ }
+
+ fc->index = -1;
+ rtt = U32_MAX;
+ for (i = 0; i < fc->server_list->nr_servers; i++) {
+ struct afs_server *s = fc->server_list->servers[i].server;
+
+ if (!test_bit(i, &fc->untried) || !s->probe.responded)
+ continue;
+ if (s->probe.rtt < rtt) {
+ fc->index = i;
+ rtt = s->probe.rtt;
+ }
+ }
+
+ if (fc->index == -1)
+ goto no_more_servers;
+
+selected_server:
+ _debug("use %d", fc->index);
+ __clear_bit(fc->index, &fc->untried);
+
/* We're starting on a different fileserver from the list. We need to
* check it, create a callback intercept, find its address list and
* probe its capabilities before we use it.
@@ -354,10 +399,10 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
* break request before we've finished decoding the reply and
* installing the vnode.
*/
- fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
- fc->index);
- if (fc->ac.error < 0)
- goto failed;
+ error = afs_register_server_cb_interest(vnode, fc->server_list,
+ fc->index);
+ if (error < 0)
+ goto failed_set_error;
fc->cbi = afs_get_cb_interest(vnode->cb_interest);
@@ -369,66 +414,88 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
memset(&fc->ac, 0, sizeof(fc->ac));
- /* Probe the current fileserver if we haven't done so yet. */
- if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
- fc->ac.alist = afs_get_addrlist(alist);
-
- if (!afs_probe_fileserver(fc)) {
- switch (fc->ac.error) {
- case -ENOMEM:
- case -ERESTARTSYS:
- case -EINTR:
- goto failed;
- default:
- goto next_server;
- }
- }
- }
-
if (!fc->ac.alist)
fc->ac.alist = alist;
else
afs_put_addrlist(alist);
- fc->ac.start = READ_ONCE(alist->index);
- fc->ac.index = fc->ac.start;
+ fc->ac.index = -1;
iterate_address:
ASSERT(fc->ac.alist);
- _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
/* Iterate over the current server's address list to try and find an
* address on which it will respond to us.
*/
if (!afs_iterate_addresses(&fc->ac))
goto next_server;
+ _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
+
_leave(" = t");
return true;
next_server:
_debug("next");
afs_end_cursor(&fc->ac);
- afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
- fc->cbi = NULL;
- fc->index++;
- if (fc->index >= fc->server_list->nr_servers)
- fc->index = 0;
- if (fc->index != fc->start)
- goto use_server;
+ goto pick_server;
+no_more_servers:
/* That's all the servers poked to no good effect. Try again if some
* of them were busy.
*/
if (fc->flags & AFS_FS_CURSOR_VBUSY)
goto restart_from_beginning;
- fc->ac.error = -EDESTADDRREQ;
- goto failed;
+ abort_code = 0;
+ error = -EDESTADDRREQ;
+ for (i = 0; i < fc->server_list->nr_servers; i++) {
+ struct afs_server *s = fc->server_list->servers[i].server;
+ int probe_error = READ_ONCE(s->probe.error);
+ switch (probe_error) {
+ case 0:
+ continue;
+ default:
+ if (error == -ETIMEDOUT ||
+ error == -ETIME)
+ continue;
+ case -ETIMEDOUT:
+ case -ETIME:
+ if (error == -ENOMEM ||
+ error == -ENONET)
+ continue;
+ case -ENOMEM:
+ case -ENONET:
+ if (error == -ENETUNREACH)
+ continue;
+ case -ENETUNREACH:
+ if (error == -EHOSTUNREACH)
+ continue;
+ case -EHOSTUNREACH:
+ if (error == -ECONNREFUSED)
+ continue;
+ case -ECONNREFUSED:
+ if (error == -ECONNRESET)
+ continue;
+ case -ECONNRESET: /* Responded, but call expired. */
+ if (error == -ECONNABORTED)
+ continue;
+ case -ECONNABORTED:
+ abort_code = s->probe.abort_code;
+ error = probe_error;
+ continue;
+ }
+ }
+
+ if (error == -ECONNABORTED)
+ error = afs_abort_to_error(abort_code);
+
+failed_set_error:
+ fc->error = error;
failed:
fc->flags |= AFS_FS_CURSOR_STOP;
afs_end_cursor(&fc->ac);
- _leave(" = f [failed %d]", fc->ac.error);
+ _leave(" = f [failed %d]", fc->error);
return false;
}
@@ -442,13 +509,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
struct afs_vnode *vnode = fc->vnode;
struct afs_cb_interest *cbi = vnode->cb_interest;
struct afs_addr_list *alist;
+ int error = fc->ac.error;
_enter("");
- switch (fc->ac.error) {
+ switch (error) {
case SHRT_MAX:
if (!cbi) {
- fc->ac.error = -ESTALE;
+ fc->error = -ESTALE;
fc->flags |= AFS_FS_CURSOR_STOP;
return false;
}
@@ -461,25 +529,26 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
afs_get_addrlist(alist);
read_unlock(&cbi->server->fs_lock);
if (!alist) {
- fc->ac.error = -ESTALE;
+ fc->error = -ESTALE;
fc->flags |= AFS_FS_CURSOR_STOP;
return false;
}
memset(&fc->ac, 0, sizeof(fc->ac));
fc->ac.alist = alist;
- fc->ac.start = READ_ONCE(alist->index);
- fc->ac.index = fc->ac.start;
+ fc->ac.index = -1;
goto iterate_address;
case 0:
default:
/* Success or local failure. Stop. */
+ fc->error = error;
fc->flags |= AFS_FS_CURSOR_STOP;
- _leave(" = f [okay/local %d]", fc->ac.error);
+ _leave(" = f [okay/local %d]", error);
return false;
case -ECONNABORTED:
+ fc->error = afs_abort_to_error(fc->ac.abort_code);
fc->flags |= AFS_FS_CURSOR_STOP;
_leave(" = f [abort]");
return false;
@@ -490,6 +559,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
case -ETIMEDOUT:
case -ETIME:
_debug("no conn");
+ fc->error = error;
goto iterate_address;
}
@@ -507,12 +577,65 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
}
 /*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ *
+ * Debugging aid: uses pr_notice() to print the fileserver cursor, then each
+ * server in the cursor's server list together with that server's address
+ * list, then the address cursor. Nothing is printed unless
+ * CONFIG_AFS_DEBUG_CURSOR is enabled, and output stops once the static
+ * counter has passed 3. The caller in this file also invokes it for
+ * ENETUNREACH and EHOSTUNREACH.
+ */
+static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
+{
+ static int count; /* dumps emitted so far; plain int, updated without any locking */
+ int i;
+
+ if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+ return;
+ count++;
+
+ rcu_read_lock(); /* held across the whole dump; s->addresses is read with rcu_dereference() below */
+
+ pr_notice("EDESTADDR occurred\n");
+ pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
+ fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
+ pr_notice("FC: ut=%lx ix=%d ni=%u\n",
+ fc->untried, fc->index, fc->nr_iterations);
+
+ if (fc->server_list) { /* the cursor may not have a server list at all */
+ const struct afs_server_list *sl = fc->server_list;
+ pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
+ sl->nr_servers, sl->preferred, sl->vnovol_mask);
+ for (i = 0; i < sl->nr_servers; i++) {
+ const struct afs_server *s = sl->servers[i].server;
+ pr_notice("FC: server fl=%lx av=%u %pU\n",
+ s->flags, s->addr_version, &s->uuid);
+ if (s->addresses) { /* plain test first; the pointer is read again through rcu_dereference() just below */
+ const struct afs_addr_list *a =
+ rcu_dereference(s->addresses);
+ pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
+ a->version,
+ a->nr_ipv4, a->nr_addrs, a->max_addrs,
+ a->preferred);
+ pr_notice("FC: - pr=%lx R=%lx F=%lx\n",
+ a->probed, a->responded, a->failed);
+ if (a == fc->ac.alist) /* mark the address list the address cursor is currently using */
+ pr_notice("FC: - current\n");
+ }
+ }
+ }
+
+ pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+ fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
+ fc->ac.responded, fc->ac.nr_iterations);
+ rcu_read_unlock();
+}
+
+/*
* Tidy up a filesystem cursor and unlock the vnode.
*/
int afs_end_vnode_operation(struct afs_fs_cursor *fc)
{
struct afs_net *net = afs_v2net(fc->vnode);
- int ret;
+
+ if (fc->error == -EDESTADDRREQ ||
+ fc->error == -ENETUNREACH ||
+ fc->error == -EHOSTUNREACH)
+ afs_dump_edestaddrreq(fc);
mutex_unlock(&fc->vnode->io_lock);
@@ -520,9 +643,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
afs_put_cb_interest(net, fc->cbi);
afs_put_serverlist(net, fc->server_list);
- ret = fc->ac.error;
- if (ret == -ECONNABORTED)
- afs_abort_to_error(fc->ac.abort_code);
+ if (fc->error == -ECONNABORTED)
+ fc->error = afs_abort_to_error(fc->ac.abort_code);
- return fc->ac.error;
+ return fc->error;
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 77a8379..5997088 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -16,6 +16,7 @@
#include <net/af_rxrpc.h>
#include "internal.h"
#include "afs_cm.h"
+#include "protocol_yfs.h"
struct workqueue_struct *afs_async_calls;
@@ -75,6 +76,18 @@ int afs_open_socket(struct afs_net *net)
if (ret < 0)
goto error_2;
+ srx.srx_service = YFS_CM_SERVICE;
+ ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+ if (ret < 0)
+ goto error_2;
+
+ /* Ideally, we'd turn on service upgrade here, but we can't because
+ * OpenAFS is buggy and leaks the userStatus field from packet to
+ * packet and between FS packets and CB packets - so if we try to do an
+ * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+ * it sends back to us.
+ */
+
rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
afs_rx_discard_new_call);
@@ -143,6 +156,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
INIT_WORK(&call->async_work, afs_process_async_call);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
+ call->_iter = &call->iter;
o = atomic_inc_return(&net->nr_outstanding_calls);
trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -176,6 +190,7 @@ void afs_put_call(struct afs_call *call)
afs_put_server(call->net, call->cm_server);
afs_put_cb_interest(call->net, call->cbi);
+ afs_put_addrlist(call->alist);
kfree(call->request);
trace_afs_call(call, afs_call_trace_free, 0, o,
@@ -189,21 +204,22 @@ void afs_put_call(struct afs_call *call)
}
/*
- * Queue the call for actual work. Returns 0 unconditionally for convenience.
+ * Queue the call for actual work.
*/
-int afs_queue_call_work(struct afs_call *call)
+static void afs_queue_call_work(struct afs_call *call)
 {
- int u = atomic_inc_return(&call->usage);
+ if (call->type->work) { /* call types with no work handler are skipped: no count taken, nothing queued */
+ int u = atomic_inc_return(&call->usage); /* usage count taken before the work item is queued */
- trace_afs_call(call, afs_call_trace_work, u,
- atomic_read(&call->net->nr_outstanding_calls),
- __builtin_return_address(0));
+ trace_afs_call(call, afs_call_trace_work, u,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
- INIT_WORK(&call->work, call->type->work);
+ INIT_WORK(&call->work, call->type->work); /* the handler comes from the call type */
- if (!queue_work(afs_wq, &call->work))
- afs_put_call(call);
- return 0;
+ if (!queue_work(afs_wq, &call->work))
+ afs_put_call(call); /* queue_work() returned false, so give back the count taken above */
+ }
 }
/*
@@ -233,6 +249,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
goto nomem_free;
}
+ afs_extract_to_buf(call, call->reply_max);
call->operation_ID = type->op;
init_waitqueue_head(&call->waitq);
return call;
@@ -286,7 +303,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
offset = 0;
}
- iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+ iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
}
/*
@@ -342,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
gfp_t gfp, bool async)
{
- struct sockaddr_rxrpc *srx = ac->addr;
+ struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
@@ -359,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
atomic_read(&call->net->nr_outstanding_calls));
call->async = async;
+ call->addr_ix = ac->index;
+ call->alist = afs_get_addrlist(ac->alist);
/* Work out the length we're going to transmit. This is awkward for
* calls such as FS.StoreData where there's an extra injection of data
@@ -390,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
+ call->error = ret;
goto error_kill_call;
}
@@ -401,8 +421,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
- call->request_size);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
@@ -432,7 +451,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret, "KSD");
} else {
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+ iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
&msg.msg_iter, false,
&call->abort_code, &call->service_id);
@@ -442,6 +461,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
call->error = ret;
trace_afs_call_done(call);
error_kill_call:
+ if (call->type->done)
+ call->type->done(call);
afs_put_call(call);
ac->error = ret;
_leave(" = %d", ret);
@@ -466,14 +487,12 @@ static void afs_deliver_to_call(struct afs_call *call)
state == AFS_CALL_SV_AWAIT_ACK
) {
if (state == AFS_CALL_SV_AWAIT_ACK) {
- struct iov_iter iter;
-
- iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
+ iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
ret = rxrpc_kernel_recv_data(call->net->socket,
- call->rxcall, &iter, false,
- &remote_abort,
+ call->rxcall, &call->iter,
+ false, &remote_abort,
&call->service_id);
- trace_afs_recv_data(call, 0, 0, false, ret);
+ trace_afs_receive_data(call, &call->iter, false, ret);
if (ret == -EINPROGRESS || ret == -EAGAIN)
return;
@@ -485,10 +504,17 @@ static void afs_deliver_to_call(struct afs_call *call)
return;
}
+ if (call->want_reply_time &&
+ rxrpc_kernel_get_reply_time(call->net->socket,
+ call->rxcall,
+ &call->reply_time))
+ call->want_reply_time = false;
+
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
switch (ret) {
case 0:
+ afs_queue_call_work(call);
if (state == AFS_CALL_CL_PROC_REPLY) {
if (call->cbi)
set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
@@ -500,7 +526,6 @@ static void afs_deliver_to_call(struct afs_call *call)
case -EINPROGRESS:
case -EAGAIN:
goto out;
- case -EIO:
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
goto done;
@@ -509,6 +534,10 @@ static void afs_deliver_to_call(struct afs_call *call)
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KIV");
goto local_abort;
+ case -EIO:
+ pr_err("kAFS: Call %u in bad state %u\n",
+ call->debug_id, state);
+ /* Fall through */
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
@@ -517,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- abort_code, -EBADMSG, "KUM");
+ abort_code, ret, "KUM");
goto local_abort;
}
}
done:
+ if (call->type->done)
+ call->type->done(call);
if (state == AFS_CALL_COMPLETE && call->incoming)
afs_put_call(call);
out:
@@ -728,6 +759,7 @@ void afs_charge_preallocation(struct work_struct *work)
call->async = true;
call->state = AFS_CALL_SV_AWAIT_OP_ID;
init_waitqueue_head(&call->waitq);
+ afs_extract_to_tmp(call);
}
if (rxrpc_kernel_charge_accept(net->socket,
@@ -773,18 +805,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
{
int ret;
- _enter("{%zu}", call->offset);
-
- ASSERTCMP(call->offset, <, 4);
+ _enter("{%zu}", iov_iter_count(call->_iter));
/* the operation ID forms the first four bytes of the request data */
- ret = afs_extract_data(call, &call->tmp, 4, true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
call->operation_ID = ntohl(call->tmp);
afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
- call->offset = 0;
/* ask the cache manager to route the call (it'll change the call type
* if successful) */
@@ -825,7 +854,7 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+ iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -864,7 +893,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
iov[0].iov_len = len;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -888,30 +917,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
/*
* Extract a piece of data from the received data socket buffers.
*/
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
- bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
{
struct afs_net *net = call->net;
- struct iov_iter iter;
- struct kvec iov;
+ struct iov_iter *iter = call->_iter;
enum afs_call_state state;
u32 remote_abort = 0;
int ret;
- _enter("{%s,%zu},,%zu,%d",
- call->type->name, call->offset, count, want_more);
+ _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
- ASSERTCMP(call->offset, <=, count);
-
- iov.iov_base = buf + call->offset;
- iov.iov_len = count - call->offset;
- iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
-
- ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+ ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
want_more, &remote_abort,
&call->service_id);
- call->offset += (count - call->offset) - iov_iter_count(&iter);
- trace_afs_recv_data(call, count, call->offset, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
@@ -926,7 +944,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
break;
case AFS_CALL_COMPLETE:
kdebug("prem complete %d", call->error);
- return -EIO;
+ return afs_io_error(call, afs_io_error_extract);
default:
break;
}
@@ -940,8 +958,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
/*
* Log protocol error production.
*/
-noinline int afs_protocol_error(struct afs_call *call, int error)
+noinline int afs_protocol_error(struct afs_call *call, int error,
+ enum afs_eproto_cause cause)
{
- trace_afs_protocol_error(call, error, __builtin_return_address(0));
+ trace_afs_protocol_error(call, error, cause);
return error;
}
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 81dfedb..5f58a9a 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
bool changed = false;
int i, j;
- _enter("{%x:%u},%x,%x",
+ _enter("{%llx:%llu},%x,%x",
vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
rcu_read_lock();
@@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
break;
}
- if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
+ if (afs_cb_is_broken(cb_break, vnode,
+ vnode->cb_interest)) {
changed = true;
break;
}
@@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
}
- if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
+ if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
@@ -256,7 +257,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
+ if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
@@ -289,7 +290,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
bool valid = false;
int i, ret;
- _enter("{%x:%u},%x",
+ _enter("{%llx:%llu},%x",
vnode->fid.vid, vnode->fid.vnode, key_serial(key));
/* check the permits to see if we've got one yet */
@@ -349,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
- _enter("{{%x:%u},%lx},%x,",
+ _enter("{{%llx:%llu},%lx},%x,",
vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
key = afs_request_key(vnode->volume->cell);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1d329e6..642afa2 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
+#include "protocol_yfs.h"
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */
@@ -230,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
rwlock_init(&server->cb_break_lock);
+ init_waitqueue_head(&server->probe_wq);
+ spin_lock_init(&server->probe_lock);
afs_inc_servers_outstanding(net);
_leave(" = %p", server);
@@ -246,41 +249,23 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
struct key *key, const uuid_t *uuid)
{
- struct afs_addr_cursor ac;
- struct afs_addr_list *alist;
+ struct afs_vl_cursor vc;
+ struct afs_addr_list *alist = NULL;
int ret;
- ret = afs_set_vl_cursor(&ac, cell);
- if (ret < 0)
- return ERR_PTR(ret);
-
- while (afs_iterate_addresses(&ac)) {
- if (test_bit(ac.index, &ac.alist->yfs))
- alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
- else
- alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
- switch (ac.error) {
- case 0:
- afs_end_cursor(&ac);
- return alist;
- case -ECONNABORTED:
- ac.error = afs_abort_to_error(ac.abort_code);
- goto error;
- case -ENOMEM:
- case -ENONET:
- goto error;
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- break;
- default:
- ac.error = -EIO;
- goto error;
+ ret = -ERESTARTSYS;
+ if (afs_begin_vlserver_operation(&vc, cell, key)) {
+ while (afs_select_vlserver(&vc)) {
+ if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
+ alist = afs_yfsvl_get_endpoints(&vc, uuid);
+ else
+ alist = afs_vl_get_addrs_u(&vc, uuid);
}
+
+ ret = afs_end_vlserver_operation(&vc);
}
-error:
- return ERR_PTR(afs_end_cursor(&ac));
+ return ret < 0 ? ERR_PTR(ret) : alist;
}
/*
@@ -382,9 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
struct afs_addr_cursor ac = {
.alist = alist,
- .start = alist->index,
- .index = 0,
- .addr = &alist->addrs[alist->index],
+ .index = alist->preferred,
.error = 0,
};
_enter("%p", server);
@@ -392,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
+ wait_var_event(&server->probe_outstanding,
+ atomic_read(&server->probe_outstanding) == 0);
+
call_rcu(&server->rcu, afs_server_rcu);
afs_dec_servers_outstanding(net);
}
@@ -525,99 +511,6 @@ void afs_purge_servers(struct afs_net *net)
}
/*
- * Probe a fileserver to find its capabilities.
- *
- * TODO: Try service upgrade.
- */
-static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
-{
- _enter("");
-
- fc->ac.addr = NULL;
- fc->ac.start = READ_ONCE(fc->ac.alist->index);
- fc->ac.index = fc->ac.start;
- fc->ac.error = 0;
- fc->ac.begun = false;
-
- while (afs_iterate_addresses(&fc->ac)) {
- afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
- &fc->ac, fc->key);
- switch (fc->ac.error) {
- case 0:
- afs_end_cursor(&fc->ac);
- set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
- return true;
- case -ECONNABORTED:
- fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
- goto error;
- case -ENOMEM:
- case -ENONET:
- goto error;
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- case -ETIMEDOUT:
- case -ETIME:
- break;
- default:
- fc->ac.error = -EIO;
- goto error;
- }
- }
-
-error:
- afs_end_cursor(&fc->ac);
- return false;
-}
-
-/*
- * If we haven't already, try probing the fileserver to get its capabilities.
- * We try not to instigate parallel probes, but it's possible that the parallel
- * probes will fail due to authentication failure when ours would succeed.
- *
- * TODO: Try sending an anonymous probe if an authenticated probe fails.
- */
-bool afs_probe_fileserver(struct afs_fs_cursor *fc)
-{
- bool success;
- int ret, retries = 0;
-
- _enter("");
-
-retry:
- if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
- _leave(" = t");
- return true;
- }
-
- if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
- success = afs_do_probe_fileserver(fc);
- clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
- wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
- _leave(" = t");
- return success;
- }
-
- _debug("wait");
- ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
- TASK_INTERRUPTIBLE);
- if (ret == -ERESTARTSYS) {
- fc->ac.error = ret;
- _leave(" = f [%d]", ret);
- return false;
- }
-
- retries++;
- if (retries == 4) {
- fc->ac.error = -ESTALE;
- _leave(" = f [stale]");
- return false;
- }
- _debug("retry");
- goto retry;
-}
-
-/*
* Get an update for a server's address list.
*/
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 8a5760a..95d0761 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
return false;
changed:
- /* Maintain the same current server as before if possible. */
- cur = old->servers[old->index].server;
+ /* Maintain the same preferred server as before if possible. */
+ cur = old->servers[old->preferred].server;
for (j = 0; j < new->nr_servers; j++) {
if (new->servers[j].server == cur) {
- new->index = j;
+ new->preferred = j;
break;
}
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 4d3e274..dcd07fe 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb,
inode = afs_iget_pseudo_dir(sb, true);
sb->s_flags |= SB_RDONLY;
} else {
- sprintf(sb->s_id, "%u", as->volume->vid);
+ sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
fid.vid = as->volume->vid;
fid.vnode = 1;
+ fid.vnode_hi = 0;
fid.unique = 1;
inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
}
@@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
- _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
+ _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
_debug("DESTROY INODE %p", inode);
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644
index 0000000..b4f1a845
--- /dev/null
+++ b/fs/afs/vl_list.c
@@ -0,0 +1,340 @@
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+ unsigned short port)
+{
+ struct afs_vlserver *vlserver;
+
+ vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
+ GFP_KERNEL);
+ if (vlserver) {
+ atomic_set(&vlserver->usage, 1);
+ rwlock_init(&vlserver->lock);
+ init_waitqueue_head(&vlserver->probe_wq);
+ spin_lock_init(&vlserver->probe_lock);
+ vlserver->name_len = name_len;
+ vlserver->port = port;
+ memcpy(vlserver->name, name, name_len);
+ }
+ return vlserver;
+}
+
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+ struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
+
+ afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+ kfree_rcu(vlserver, rcu);
+}
+
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+ if (vlserver) {
+ unsigned int u = atomic_dec_return(&vlserver->usage);
+ //_debug("VL PUT %p{%u}", vlserver, u);
+
+ if (u == 0)
+ call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+ }
+}
+
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+ struct afs_vlserver_list *vllist;
+
+ vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
+ if (vllist) {
+ atomic_set(&vllist->usage, 1);
+ rwlock_init(&vllist->lock);
+ }
+
+ return vllist;
+}
+
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+ if (vllist) {
+ unsigned int u = atomic_dec_return(&vllist->usage);
+
+ //_debug("VLLS PUT %p{%u}", vllist, u);
+ if (u == 0) {
+ int i;
+
+ for (i = 0; i < vllist->nr_servers; i++) {
+ afs_put_vlserver(net, vllist->servers[i].server);
+ }
+ kfree_rcu(vllist, rcu);
+ }
+ }
+}
+
+static u16 afs_extract_le16(const u8 **_b)
+{
+ u16 val;
+
+ val = (u16)*(*_b)++ << 0;
+ val |= (u16)*(*_b)++ << 8;
+ return val;
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+ u8 nr_addrs, u16 port)
+{
+ struct afs_addr_list *alist;
+ const u8 *b = *_b;
+ int ret = -EINVAL;
+
+ alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+ if (!alist)
+ return ERR_PTR(-ENOMEM);
+ if (nr_addrs == 0)
+ return alist;
+
+ for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+ struct dns_server_list_v1_address hdr;
+ __be32 x[4];
+
+ hdr.address_type = *b++;
+
+ switch (hdr.address_type) {
+ case DNS_ADDRESS_IS_IPV4:
+ if (end - b < 4) {
+ _leave(" = -EINVAL [short inet]");
+ goto error;
+ }
+ memcpy(x, b, 4);
+ afs_merge_fs_addr4(alist, x[0], port);
+ b += 4;
+ break;
+
+ case DNS_ADDRESS_IS_IPV6:
+ if (end - b < 16) {
+ _leave(" = -EINVAL [short inet6]");
+ goto error;
+ }
+ memcpy(x, b, 16);
+ afs_merge_fs_addr6(alist, x, port);
+ b += 16;
+ break;
+
+ default:
+ _leave(" = -EADDRNOTAVAIL [unknown af %u]",
+ hdr.address_type);
+ ret = -EADDRNOTAVAIL;
+ goto error;
+ }
+ }
+
+ /* Start with IPv6 if available. */
+ if (alist->nr_ipv4 < alist->nr_addrs)
+ alist->preferred = alist->nr_ipv4;
+
+ *_b = b;
+ return alist;
+
+error:
+ *_b = b;
+ afs_put_addrlist(alist);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+ const void *buffer,
+ size_t buffer_size)
+{
+ const struct dns_server_list_v1_header *hdr = buffer;
+ struct dns_server_list_v1_server bs;
+ struct afs_vlserver_list *vllist, *previous;
+ struct afs_addr_list *addrs;
+ struct afs_vlserver *server;
+ const u8 *b = buffer, *end = buffer + buffer_size;
+ int ret = -ENOMEM, nr_servers, i, j;
+
+ _enter("");
+
+ /* Check that it's a server list, v1 */
+ if (end - b < sizeof(*hdr) ||
+ hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
+ hdr->hdr.version != 1) {
+ pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
+ hdr->hdr.content, hdr->hdr.version, end - b);
+ ret = -EDESTADDRREQ;
+ goto dump;
+ }
+
+ nr_servers = hdr->nr_servers;
+
+ vllist = afs_alloc_vlserver_list(nr_servers);
+ if (!vllist)
+ return ERR_PTR(-ENOMEM);
+
+ vllist->source = (hdr->source < NR__dns_record_source) ?
+ hdr->source : NR__dns_record_source;
+ vllist->status = (hdr->status < NR__dns_lookup_status) ?
+ hdr->status : NR__dns_lookup_status;
+
+ read_lock(&cell->vl_servers_lock);
+ previous = afs_get_vlserverlist(
+ rcu_dereference_protected(cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock)));
+ read_unlock(&cell->vl_servers_lock);
+
+ b += sizeof(*hdr);
+ while (end - b >= sizeof(bs)) {
+ bs.name_len = afs_extract_le16(&b);
+ bs.priority = afs_extract_le16(&b);
+ bs.weight = afs_extract_le16(&b);
+ bs.port = afs_extract_le16(&b);
+ bs.source = *b++;
+ bs.status = *b++;
+ bs.protocol = *b++;
+ bs.nr_addrs = *b++;
+
+ _debug("extract %u %u %u %u %u %u %*.*s",
+ bs.name_len, bs.priority, bs.weight,
+ bs.port, bs.protocol, bs.nr_addrs,
+ bs.name_len, bs.name_len, b);
+
+ if (end - b < bs.name_len)
+ break;
+
+ ret = -EPROTONOSUPPORT;
+ if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
+ bs.protocol = DNS_SERVER_PROTOCOL_UDP;
+ } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
+ _leave(" = [proto %u]", bs.protocol);
+ goto error;
+ }
+
+ if (bs.port == 0)
+ bs.port = AFS_VL_PORT;
+ if (bs.source > NR__dns_record_source)
+ bs.source = NR__dns_record_source;
+ if (bs.status > NR__dns_lookup_status)
+ bs.status = NR__dns_lookup_status;
+
+ server = NULL;
+ if (previous) {
+ /* See if we can update an old server record */
+ for (i = 0; i < previous->nr_servers; i++) {
+ struct afs_vlserver *p = previous->servers[i].server;
+
+ if (p->name_len == bs.name_len &&
+ p->port == bs.port &&
+ strncasecmp(b, p->name, bs.name_len) == 0) {
+ server = afs_get_vlserver(p);
+ break;
+ }
+ }
+ }
+
+ if (!server) {
+ ret = -ENOMEM;
+ server = afs_alloc_vlserver(b, bs.name_len, bs.port);
+ if (!server)
+ goto error;
+ }
+
+ b += bs.name_len;
+
+ /* Extract the addresses - note that we can't skip this as we
+ * have to advance the payload pointer.
+ */
+ addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+ if (IS_ERR(addrs)) {
+ ret = PTR_ERR(addrs);
+ goto error_2;
+ }
+
+ if (vllist->nr_servers >= nr_servers) {
+ _debug("skip %u >= %u", vllist->nr_servers, nr_servers);
+ afs_put_addrlist(addrs);
+ afs_put_vlserver(cell->net, server);
+ continue;
+ }
+
+ addrs->source = bs.source;
+ addrs->status = bs.status;
+
+ if (addrs->nr_addrs == 0) {
+ afs_put_addrlist(addrs);
+ if (!rcu_access_pointer(server->addresses)) {
+ afs_put_vlserver(cell->net, server);
+ continue;
+ }
+ } else {
+ struct afs_addr_list *old = addrs;
+
+ write_lock(&server->lock);
+ rcu_swap_protected(server->addresses, old,
+ lockdep_is_held(&server->lock));
+ write_unlock(&server->lock);
+ afs_put_addrlist(old);
+ }
+
+
+ /* TODO: Might want to check for duplicates */
+
+ /* Insertion-sort by priority and weight */
+ for (j = 0; j < vllist->nr_servers; j++) {
+ if (bs.priority < vllist->servers[j].priority)
+ break; /* Lower preferable */
+ if (bs.priority == vllist->servers[j].priority &&
+ bs.weight > vllist->servers[j].weight)
+ break; /* Higher preferable */
+ }
+
+ if (j < vllist->nr_servers) {
+ memmove(vllist->servers + j + 1,
+ vllist->servers + j,
+ (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
+ }
+
+ clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+
+ vllist->servers[j].priority = bs.priority;
+ vllist->servers[j].weight = bs.weight;
+ vllist->servers[j].server = server;
+ vllist->nr_servers++;
+ }
+
+ if (b != end) {
+ _debug("parse error %zd", b - end);
+ goto error;
+ }
+
+ afs_put_vlserverlist(cell->net, previous);
+ _leave(" = ok [%u]", vllist->nr_servers);
+ return vllist;
+
+error_2:
+ afs_put_vlserver(cell->net, server);
+error:
+ afs_put_vlserverlist(cell->net, vllist);
+ afs_put_vlserverlist(cell->net, previous);
+dump:
+ if (ret != -ENOMEM) {
+ printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
+ print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
+ }
+ return ERR_PTR(ret);
+}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644
index 0000000..c0f616b
--- /dev/null
+++ b/fs/afs/vl_probe.c
@@ -0,0 +1,273 @@
+/* AFS vlserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_vl_probe_done(struct afs_vlserver *server)
+{
+ if (!atomic_dec_and_test(&server->probe_outstanding))
+ return false;
+
+ wake_up_var(&server->probe_outstanding);
+ clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
+ wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
+ return true;
+}
+
+/*
+ * Process the result of probing a vlserver. This is called after successful
+ * or failed delivery of a VL.GetCapabilities operation.
+ */
+void afs_vlserver_probe_result(struct afs_call *call)
+{
+ struct afs_addr_list *alist = call->alist;
+ struct afs_vlserver *server = call->reply[0];
+ unsigned int server_index = (long)call->reply[1];
+ unsigned int index = call->addr_ix;
+ unsigned int rtt = UINT_MAX;
+ bool have_result = false;
+ u64 _rtt;
+ int ret = call->error;
+
+ _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
+
+ spin_lock(&server->probe_lock);
+
+ switch (ret) {
+ case 0:
+ server->probe.error = 0;
+ goto responded;
+ case -ECONNABORTED:
+ if (!server->probe.responded) {
+ server->probe.abort_code = call->abort_code;
+ server->probe.error = ret;
+ }
+ goto responded;
+ case -ENOMEM:
+ case -ENONET:
+ server->probe.local_failure = true;
+ afs_io_error(call, afs_io_error_vl_probe_fail);
+ goto out;
+ case -ECONNRESET: /* Responded, but call expired. */
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ default:
+ clear_bit(index, &alist->responded);
+ set_bit(index, &alist->failed);
+ if (!server->probe.responded &&
+ (server->probe.error == 0 ||
+ server->probe.error == -ETIMEDOUT ||
+ server->probe.error == -ETIME))
+ server->probe.error = ret;
+ afs_io_error(call, afs_io_error_vl_probe_fail);
+ goto out;
+ }
+
+responded:
+ set_bit(index, &alist->responded);
+ clear_bit(index, &alist->failed);
+
+ if (call->service_id == YFS_VL_SERVICE) {
+ server->probe.is_yfs = true;
+ set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ } else {
+ server->probe.not_yfs = true;
+ if (!server->probe.is_yfs) {
+ clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ }
+ }
+
+ /* Get the RTT and scale it to fit into a 32-bit value that represents
+ * over a minute of time so that we can access it with one instruction
+ * on a 32-bit system.
+ */
+ _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+ _rtt /= 64;
+ rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+ if (rtt < server->probe.rtt) {
+ server->probe.rtt = rtt;
+ alist->preferred = index;
+ have_result = true;
+ }
+
+ smp_wmb(); /* Set rtt before responded. */
+ server->probe.responded = true;
+ set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+out:
+ spin_unlock(&server->probe_lock);
+
+ _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+ server_index, index, &alist->addrs[index].transport,
+ (unsigned int)rtt, ret);
+
+ have_result |= afs_vl_probe_done(server);
+ if (have_result) {
+ server->probe.have_result = true;
+ wake_up_var(&server->probe.have_result);
+ wake_up_all(&server->probe_wq);
+ }
+}
+
+/*
+ * Probe all of a vlserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_vlserver(struct afs_net *net,
+ struct afs_vlserver *server,
+ struct key *key,
+ unsigned int server_index)
+{
+ struct afs_addr_cursor ac = {
+ .index = 0,
+ };
+ int ret;
+
+ _enter("%s", server->name);
+
+ read_lock(&server->lock);
+ ac.alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->lock));
+ read_unlock(&server->lock);
+
+ atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ memset(&server->probe, 0, sizeof(server->probe));
+ server->probe.rtt = UINT_MAX;
+
+ for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+ ret = afs_vl_get_capabilities(net, &ac, key, server,
+ server_index, true);
+ if (ret != -EINPROGRESS) {
+ afs_vl_probe_done(server);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_send_vl_probes(struct afs_net *net, struct key *key,
+ struct afs_vlserver_list *vllist)
+{
+ struct afs_vlserver *server;
+ int i, ret;
+
+ for (i = 0; i < vllist->nr_servers; i++) {
+ server = vllist->servers[i].server;
+ if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
+ continue;
+
+ if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
+ ret = afs_do_probe_vlserver(net, server, key, i);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Wait for the first as-yet untried server to respond.
+ */
+int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
+ unsigned long untried)
+{
+ struct wait_queue_entry *waits;
+ struct afs_vlserver *server;
+ unsigned int rtt = UINT_MAX;
+ bool have_responders = false;
+ int pref = -1, i;
+
+ _enter("%u,%lx", vllist->nr_servers, untried);
+
+ /* Only wait for servers that have a probe outstanding. */
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+ __clear_bit(i, &untried);
+ if (server->probe.responded)
+ have_responders = true;
+ }
+ }
+ if (have_responders || !untried)
+ return 0;
+
+ waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+ if (!waits)
+ return -ENOMEM;
+
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ init_waitqueue_entry(&waits[i], current);
+ add_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ for (;;) {
+ bool still_probing = false;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ if (server->probe.responded)
+ goto stop;
+ if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+ still_probing = true;
+ }
+ }
+
+ if (!still_probing || unlikely(signal_pending(current)))
+ goto stop;
+ schedule();
+ }
+
+stop:
+ set_current_state(TASK_RUNNING);
+
+ for (i = 0; i < vllist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = vllist->servers[i].server;
+ if (server->probe.responded &&
+ server->probe.rtt < rtt) {
+ pref = i;
+ rtt = server->probe.rtt;
+ }
+
+ remove_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ kfree(waits);
+
+ if (pref == -1 && signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (pref >= 0)
+ vllist->preferred = pref;
+
+ _leave(" = 0 [%u]", pref);
+ return 0;
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644
index 0000000..b64a284
--- /dev/null
+++ b/fs/afs/vl_rotate.c
@@ -0,0 +1,355 @@
+/* Handle vlserver selection and rotation.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_vl.h"
+
+/*
+ * Initialise a vlserver cursor for a new operation on a cell.
+ *
+ * The cursor is zeroed and pointed at @cell and @key.  The overall error
+ * defaults to -EDESTADDRREQ and the address cursor error is set to SHRT_MAX,
+ * which afs_select_vlserver() takes to mean that nothing has been tried yet.
+ *
+ * Returns true if the caller may go on to select a server.  Returns false,
+ * with vc->error set to -EINTR and the cursor flagged as stopped, if a signal
+ * is already pending.
+ */
+bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
+ struct key *key)
+{
+ memset(vc, 0, sizeof(*vc));
+ vc->key = key;
+ vc->cell = cell;
+ vc->ac.error = SHRT_MAX;
+ vc->error = -EDESTADDRREQ;
+
+ if (!signal_pending(current))
+ return true;
+
+ /* Don't even start if we're already being interrupted. */
+ vc->error = -EINTR;
+ vc->flags |= AFS_VL_CURSOR_STOP;
+ return false;
+}
+
+/*
+ * Begin iteration through a cell's vlserver list.
+ *
+ * Waits (interruptibly) for AFS_CELL_FL_NO_LOOKUP_YET to clear on the cell,
+ * then gets the cell's current vlserver list under vl_servers_lock and marks
+ * every server in it as untried, with no server selected yet.
+ *
+ * Returns false if the wait was interrupted (vc->error is set to -ERESTARTSYS)
+ * or if there is no list or the list is empty (vc->error is left untouched).
+ */
+static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
+{
+ struct afs_cell *cell = vc->cell;
+
+ if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+ TASK_INTERRUPTIBLE) != 0) {
+ vc->error = -ERESTARTSYS;
+ return false;
+ }
+
+ read_lock(&cell->vl_servers_lock);
+ vc->server_list = afs_get_vlserverlist(
+ rcu_dereference_protected(cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock)));
+ read_unlock(&cell->vl_servers_lock);
+
+ if (!vc->server_list)
+ return false;
+ if (vc->server_list->nr_servers == 0)
+ return false;
+
+ /* One bit per server in the list.
+ * NOTE(review): assumes nr_servers < BITS_PER_LONG - confirm the bound.
+ */
+ vc->index = -1;
+ vc->untried = (1UL << vc->server_list->nr_servers) - 1;
+ return true;
+}
+
+/*
+ * Select the vlserver to use. May be called multiple times to rotate
+ * through the vlservers.
+ *
+ * Each call first evaluates the outcome of the previous attempt as recorded
+ * in vc->ac.error:
+ *
+ * - SHRT_MAX means nothing has been tried yet, so iteration is started;
+ * - 0 or any error not listed below stops the rotation with that result;
+ * - -ECONNABORTED is mapped through the abort code: AFSVL_IO,
+ * AFSVL_BADVOLOPER and AFSVL_NOMEM move on to the next server, anything
+ * else stops with the translated error;
+ * - network-level failures move on to the next address of the same server;
+ * - -ECONNRESET flags the cursor for a retry and moves on to the next server.
+ *
+ * Returns true if vc->ac has been set up with an address to try. Returns
+ * false once the rotation has stopped, with the outcome in vc->error.
+ */
+bool afs_select_vlserver(struct afs_vl_cursor *vc)
+{
+ struct afs_addr_list *alist;
+ struct afs_vlserver *vlserver;
+ u32 rtt;
+ int error = vc->ac.error, abort_code, i;
+
+ _enter("%lx[%d],%lx[%d],%d,%d",
+ vc->untried, vc->index,
+ vc->ac.tried, vc->ac.index,
+ error, vc->ac.abort_code);
+
+ if (vc->flags & AFS_VL_CURSOR_STOP) {
+ _leave(" = f [stopped]");
+ return false;
+ }
+
+ vc->nr_iterations++;
+
+ /* Evaluate the result of the previous operation, if there was one. */
+ switch (error) {
+ case SHRT_MAX:
+ goto start;
+
+ default:
+ case 0:
+ /* Success or local failure. Stop. */
+ vc->error = error;
+ vc->flags |= AFS_VL_CURSOR_STOP;
+ _leave(" = f [okay/local %d]", vc->ac.error);
+ return false;
+
+ case -ECONNABORTED:
+ /* The far side rejected the operation on some grounds. This
+ * might involve the server being busy or the volume having been moved.
+ */
+ switch (vc->ac.abort_code) {
+ case AFSVL_IO:
+ case AFSVL_BADVOLOPER:
+ case AFSVL_NOMEM:
+ /* The server went weird. */
+ vc->error = -EREMOTEIO;
+ //write_lock(&vc->cell->vl_servers_lock);
+ //vc->server_list->weird_mask |= 1 << vc->index;
+ //write_unlock(&vc->cell->vl_servers_lock);
+ goto next_server;
+
+ default:
+ vc->error = afs_abort_to_error(vc->ac.abort_code);
+ goto failed;
+ }
+
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ _debug("no conn %d", error);
+ vc->error = error;
+ goto iterate_address;
+
+ case -ECONNRESET:
+ _debug("call reset");
+ vc->error = error;
+ vc->flags |= AFS_VL_CURSOR_RETRY;
+ goto next_server;
+ }
+
+restart_from_beginning:
+ /* Drop the current list and start over, but only allow this once. */
+ _debug("restart");
+ afs_end_cursor(&vc->ac);
+ afs_put_vlserverlist(vc->cell->net, vc->server_list);
+ vc->server_list = NULL;
+ if (vc->flags & AFS_VL_CURSOR_RETRIED)
+ goto failed;
+ vc->flags |= AFS_VL_CURSOR_RETRIED;
+start:
+ _debug("start");
+
+ if (!afs_start_vl_iteration(vc))
+ goto failed;
+
+ error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
+ if (error < 0)
+ goto failed_set_error;
+
+pick_server:
+ _debug("pick [%lx]", vc->untried);
+
+ error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
+ if (error < 0)
+ goto failed_set_error;
+
+ /* Pick the untried server with the lowest RTT. */
+ vc->index = vc->server_list->preferred;
+ if (test_bit(vc->index, &vc->untried))
+ goto selected_server;
+
+ /* The preferred server has already been tried; scan the untried
+ * servers that responded to a probe for the quickest one.
+ */
+ vc->index = -1;
+ rtt = U32_MAX;
+ for (i = 0; i < vc->server_list->nr_servers; i++) {
+ struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+ if (!test_bit(i, &vc->untried) || !s->probe.responded)
+ continue;
+ if (s->probe.rtt < rtt) {
+ vc->index = i;
+ rtt = s->probe.rtt;
+ }
+ }
+
+ if (vc->index == -1)
+ goto no_more_servers;
+
+selected_server:
+ _debug("use %d", vc->index);
+ __clear_bit(vc->index, &vc->untried);
+
+ /* We're starting on a different vlserver from the list. We need to
+ * check it, find its address list and probe its capabilities before we
+ * use it.
+ */
+ ASSERTCMP(vc->ac.alist, ==, NULL);
+ vlserver = vc->server_list->servers[vc->index].server;
+ vc->server = vlserver;
+
+ _debug("USING VLSERVER: %s", vlserver->name);
+
+ read_lock(&vlserver->lock);
+ alist = rcu_dereference_protected(vlserver->addresses,
+ lockdep_is_held(&vlserver->lock));
+ afs_get_addrlist(alist);
+ read_unlock(&vlserver->lock);
+
+ /* Resetting the address cursor leaves vc->ac.alist NULL, so the cursor
+ * always takes over the list obtained via afs_get_addrlist() above.
+ */
+ memset(&vc->ac, 0, sizeof(vc->ac));
+ vc->ac.alist = alist;
+ vc->ac.index = -1;
+
+iterate_address:
+ ASSERT(vc->ac.alist);
+ /* Iterate over the current server's address list to try and find an
+ * address on which it will respond to us.
+ */
+ if (!afs_iterate_addresses(&vc->ac))
+ goto next_server;
+
+ _debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+
+ _leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
+ return true;
+
+next_server:
+ _debug("next");
+ afs_end_cursor(&vc->ac);
+ goto pick_server;
+
+no_more_servers:
+ /* That's all the servers poked to no good effect. Try again if some
+ * of them were busy.
+ */
+ if (vc->flags & AFS_VL_CURSOR_RETRY)
+ goto restart_from_beginning;
+
+ /* Work out which probe error to report. The cases below form a
+ * ladder: a server's probe error replaces the error chosen so far
+ * unless the chosen error matches one of the checks between that
+ * server's case label and the assignment at the bottom.
+ */
+ abort_code = 0;
+ error = -EDESTADDRREQ;
+ for (i = 0; i < vc->server_list->nr_servers; i++) {
+ struct afs_vlserver *s = vc->server_list->servers[i].server;
+ int probe_error = READ_ONCE(s->probe.error);
+
+ switch (probe_error) {
+ case 0:
+ continue;
+ default:
+ if (error == -ETIMEDOUT ||
+ error == -ETIME)
+ continue;
+ /* Fall through */
+ case -ETIMEDOUT:
+ case -ETIME:
+ if (error == -ENOMEM ||
+ error == -ENONET)
+ continue;
+ /* Fall through */
+ case -ENOMEM:
+ case -ENONET:
+ if (error == -ENETUNREACH)
+ continue;
+ /* Fall through */
+ case -ENETUNREACH:
+ if (error == -EHOSTUNREACH)
+ continue;
+ /* Fall through */
+ case -EHOSTUNREACH:
+ if (error == -ECONNREFUSED)
+ continue;
+ /* Fall through */
+ case -ECONNREFUSED:
+ if (error == -ECONNRESET)
+ continue;
+ /* Fall through */
+ case -ECONNRESET: /* Responded, but call expired. */
+ if (error == -ECONNABORTED)
+ continue;
+ /* Fall through */
+ case -ECONNABORTED:
+ abort_code = s->probe.abort_code;
+ error = probe_error;
+ continue;
+ }
+ }
+
+ if (error == -ECONNABORTED)
+ error = afs_abort_to_error(abort_code);
+
+failed_set_error:
+ vc->error = error;
+failed:
+ vc->flags |= AFS_VL_CURSOR_STOP;
+ afs_end_cursor(&vc->ac);
+ _leave(" = f [failed %d]", vc->error);
+ return false;
+}
+
+/*
+ * Dump the state of a vlserver cursor to the kernel log for debugging.
+ *
+ * Called by afs_end_vlserver_operation() when the operation ended with
+ * EDESTADDRREQ, ENETUNREACH or EHOSTUNREACH. Does nothing unless
+ * CONFIG_AFS_DEBUG_CURSOR is enabled, and stops reporting once the static
+ * counter exceeds 3. The counter is not synchronised, so the limit is only
+ * approximate under concurrency.
+ */
+static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
+{
+ static int count;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+ return;
+ count++;
+
+ rcu_read_lock();
+ pr_notice("EDESTADDR occurred\n");
+ pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
+ vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
+
+ if (vc->server_list) {
+ const struct afs_vlserver_list *sl = vc->server_list;
+ pr_notice("VC: SL nr=%u ix=%u\n",
+ sl->nr_servers, sl->index);
+ for (i = 0; i < sl->nr_servers; i++) {
+ const struct afs_vlserver *s = sl->servers[i].server;
+ /* Read the RCU-managed pointer exactly once so that
+ * the NULL check and the dereferences below see the
+ * same address list.
+ */
+ const struct afs_addr_list *a =
+ rcu_dereference(s->addresses);
+
+ pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
+ s->name, s->port, s->flags, s->probe.error);
+ if (a) {
+ pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
+ a->nr_ipv4, a->nr_addrs, a->max_addrs,
+ a->preferred);
+ pr_notice("VC: - pr=%lx R=%lx F=%lx\n",
+ a->probed, a->responded, a->failed);
+ if (a == vc->ac.alist)
+ pr_notice("VC: - current\n");
+ }
+ }
+ }
+
+ pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+ vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
+ vc->ac.responded, vc->ac.nr_iterations);
+ rcu_read_unlock();
+}
+
+/*
+ * Tidy up a volume location server cursor at the end of an operation.
+ *
+ * Ends the address cursor and puts the vlserver list. If the operation
+ * finished with EDESTADDRREQ, ENETUNREACH or EHOSTUNREACH, the cursor state
+ * is passed to afs_vl_dump_edestaddrreq() first.
+ *
+ * Returns the final error for the operation, with -ECONNABORTED translated
+ * through afs_abort_to_error().
+ */
+int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
+{
+ struct afs_net *net = vc->cell->net;
+
+ switch (vc->error) {
+ case -EDESTADDRREQ:
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ afs_vl_dump_edestaddrreq(vc);
+ break;
+ default:
+ break;
+ }
+
+ afs_end_cursor(&vc->ac);
+ afs_put_vlserverlist(net, vc->server_list);
+
+ if (vc->error != -ECONNABORTED)
+ return vc->error;
+
+ vc->error = afs_abort_to_error(vc->ac.abort_code);
+ return vc->error;
+}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index c3b7408..c3d9e5a 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
* Dispatch a get volume entry by name or ID operation (uuid variant). If the
* volname is a decimal number then it's a volume ID not a volume name.
*/
-struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
- struct afs_addr_cursor *ac,
- struct key *key,
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
const char *volname,
int volnamesz)
{
struct afs_vldb_entry *entry;
struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
size_t reqsz, padsz;
__be32 *bp;
@@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
return ERR_PTR(-ENOMEM);
}
- call->key = key;
+ call->key = vc->key;
call->reply[0] = entry;
call->ret_reply0 = true;
@@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
memset((void *)bp + volnamesz, 0, padsz);
trace_afs_make_vl_call(call);
- return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
+ return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
}
/*
@@ -187,19 +186,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
u32 uniquifier, nentries, count;
int i, ret;
- _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+ _enter("{%u,%zu/%u}",
+ call->unmarshall, iov_iter_count(call->_iter), call->count);
-again:
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_buf(call,
+ sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
call->unmarshall++;
/* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
case 1:
- ret = afs_extract_data(call, call->buffer,
- sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
- true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -216,28 +214,28 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
call->reply[0] = alist;
call->count = count;
call->count2 = nentries;
- call->offset = 0;
call->unmarshall++;
+ more_entries:
+ count = min(call->count, 4U);
+ afs_extract_to_buf(call, count * sizeof(__be32));
+
/* Extract entries */
case 2:
- count = min(call->count, 4U);
- ret = afs_extract_data(call, call->buffer,
- count * sizeof(__be32),
- call->count > 4);
+ ret = afs_extract_data(call, call->count > 4);
if (ret < 0)
return ret;
alist = call->reply[0];
bp = call->buffer;
+ count = min(call->count, 4U);
for (i = 0; i < count; i++)
if (alist->nr_addrs < call->count2)
afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
call->count -= count;
if (call->count > 0)
- goto again;
- call->offset = 0;
+ goto more_entries;
call->unmarshall++;
break;
}
@@ -267,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
* Dispatch an operation to get the addresses for a server, where the server is
* nominated by UUID.
*/
-struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
- struct afs_addr_cursor *ac,
- struct key *key,
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
struct afs_ListAddrByAttributes__xdr *r;
const struct afs_uuid *u = (const struct afs_uuid *)uuid;
struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
__be32 *bp;
int i;
@@ -286,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
if (!call)
return ERR_PTR(-ENOMEM);
- call->key = key;
+ call->key = vc->key;
call->reply[0] = NULL;
call->ret_reply0 = true;
@@ -307,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
r->uuid.node[i] = htonl(u->node[i]);
trace_afs_make_vl_call(call);
- return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+ return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
}
/*
@@ -318,54 +315,51 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
u32 count;
int ret;
- _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+ _enter("{%u,%zu/%u}",
+ call->unmarshall, iov_iter_count(call->_iter), call->count);
-again:
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_tmp(call);
call->unmarshall++;
/* Extract the capabilities word count */
case 1:
- ret = afs_extract_data(call, &call->tmp,
- 1 * sizeof(__be32),
- true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
count = ntohl(call->tmp);
-
call->count = count;
call->count2 = count;
- call->offset = 0;
+
call->unmarshall++;
+ afs_extract_discard(call, count * sizeof(__be32));
/* Extract capabilities words */
case 2:
- count = min(call->count, 16U);
- ret = afs_extract_data(call, call->buffer,
- count * sizeof(__be32),
- call->count > 16);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
/* TODO: Examine capabilities */
- call->count -= count;
- if (call->count > 0)
- goto again;
- call->offset = 0;
call->unmarshall++;
break;
}
- call->reply[0] = (void *)(unsigned long)call->service_id;
-
_leave(" = 0 [done]");
return 0;
}
+/*
+ * Destructor for VL.GetCapabilities calls: put the vlserver stored in
+ * call->reply[0] by afs_vl_get_capabilities(), then run the flat call
+ * destructor.
+ */
+static void afs_destroy_vl_get_capabilities(struct afs_call *call)
+{
+ struct afs_vlserver *server = call->reply[0];
+
+ afs_put_vlserver(call->net, server);
+ afs_flat_call_destructor(call);
+}
+
/*
* VL.GetCapabilities operation type
*/
@@ -373,11 +367,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
.name = "VL.GetCapabilities",
.op = afs_VL_GetCapabilities,
.deliver = afs_deliver_vl_get_capabilities,
- .destructor = afs_flat_call_destructor,
+ .done = afs_vlserver_probe_result,
+ .destructor = afs_destroy_vl_get_capabilities,
};
/*
- * Probe a fileserver for the capabilities that it supports. This can
+ * Probe a volume server for the capabilities that it supports. This can
* return up to 196 words.
*
* We use this to probe for service upgrade to determine what the server at the
@@ -385,7 +380,10 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
*/
int afs_vl_get_capabilities(struct afs_net *net,
struct afs_addr_cursor *ac,
- struct key *key)
+ struct key *key,
+ struct afs_vlserver *server,
+ unsigned int server_index,
+ bool async)
{
struct afs_call *call;
__be32 *bp;
@@ -397,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net,
return -ENOMEM;
call->key = key;
- call->upgrade = true; /* Let's see if this is a YFS server */
- call->reply[0] = (void *)VLGETCAPABILITIES;
- call->ret_reply0 = true;
+ call->reply[0] = afs_get_vlserver(server);
+ call->reply[1] = (void *)(long)server_index;
+ call->upgrade = true;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -407,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net,
/* Can't take a ref on server */
trace_afs_make_vl_call(call);
- return afs_make_call(ac, call, GFP_KERNEL, false);
+ return afs_make_call(ac, call, GFP_KERNEL, async);
}
/*
@@ -426,22 +425,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
u32 uniquifier, size;
int ret;
- _enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+ _enter("{%u,%zu,%u}",
+ call->unmarshall, iov_iter_count(call->_iter), call->count2);
-again:
switch (call->unmarshall) {
case 0:
- call->offset = 0;
+ afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
call->unmarshall = 1;
/* Extract the returned uuid, uniquifier, fsEndpoints count and
* either the first fsEndpoint type or the volEndpoints
* count if there are no fsEndpoints. */
case 1:
- ret = afs_extract_data(call, call->buffer,
- sizeof(uuid_t) +
- 3 * sizeof(__be32),
- true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -451,22 +447,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
call->count2 = ntohl(*bp); /* Type or next count */
if (call->count > YFS_MAXENDPOINTS)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_fsendpt_num);
alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
call->reply[0] = alist;
- call->offset = 0;
if (call->count == 0)
goto extract_volendpoints;
- call->unmarshall = 2;
-
- /* Extract fsEndpoints[] entries */
- case 2:
+ next_fsendpoint:
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
size = sizeof(__be32) * (1 + 1 + 1);
@@ -475,11 +468,17 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
size = sizeof(__be32) * (1 + 4 + 1);
break;
default:
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_fsendpt_type);
}
size += sizeof(__be32);
- ret = afs_extract_data(call, call->buffer, size, true);
+ afs_extract_to_buf(call, size);
+ call->unmarshall = 2;
+
+ /* Extract fsEndpoints[] entries */
+ case 2:
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -488,18 +487,21 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
if (ntohl(bp[0]) != sizeof(__be32) * 2)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_fsendpt4_len);
afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
bp += 3;
break;
case YFS_ENDPOINT_IPV6:
if (ntohl(bp[0]) != sizeof(__be32) * 5)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_fsendpt6_len);
afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
bp += 6;
break;
default:
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_fsendpt_type);
}
/* Got either the type of the next entry or the count of
@@ -507,10 +509,9 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
*/
call->count2 = ntohl(*bp++);
- call->offset = 0;
call->count--;
if (call->count > 0)
- goto again;
+ goto next_fsendpoint;
extract_volendpoints:
/* Extract the list of volEndpoints. */
@@ -518,8 +519,10 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (!call->count)
goto end;
if (call->count > YFS_MAXENDPOINTS)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_vlendpt_type);
+ afs_extract_to_buf(call, 1 * sizeof(__be32));
call->unmarshall = 3;
/* Extract the type of volEndpoints[0]. Normally we would
@@ -527,17 +530,14 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
* data of the current one, but this is the first...
*/
case 3:
- ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
bp = call->buffer;
- call->count2 = ntohl(*bp++);
- call->offset = 0;
- call->unmarshall = 4;
- /* Extract volEndpoints[] entries */
- case 4:
+ next_volendpoint:
+ call->count2 = ntohl(*bp++);
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
size = sizeof(__be32) * (1 + 1 + 1);
@@ -546,12 +546,18 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
size = sizeof(__be32) * (1 + 4 + 1);
break;
default:
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_vlendpt_type);
}
if (call->count > 1)
- size += sizeof(__be32);
- ret = afs_extract_data(call, call->buffer, size, true);
+ size += sizeof(__be32); /* Get next type too */
+ afs_extract_to_buf(call, size);
+ call->unmarshall = 4;
+
+ /* Extract volEndpoints[] entries */
+ case 4:
+ ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
@@ -559,34 +565,35 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
if (ntohl(bp[0]) != sizeof(__be32) * 2)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_vlendpt4_len);
bp += 3;
break;
case YFS_ENDPOINT_IPV6:
if (ntohl(bp[0]) != sizeof(__be32) * 5)
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_vlendpt6_len);
bp += 6;
break;
default:
- return afs_protocol_error(call, -EBADMSG);
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_yvl_vlendpt_type);
}
/* Got either the type of the next entry or the count of
* volEndpoints if no more fsEndpoints.
*/
- call->offset = 0;
call->count--;
- if (call->count > 0) {
- call->count2 = ntohl(*bp++);
- goto again;
- }
+ if (call->count > 0)
+ goto next_volendpoint;
end:
+ afs_extract_discard(call, 0);
call->unmarshall = 5;
/* Done */
case 5:
- ret = afs_extract_data(call, call->buffer, 0, false);
+ ret = afs_extract_data(call, false);
if (ret < 0)
return ret;
call->unmarshall = 6;
@@ -596,11 +603,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
}
alist = call->reply[0];
-
- /* Start with IPv6 if available. */
- if (alist->nr_ipv4 < alist->nr_addrs)
- alist->index = alist->nr_ipv4;
-
_leave(" = 0 [done]");
return 0;
}
@@ -619,12 +621,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
* Dispatch an operation to get the addresses for a server, where the server is
* nominated by UUID.
*/
-struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
- struct afs_addr_cursor *ac,
- struct key *key,
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
const uuid_t *uuid)
{
struct afs_call *call;
+ struct afs_net *net = vc->cell->net;
__be32 *bp;
_enter("");
@@ -635,7 +636,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
if (!call)
return ERR_PTR(-ENOMEM);
- call->key = key;
+ call->key = vc->key;
call->reply[0] = NULL;
call->ret_reply0 = true;
@@ -646,5 +647,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
trace_afs_make_vl_call(call);
- return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+ return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 3037bd0..00975ed3 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -74,55 +74,19 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
const char *volname,
size_t volnamesz)
{
- struct afs_addr_cursor ac;
- struct afs_vldb_entry *vldb;
+ struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
+ struct afs_vl_cursor vc;
int ret;
- ret = afs_set_vl_cursor(&ac, cell);
- if (ret < 0)
- return ERR_PTR(ret);
+ if (!afs_begin_vlserver_operation(&vc, cell, key))
+ return ERR_PTR(-ERESTARTSYS);
- while (afs_iterate_addresses(&ac)) {
- if (!test_bit(ac.index, &ac.alist->probed)) {
- ret = afs_vl_get_capabilities(cell->net, &ac, key);
- switch (ret) {
- case VL_SERVICE:
- clear_bit(ac.index, &ac.alist->yfs);
- set_bit(ac.index, &ac.alist->probed);
- ac.addr->srx_service = ret;
- break;
- case YFS_VL_SERVICE:
- set_bit(ac.index, &ac.alist->yfs);
- set_bit(ac.index, &ac.alist->probed);
- ac.addr->srx_service = ret;
- break;
- }
- }
-
- vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
- volname, volnamesz);
- switch (ac.error) {
- case 0:
- afs_end_cursor(&ac);
- return vldb;
- case -ECONNABORTED:
- ac.error = afs_abort_to_error(ac.abort_code);
- goto error;
- case -ENOMEM:
- case -ENONET:
- goto error;
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -ECONNREFUSED:
- break;
- default:
- ac.error = -EIO;
- goto error;
- }
+ while (afs_select_vlserver(&vc)) {
+ vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
}
-error:
- return ERR_PTR(afs_end_cursor(&ac));
+ ret = afs_end_vlserver_operation(&vc);
+ return ret < 0 ? ERR_PTR(ret) : vldb;
}
/*
@@ -270,7 +234,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%u", volume->vid);
+ idsz = sprintf(idbuf, "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 19c04caf..72efcfc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -33,10 +33,21 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
loff_t pos, unsigned int len, struct page *page)
{
struct afs_read *req;
+ size_t p;
+ void *data;
int ret;
_enter(",,%llu", (unsigned long long)pos);
+ if (pos >= vnode->vfs_inode.i_size) {
+ p = pos & ~PAGE_MASK;
+ ASSERTCMP(p + len, <=, PAGE_SIZE);
+ data = kmap(page);
+ memset(data + p, 0, len);
+ kunmap(page);
+ return 0;
+ }
+
req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
GFP_KERNEL);
if (!req)
@@ -81,7 +92,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index = pos >> PAGE_SHIFT;
int ret;
- _enter("{%x:%u},{%lx},%u,%u",
+ _enter("{%llx:%llu},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
/* We want to store information about how much of a page is altered in
@@ -181,7 +192,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
loff_t i_size, maybe_i_size;
int ret;
- _enter("{%x:%u},{%lx}",
+ _enter("{%llx:%llu},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
maybe_i_size = pos + copied;
@@ -230,7 +241,7 @@ static void afs_kill_pages(struct address_space *mapping,
struct pagevec pv;
unsigned count, loop;
- _enter("{%x:%u},%lx-%lx",
+ _enter("{%llx:%llu},%lx-%lx",
vnode->fid.vid, vnode->fid.vnode, first, last);
pagevec_init(&pv);
@@ -272,7 +283,7 @@ static void afs_redirty_pages(struct writeback_control *wbc,
struct pagevec pv;
unsigned count, loop;
- _enter("{%x:%u},%lx-%lx",
+ _enter("{%llx:%llu},%lx-%lx",
vnode->fid.vid, vnode->fid.vnode, first, last);
pagevec_init(&pv);
@@ -314,7 +325,7 @@ static int afs_store_data(struct address_space *mapping,
struct list_head *p;
int ret = -ENOKEY, ret2;
- _enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
+ _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -533,6 +544,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
case -ENOENT:
case -ENOMEDIUM:
case -ENXIO:
+ trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
afs_kill_pages(mapping, first, last);
mapping_set_error(mapping, ret);
break;
@@ -675,7 +687,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
unsigned count, loop;
pgoff_t first = call->first, last = call->last;
- _enter("{%x:%u},{%lx-%lx}",
+ _enter("{%llx:%llu},{%lx-%lx}",
vnode->fid.vid, vnode->fid.vnode, first, last);
pagevec_init(&pv);
@@ -714,7 +726,7 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t result;
size_t count = iov_iter_count(from);
- _enter("{%x.%u},{%zu},",
+ _enter("{%llx:%llu},{%zu},",
vnode->fid.vid, vnode->fid.vnode, count);
if (IS_SWAPFILE(&vnode->vfs_inode)) {
@@ -742,7 +754,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
- _enter("{%x:%u},{n=%pD},%d",
+ _enter("{%llx:%llu},{n=%pD},%d",
vnode->fid.vid, vnode->fid.vnode, file,
datasync);
@@ -760,7 +772,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
struct afs_vnode *vnode = AFS_FS_I(inode);
unsigned long priv;
- _enter("{{%x:%u}},{%lx}",
+ _enter("{{%llx:%llu}},{%lx}",
vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
sb_start_pagefault(inode->i_sb);
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index cfcc674..a2cdf25 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -72,7 +72,7 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler,
char text[8 + 1 + 8 + 1 + 8 + 1];
size_t len;
- len = sprintf(text, "%x:%x:%x",
+ len = sprintf(text, "%llx:%llx:%x",
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
if (size == 0)
return len;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
new file mode 100644
index 0000000..12658c1
--- /dev/null
+++ b/fs/afs/yfsclient.c
@@ -0,0 +1,2184 @@
+/* YFS File Server client stubs
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+#include "protocol_yfs.h"
+
+static const struct afs_fid afs_zero_fid;
+
+/*
+ * Attach to the call whatever afs_get_cb_interest() returns for the callback
+ * interest of the server being used.
+ */
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
+{
+	struct afs_cb_interest *interest = afs_get_cb_interest(cbi);
+
+	call->cbi = interest;
+}
+
+/* Size of the XDR object that x points to, counted in 32-bit XDR words. */
+#define xdr_size(x) (sizeof(*(x)) / sizeof(__be32))
+
+/*
+ * Decode a YFSFid from the buffer into @fid and step the buffer pointer past
+ * it.
+ */
+static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const __be32 *bp = *_bp;
+	const struct yfs_xdr_YFSFid *wire = (const void *)bp;
+
+	fid->vid	= xdr_to_u64(wire->volume);
+	fid->vnode	= xdr_to_u64(wire->vnode.lo);
+	fid->vnode_hi	= ntohl(wire->vnode.hi);
+	fid->unique	= ntohl(wire->vnode.unique);
+
+	*_bp = bp + xdr_size(wire);
+}
+
+/* Store @n as one big-endian word at @bp; return the next free word. */
+static __be32 *xdr_encode_u32(__be32 *bp, u32 n)
+{
+	bp[0] = htonl(n);
+	return bp + 1;
+}
+
+/* Store @n in XDR 64-bit form at @bp; return the next free word. */
+static __be32 *xdr_encode_u64(__be32 *bp, u64 n)
+{
+	struct yfs_xdr_u64 *slot = (void *)bp;
+
+	*slot = u64_to_xdr(n);
+	bp += xdr_size(slot);
+	return bp;
+}
+
+/* Marshal @fid as a YFSFid at @bp; return the next free word. */
+static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid)
+{
+	struct yfs_xdr_YFSFid *wire = (void *)bp;
+
+	wire->volume		= u64_to_xdr(fid->vid);
+	wire->vnode.lo		= u64_to_xdr(fid->vnode);
+	wire->vnode.hi		= htonl(fid->vnode_hi);
+	wire->vnode.unique	= htonl(fid->unique);
+
+	bp += xdr_size(wire);
+	return bp;
+}
+
+/*
+ * Number of bytes an XDR string of @len characters occupies: a 32-bit length
+ * word followed by the characters rounded up to a whole number of words.
+ */
+static size_t xdr_strlen(unsigned int len)
+{
+	size_t payload = round_up(len, sizeof(__be32));
+
+	return sizeof(__be32) + payload;
+}
+
+/*
+ * Marshal @len bytes from @p as an XDR string: the length word, the bytes
+ * themselves and zero padding up to the next 4-byte boundary.  Returns the
+ * next free word.
+ */
+static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len)
+{
+	unsigned int tail = len & 3;
+	u8 *data;
+
+	bp = xdr_encode_u32(bp, len);
+	data = (u8 *)bp;
+	memcpy(data, p, len);
+
+	if (tail) {
+		unsigned int pad = 4 - tail;
+
+		memset(data + len, 0, pad);
+		len += pad;
+	}
+
+	return bp + len / sizeof(__be32);
+}
+
+/* Convert a timespec into a count of 100ns intervals. */
+static s64 linux_to_yfs_time(const struct timespec64 *t)
+{
+	u64 ticks = (u64)t->tv_sec * 10000000;
+
+	ticks += t->tv_nsec / 100;
+	return ticks;
+}
+
+/*
+ * Marshal a YFSStoreStatus that sets only the mode (permission bits of
+ * @mode); all other fields are zeroed.  Returns the next free word.
+ */
+static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode)
+{
+	struct yfs_xdr_YFSStoreStatus *ss = (void *)bp;
+
+	ss->mask		= htonl(AFS_SET_MODE);
+	ss->mode		= htonl(mode & S_IALLUGO);
+	ss->mtime_client	= u64_to_xdr(0);
+	ss->owner		= u64_to_xdr(0);
+	ss->group		= u64_to_xdr(0);
+
+	bp += xdr_size(ss);
+	return bp;
+}
+
+/*
+ * Marshal a YFSStoreStatus that sets only the client mtime (taken from @t);
+ * all other fields are zeroed.  Returns the next free word.
+ */
+static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t)
+{
+	struct yfs_xdr_YFSStoreStatus *ss = (void *)bp;
+	s64 when = linux_to_yfs_time(t);
+
+	ss->mask		= htonl(AFS_SET_MTIME);
+	ss->mode		= htonl(0);
+	ss->mtime_client	= u64_to_xdr(when);
+	ss->owner		= u64_to_xdr(0);
+	ss->group		= u64_to_xdr(0);
+
+	bp += xdr_size(ss);
+	return bp;
+}
+
+/*
+ * Convert a signed 100ns-resolution 64-bit time into a timespec.
+ *
+ * The result is normalised: tv_nsec is always in the range [0, 1s), so for a
+ * negative time that is not a whole number of seconds tv_sec is rounded
+ * towards minus infinity.  This makes the conversion the exact inverse of
+ * linux_to_yfs_time() for normalised timespecs.
+ */
+static struct timespec64 yfs_time_to_linux(s64 t)
+{
+	struct timespec64 ts;
+	u64 abs_t;
+	u32 rem;
+
+	/*
+	 * Unfortunately can not use normal 64 bit division on 32 bit arch, but
+	 * the alternative, do_div, does not work with negative numbers so have
+	 * to special case them
+	 */
+	if (t < 0) {
+		/* Negate in unsigned arithmetic so the minimum s64 value is
+		 * handled without signed overflow.
+		 */
+		abs_t = -(u64)t;
+		rem = do_div(abs_t, 10000000);
+		if (rem) {
+			ts.tv_sec = -(s64)abs_t - 1;
+			ts.tv_nsec = (10000000 - rem) * 100;
+		} else {
+			ts.tv_sec = -(s64)abs_t;
+			ts.tv_nsec = 0;
+		}
+	} else {
+		abs_t = t;
+		rem = do_div(abs_t, 10000000);
+		ts.tv_nsec = rem * 100;
+		ts.tv_sec = abs_t;
+	}
+
+	return ts;
+}
+
+/* Decode an XDR 64-bit time value and convert it to a timespec. */
+static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr)
+{
+	s64 ticks;
+
+	ticks = xdr_to_u64(xdr);
+	return yfs_time_to_linux(ticks);
+}
+
+/*
+ * Check that marshalling ended exactly at the end of the request buffer and
+ * log a message if more or less than call->request_size bytes were written.
+ */
+static void yfs_check_req(struct afs_call *call, __be32 *bp)
+{
+	size_t used = (void *)bp - call->request;
+
+	if (used == call->request_size)
+		return;
+
+	if (used > call->request_size)
+		pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n",
+		       call->type->name, used, call->request_size);
+	else
+		pr_warning("kAFS: %s: Request buffer underflow (%zu<%u)\n",
+			   call->type->name, used, call->request_size);
+}
+
+/*
+ * Log a bad file status record as hex: five rows of four words followed by
+ * one final word.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 x[4];
+	int row;
+
+	pr_notice("YFS XDR: Bad status record\n");
+	for (row = 0; row < 5; row++) {
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  row * 16,
+			  ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	memcpy(x, bp, 4);
+	pr_notice("0x50: %08x\n", ntohl(x[0]));
+}
+
+/*
+ * Decode a YFSFetchStatus block
+ *
+ * The record at *_bp is decoded into @status.  If @vnode is given, the inode
+ * is updated from the new status, and a VNOVNODE abort code marks the vnode
+ * deleted.  If @read_req is given, it receives the data version and file
+ * size.  *_bp is advanced past the record whenever 0 is returned, including
+ * when the record only carries an abort code, so that any following records
+ * are decoded from the right place.
+ *
+ * Returns 0 on success or the result of afs_protocol_error() if the record
+ * has an unknown file type or the file type changed unexpectedly; in that
+ * case *_bp is left pointing at the bad record.
+ */
+static int xdr_decode_YFSFetchStatus(struct afs_call *call,
+				     const __be32 **_bp,
+				     struct afs_file_status *status,
+				     struct afs_vnode *vnode,
+				     const afs_dataversion_t *expected_version,
+				     struct afs_read *read_req)
+{
+	const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+	u32 type;
+	u8 flags = 0;
+
+	status->abort_code = ntohl(xdr->abort_code);
+	if (status->abort_code != 0) {
+		if (vnode && status->abort_code == VNOVNODE) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			status->nlink = 0;
+			__afs_break_callback(vnode);
+		}
+		/* The record still occupies its full size in the reply. */
+		*_bp += xdr_size(xdr);
+		return 0;
+	}
+
+	type = ntohl(xdr->type);
+	switch (type) {
+	case AFS_FTYPE_FILE:
+	case AFS_FTYPE_DIR:
+	case AFS_FTYPE_SYMLINK:
+		if (type != status->type &&
+		    vnode &&
+		    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+			pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
+				   vnode->fid.vid,
+				   vnode->fid.vnode,
+				   vnode->fid.unique,
+				   status->type, type);
+			goto bad;
+		}
+		status->type = type;
+		break;
+	default:
+		goto bad;
+	}
+
+	/* Copy a 32-bit metadata field, noting whether it changed. */
+#define EXTRACT_M4(FIELD)					\
+	do {							\
+		u32 x = ntohl(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_META_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+	/* Copy a 64-bit metadata field, noting whether it changed. */
+#define EXTRACT_M8(FIELD)					\
+	do {							\
+		u64 x = xdr_to_u64(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_META_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+	/* Copy a 64-bit data-related field, noting whether it changed. */
+#define EXTRACT_D8(FIELD)					\
+	do {							\
+		u64 x = xdr_to_u64(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_DATA_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+	EXTRACT_M4(nlink);
+	EXTRACT_D8(size);
+	EXTRACT_D8(data_version);
+	EXTRACT_M8(author);
+	EXTRACT_M8(owner);
+	EXTRACT_M8(group);
+	EXTRACT_M4(mode);
+	EXTRACT_M4(caller_access); /* call ticket dependent */
+	EXTRACT_M4(anon_access);
+
+	status->mtime_client = xdr_to_time(xdr->mtime_client);
+	status->mtime_server = xdr_to_time(xdr->mtime_server);
+	status->lock_count = ntohl(xdr->lock_count);
+
+	if (read_req) {
+		read_req->data_version = status->data_version;
+		read_req->file_size = status->size;
+	}
+
+	*_bp += xdr_size(xdr);
+
+	if (vnode) {
+		if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
+			flags |= AFS_VNODE_NOT_YET_SET;
+		afs_update_inode_from_status(vnode, status, expected_version,
+					     flags);
+	}
+
+	return 0;
+
+bad:
+	xdr_dump_bad(*_bp);
+	return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+}
+
+/*
+ * Decode a file status record.  When a target vnode is supplied, the decode
+ * is done with the vnode's cb_lock held for writing so that stat() sees the
+ * attribute update atomically; without a vnode no locking is needed.
+ */
+static int yfs_decode_status(struct afs_call *call,
+			     const __be32 **_bp,
+			     struct afs_file_status *status,
+			     struct afs_vnode *vnode,
+			     const afs_dataversion_t *expected_version,
+			     struct afs_read *read_req)
+{
+	int ret;
+
+	if (vnode)
+		write_seqlock(&vnode->cb_lock);
+
+	ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
+					expected_version, read_req);
+
+	if (vnode)
+		write_sequnlock(&vnode->cb_lock);
+
+	return ret;
+}
+
+/*
+ * Decode a YFSCallBack block and, unless the callback has been broken in the
+ * meantime, apply it to the vnode under the vnode's cb_lock.  If the vnode
+ * was using a different callback interest, the call's and the vnode's
+ * interests are exchanged.  The buffer pointer is always advanced.
+ */
+static void xdr_decode_YFSCallBack(struct afs_call *call,
+				   struct afs_vnode *vnode,
+				   const __be32 **_bp)
+{
+	struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp;
+	struct afs_cb_interest *prev, *cbi = call->cbi;
+	u64 expiry;
+
+	write_seqlock(&vnode->cb_lock);
+
+	if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
+		expiry = xdr_to_u64(xdr->expiration_time);
+		do_div(expiry, 10 * 1000 * 1000);
+
+		vnode->cb_version	= ntohl(xdr->version);
+		vnode->cb_type		= ntohl(xdr->type);
+		vnode->cb_expires_at	= expiry + ktime_get_real_seconds();
+
+		prev = vnode->cb_interest;
+		if (prev != call->cbi) {
+			vnode->cb_interest = cbi;
+			cbi = prev;
+		}
+		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	}
+
+	write_sequnlock(&vnode->cb_lock);
+
+	call->cbi = cbi;
+	*_bp += xdr_size(xdr);
+}
+
+/*
+ * Decode a YFSCallBack block into a caller-supplied afs_callback record
+ * without touching any vnode, then advance the buffer pointer.
+ */
+static void xdr_decode_YFSCallBack_raw(const __be32 **_bp,
+				       struct afs_callback *cb)
+{
+	struct yfs_xdr_YFSCallBack *wire = (void *)*_bp;
+	u64 expiry = xdr_to_u64(wire->expiration_time);
+
+	do_div(expiry, 10 * 1000 * 1000);
+
+	cb->version	= ntohl(wire->version);
+	cb->type	= ntohl(wire->type);
+	cb->expires_at	= expiry + ktime_get_real_seconds();
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Decode a YFSVolSync block.  @volsync may be NULL, in which case the block
+ * is merely skipped.
+ */
+static void xdr_decode_YFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
+{
+	struct yfs_xdr_YFSVolSync *wire = (void *)*_bp;
+
+	if (volsync) {
+		u64 created = xdr_to_u64(wire->vol_creation_date);
+
+		do_div(created, 10 * 1000 * 1000);
+		volsync->creation = created;
+	}
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Marshal the attributes selected by attr->ia_valid (mtime, owner, group and
+ * mode) into a YFSStoreStatus block.  Fields that are not being set are
+ * encoded as zero.  Returns the next free word.
+ */
+static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr)
+{
+	struct yfs_xdr_YFSStoreStatus *ss = (void *)bp;
+	s64 mtime = 0, owner = 0, group = 0;
+	u32 mask = 0, mode = 0;
+
+	if (attr->ia_valid & ATTR_MTIME) {
+		mtime = linux_to_yfs_time(&attr->ia_mtime);
+		mask |= AFS_SET_MTIME;
+	}
+
+	if (attr->ia_valid & ATTR_UID) {
+		owner = from_kuid(&init_user_ns, attr->ia_uid);
+		mask |= AFS_SET_OWNER;
+	}
+
+	if (attr->ia_valid & ATTR_GID) {
+		group = from_kgid(&init_user_ns, attr->ia_gid);
+		mask |= AFS_SET_GROUP;
+	}
+
+	if (attr->ia_valid & ATTR_MODE) {
+		mode = attr->ia_mode & S_IALLUGO;
+		mask |= AFS_SET_MODE;
+	}
+
+	ss->mask		= htonl(mask);
+	ss->mode		= htonl(mode);
+	ss->mtime_client	= u64_to_xdr(mtime);
+	ss->owner		= u64_to_xdr(owner);
+	ss->group		= u64_to_xdr(group);
+
+	bp += xdr_size(ss);
+	return bp;
+}
+
+/*
+ * Decode a YFSFetchVolumeStatus block into @vs and advance the buffer
+ * pointer.  The minimum quota is not carried by this record and is set to 0.
+ */
+static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const struct yfs_xdr_YFSFetchVolumeStatus *wire = (const void *)*_bp;
+	u32 flags = ntohl(wire->flags);
+
+	vs->vid			= xdr_to_u64(wire->vid);
+	vs->parent_id		= xdr_to_u64(wire->parent_id);
+	vs->online		= flags & yfs_FVSOnline;
+	vs->in_service		= flags & yfs_FVSInservice;
+	vs->blessed		= flags & yfs_FVSBlessed;
+	vs->needs_salvage	= flags & yfs_FVSNeedsSalvage;
+	vs->type		= ntohl(wire->type);
+	vs->min_quota		= 0;
+	vs->max_quota		= xdr_to_u64(wire->max_quota);
+	vs->blocks_in_use	= xdr_to_u64(wire->blocks_in_use);
+	vs->part_blocks_avail	= xdr_to_u64(wire->part_blocks_avail);
+	vs->part_max_blocks	= xdr_to_u64(wire->part_max_blocks);
+	vs->vol_copy_date	= xdr_to_u64(wire->vol_copy_date);
+	vs->vol_backup_date	= xdr_to_u64(wire->vol_backup_date);
+
+	*_bp += xdr_size(wire);
+}
+
+/*
+ * Deliver reply data to a YFS.FetchStatus call: once the whole reply has
+ * arrived, decode the file status, the callback and the volsync record.
+ */
+static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	struct afs_volsync *volsync = call->reply[1];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSCallBack(call, vnode, &bp);
+	xdr_decode_YFSVolSync(&bp, volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type descriptor for YFS.FetchStatus on a vnode.
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
+	.name		= "YFS.FetchStatus(vnode)",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.FetchStatus call for the cursor's vnode.  For a new inode the
+ * expected data version is 1, otherwise it is the version currently recorded
+ * in the vnode.  Returns -ENOMEM (also recorded in fc->ac.error) if the call
+ * cannot be allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
+			     bool new_inode)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_net *net = afs_v2net(vnode);
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	reqsz = sizeof(__be32) * 2 +
+		sizeof(struct yfs_xdr_YFSFid);
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSCallBack) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode,
+				   reqsz, rplsz);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = volsync;
+	if (new_inode)
+		call->expected_version = 1;
+	else
+		call->expected_version = vnode->status.data_version;
+
+	/* Marshal the request: opcode, RPC flags, file ID. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.FetchData64.
+ *
+ * The reply is consumed by a state machine keyed on call->unmarshall, each
+ * state falling through to the next once its data has been extracted:
+ *
+ *  0: initialise the read request and arrange to extract a 64-bit length
+ *  1: extract the returned data length and work out how much to keep
+ *  2: extract the data into the request's pages, one page segment at a time
+ *  3: discard any data the server sent beyond what was asked for
+ *  4: extract and decode the status, callback and volsync records
+ *  5: nothing more to extract
+ *
+ * A negative result from afs_extract_data() is returned to the caller as is.
+ * After the switch, the pages from req->index onwards are zero-filled from
+ * req->offset and handed to req->page_done (if set).
+ */
+static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
+{
+ struct afs_vnode *vnode = call->reply[0];
+ struct afs_read *req = call->reply[2];
+ const __be32 *bp;
+ unsigned int size;
+ int ret;
+
+ _enter("{%u,%zu/%llu}",
+ call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
+
+ switch (call->unmarshall) {
+ case 0:
+ req->actual_len = 0;
+ req->index = 0;
+ req->offset = req->pos & (PAGE_SIZE - 1); /* offset within the first page */
+ afs_extract_to_tmp64(call);
+ call->unmarshall++; /* fall through to state 1 */
+
+ /* extract the returned data length */
+ case 1:
+ _debug("extract data length");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ req->actual_len = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", req->actual_len);
+ req->remain = min(req->len, req->actual_len); /* never keep more than was requested */
+ if (req->remain == 0)
+ goto no_more_data;
+
+ call->unmarshall++;
+
+ begin_page:
+ ASSERTCMP(req->index, <, req->nr_pages);
+ if (req->remain > PAGE_SIZE - req->offset)
+ size = PAGE_SIZE - req->offset;
+ else
+ size = req->remain;
+ call->bvec[0].bv_len = size;
+ call->bvec[0].bv_offset = req->offset;
+ call->bvec[0].bv_page = req->pages[req->index];
+ iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+ ASSERTCMP(size, <=, PAGE_SIZE); /* fall through to state 2 */
+
+ /* extract the returned data */
+ case 2:
+ _debug("extract data %zu/%llu",
+ iov_iter_count(&call->iter), req->remain);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+ req->remain -= call->bvec[0].bv_len;
+ req->offset += call->bvec[0].bv_len;
+ ASSERTCMP(req->offset, <=, PAGE_SIZE);
+ if (req->offset == PAGE_SIZE) { /* this page is full: move on to the next one */
+ req->offset = 0;
+ if (req->page_done)
+ req->page_done(call, req);
+ req->index++;
+ if (req->remain > 0)
+ goto begin_page;
+ }
+
+ ASSERTCMP(req->remain, ==, 0);
+ if (req->actual_len <= req->len)
+ goto no_more_data;
+
+ /* Discard any excess data the server gave us */
+ iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+ call->unmarshall = 3; /* fall through to state 3 */
+ case 3:
+ _debug("extract discard %zu/%llu",
+ iov_iter_count(&call->iter), req->actual_len - req->len);
+
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ no_more_data:
+ call->unmarshall = 4;
+ afs_extract_to_buf(call,
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSCallBack) +
+ sizeof(struct yfs_xdr_YFSVolSync)); /* fall through to state 4 */
+
+ /* extract the metadata */
+ case 4:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+ &vnode->status.data_version, req);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSCallBack(call, vnode, &bp);
+ xdr_decode_YFSVolSync(&bp, call->reply[1]);
+
+ call->unmarshall++; /* fall through to state 5 */
+
+ case 5:
+ break;
+ }
+
+ for (; req->index < req->nr_pages; req->index++) { /* finish off the pages not completed above */
+ if (req->offset < PAGE_SIZE)
+ zero_user_segment(req->pages[req->index],
+ req->offset, PAGE_SIZE);
+ if (req->page_done)
+ req->page_done(call, req);
+ req->offset = 0;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Destroy a fetch-data call: drop the call's reference on the read request
+ * (held in reply[2]) and then do the normal flat-call cleanup.
+ */
+static void yfs_fetch_data_destructor(struct afs_call *call)
+{
+	afs_put_read(call->reply[2]);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * Call type descriptor for YFS.FetchData64.
+ */
+static const struct afs_call_type yfs_RXYFSFetchData64 = {
+	.name		= "YFS.FetchData64",
+	.op		= yfs_FS_FetchData64,
+	.deliver	= yfs_deliver_fs_fetch_data64,
+	.destructor	= yfs_fetch_data_destructor,
+};
+
+/*
+ * Issue a YFS.FetchData64 call to read req->len bytes at req->pos from the
+ * cursor's vnode.  The call takes its own reference on @req.  Returns
+ * -ENOMEM if the call cannot be allocated, otherwise the result of
+ * afs_make_call().
+ */
+int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_net *net = afs_v2net(vnode);
+	struct afs_call *call;
+	size_t reqsz, rplsz;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},%llx,%llx",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode,
+	       req->pos, req->len);
+
+	reqsz = sizeof(__be32) * 2 +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_u64) * 2;
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSCallBack) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = NULL; /* volsync */
+	call->reply[2] = req;
+	call->expected_version = vnode->status.data_version;
+	call->want_reply_time = true;
+
+	/* Marshal the request: opcode, RPC flags, file ID, position, length. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_u64(bp, req->pos);
+	bp = xdr_encode_u64(bp, req->len);
+	yfs_check_req(call, bp);
+
+	refcount_inc(&req->usage);
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data for YFS.CreateFile or YFS.MakeDir: the new object's
+ * FID and status, the updated status of the vnode in reply[0], the new
+ * callback and a volsync record (which is skipped).
+ */
+static int yfs_deliver_fs_create_vnode(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	struct afs_fid *newfid = call->reply[1];
+	struct afs_file_status *newstatus = call->reply[2];
+	struct afs_callback *newcb = call->reply[3];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, newfid);
+
+	ret = yfs_decode_status(call, &bp, newstatus, NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSCallBack_raw(&bp, newcb);
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type descriptor for YFS.CreateFile.
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+	.name		= "YFS.CreateFile",
+	.op		= yfs_FS_CreateFile,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.CreateFile call to create @name with @mode in the cursor's
+ * vnode.  The reply fills in @newfid, @newstatus and @newcb, and the
+ * directory's data version is expected to become @current_data_version + 1.
+ * Returns -ENOMEM if the call cannot be allocated, otherwise the result of
+ * afs_make_call().
+ */
+int yfs_fs_create_file(struct afs_fs_cursor *fc,
+		       const char *name,
+		       umode_t mode,
+		       u64 current_data_version,
+		       struct afs_fid *newfid,
+		       struct afs_file_status *newstatus,
+		       struct afs_callback *newcb)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_net *net = afs_v2net(vnode);
+	struct afs_call *call;
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+
+	/* opcode, RPC flags, dir FID, name, store status, lock type */
+	reqsz = sizeof(__be32);
+	reqsz += sizeof(__be32);
+	reqsz += sizeof(struct yfs_xdr_YFSFid);
+	reqsz += xdr_strlen(namesz);
+	reqsz += sizeof(struct yfs_xdr_YFSStoreStatus);
+	reqsz += sizeof(__be32);
+
+	/* new FID, new status, dir status, callback, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFid);
+	rplsz += sizeof(struct yfs_xdr_YFSFetchStatus);
+	rplsz += sizeof(struct yfs_xdr_YFSFetchStatus);
+	rplsz += sizeof(struct yfs_xdr_YFSCallBack);
+	rplsz += sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->reply[3] = newcb;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSCREATEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+	bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Call type descriptor for YFS.MakeDir.
+ */
+static const struct afs_call_type yfs_RXFSMakeDir = {
+	.name		= "YFS.MakeDir",
+	.op		= yfs_FS_MakeDir,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.MakeDir call to create directory @name with @mode in the
+ * cursor's vnode.  The reply fills in @newfid, @newstatus and @newcb, and
+ * the parent's data version is expected to become
+ * @current_data_version + 1.  Returns -ENOMEM if the call cannot be
+ * allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_make_dir(struct afs_fs_cursor *fc,
+		    const char *name,
+		    umode_t mode,
+		    u64 current_data_version,
+		    struct afs_fid *newfid,
+		    struct afs_file_status *newstatus,
+		    struct afs_callback *newcb)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_net *net = afs_v2net(vnode);
+	struct afs_call *call;
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+
+	/* opcode, RPC flags, parent FID, name, store status */
+	reqsz = sizeof(__be32);
+	reqsz += sizeof(struct yfs_xdr_RPCFlags);
+	reqsz += sizeof(struct yfs_xdr_YFSFid);
+	reqsz += xdr_strlen(namesz);
+	reqsz += sizeof(struct yfs_xdr_YFSStoreStatus);
+
+	/* new FID, new status, parent status, callback, volsync */
+	rplsz = sizeof(struct yfs_xdr_YFSFid);
+	rplsz += sizeof(struct yfs_xdr_YFSFetchStatus);
+	rplsz += sizeof(struct yfs_xdr_YFSFetchStatus);
+	rplsz += sizeof(struct yfs_xdr_YFSCallBack);
+	rplsz += sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->reply[3] = newcb;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSMAKEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile2 operation: the directory's new
+ * status, the removed file's FID (decoded and discarded), the file's status
+ * and a volsync record (skipped).
+ */
+static int yfs_deliver_fs_remove_file2(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0];
+	struct afs_vnode *vnode = call->reply[1];
+	struct afs_fid removed_fid;
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSFid(&bp, &removed_fid);
+
+	/* Was deleted if vnode->status.abort_code == VNOVNODE. */
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+	return 0;
+}
+
+/*
+ * Call type descriptor for YFS.RemoveFile2.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
+	.name		= "YFS.RemoveFile2",
+	.op		= yfs_FS_RemoveFile2,
+	.deliver	= yfs_deliver_fs_remove_file2,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.RemoveFile2 call to remove @name from the cursor's directory
+ * and get back the new status of both the directory and @vnode.  The
+ * directory's data version is expected to become
+ * @current_data_version + 1.  Returns -ENOMEM if the call cannot be
+ * allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+			const char *name, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_net *net = afs_v2net(dvnode);
+	struct afs_call *call;
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(namesz);
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request: opcode, RPC flags, directory FID, name. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation: the
+ * directory's new status followed by a volsync record (skipped).
+ */
+static int yfs_deliver_fs_remove(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+	return 0;
+}
+
+/*
+ * Call type descriptors for YFS.RemoveFile and YFS.RemoveDir; both replies
+ * are handled by the same delivery function.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile = {
+	.name		= "YFS.RemoveFile",
+	.op		= yfs_FS_RemoveFile,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSRemoveDir = {
+	.name		= "YFS.RemoveDir",
+	.op		= yfs_FS_RemoveDir,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.RemoveDir (if @isdir) or YFS.RemoveFile call to remove @name
+ * from the cursor's directory.  The directory's data version is expected to
+ * become @current_data_version + 1.  Returns -ENOMEM if the call cannot be
+ * allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		  const char *name, bool isdir, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_net *net = afs_v2net(dvnode);
+	const struct afs_call_type *type;
+	struct afs_call *call;
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	type = isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile;
+
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(namesz);
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, type, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request: opcode, RPC flags, directory FID, name. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Link operation: the linked file's status, the
+ * directory's new status and a volsync record (skipped).
+ */
+static int yfs_deliver_fs_link(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0];
+	struct afs_vnode *vnode = call->reply[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type descriptor for YFS.Link.
+ */
+static const struct afs_call_type yfs_RXYFSLink = {
+	.name		= "YFS.Link",
+	.op		= yfs_FS_Link,
+	.deliver	= yfs_deliver_fs_link,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.Link call to add @name in the cursor's directory as a hard
+ * link to @vnode.  The directory's data version is expected to become
+ * @current_data_version + 1.  Returns -ENOMEM if the call cannot be
+ * allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		const char *name, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_net *net = afs_v2net(vnode);
+	struct afs_call *call;
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(namesz) +
+		sizeof(struct yfs_xdr_YFSFid);
+	rplsz = sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSLink, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request: opcode, RPC flags, dir FID, name, target FID. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Symlink operation: the new symlink's FID and
+ * status, the updated status of the vnode in reply[0] and a volsync record
+ * (skipped).
+ */
+static int yfs_deliver_fs_symlink(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	struct afs_fid *newfid = call->reply[1];
+	struct afs_file_status *newstatus = call->reply[2];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, newfid);
+
+	ret = yfs_decode_status(call, &bp, newstatus, NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Call type descriptor for YFS.Symlink.
+ */
+static const struct afs_call_type yfs_RXYFSSymlink = {
+	.name		= "YFS.Symlink",
+	.op		= yfs_FS_Symlink,
+	.deliver	= yfs_deliver_fs_symlink,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Issue a YFS.Symlink call to create symlink @name pointing at @contents in
+ * the cursor's directory, with mode S_IRWXUGO.  The reply fills in @newfid
+ * and @newstatus, and the directory's data version is expected to become
+ * @current_data_version + 1.  Returns -ENOMEM if the call cannot be
+ * allocated, otherwise the result of afs_make_call().
+ */
+int yfs_fs_symlink(struct afs_fs_cursor *fc,
+		   const char *name,
+		   const char *contents,
+		   u64 current_data_version,
+		   struct afs_fid *newfid,
+		   struct afs_file_status *newstatus)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_net *net = afs_v2net(dvnode);
+	struct afs_call *call;
+	size_t namesz, contents_sz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	contents_sz = strlen(contents);
+
+	reqsz = sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(namesz) +
+		xdr_strlen(contents_sz) +
+		sizeof(struct yfs_xdr_YFSStoreStatus);
+	rplsz = sizeof(struct yfs_xdr_YFSFid) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->expected_version = current_data_version + 1;
+
+	/* Marshal the request: opcode, flags, dir FID, name, target, status. */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSYMLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_string(bp, contents, contents_sz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename operation: the status of the original
+ * directory, then - only decoded when the new directory is a different
+ * vnode - the status of the new directory, then a volsync record (skipped).
+ */
+static int yfs_deliver_fs_rename(struct afs_call *call)
+{
+	struct afs_vnode *orig_dvnode = call->reply[0];
+	struct afs_vnode *new_dvnode = call->reply[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* The complete reply is now in the call buffer; unmarshal it. */
+	bp = call->buffer;
+
+	ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (new_dvnode != orig_dvnode) {
+		ret = yfs_decode_status(call, &bp, &new_dvnode->status,
+					new_dvnode,
+					&call->expected_version_2, NULL);
+		if (ret < 0)
+			return ret;
+	}
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Rename operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename = {
+ .name = "YFS.Rename", /* "YFS." prefix, matching the other YFS call types */
+ .op = yfs_FS_Rename,
+ .deliver = yfs_deliver_fs_rename,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory.
+ */
+int yfs_fs_rename(struct afs_fs_cursor *fc,
+ const char *orig_name,
+ struct afs_vnode *new_dvnode,
+ const char *new_name,
+ u64 current_orig_data_version,
+ u64 current_new_data_version)
+{
+ struct afs_vnode *orig_dvnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(orig_dvnode);
+ size_t o_namesz, n_namesz;
+ __be32 *bp;
+
+ _enter("");
+
+ o_namesz = strlen(orig_name);
+ n_namesz = strlen(new_name);
+ call = afs_alloc_flat_call(net, &yfs_RXYFSRename,
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_RPCFlags) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(o_namesz) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ xdr_strlen(n_namesz),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = orig_dvnode;
+ call->reply[1] = new_dvnode;
+ call->expected_version = current_orig_data_version + 1;
+ call->expected_version_2 = current_new_data_version + 1;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSRENAME);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid);
+ bp = xdr_encode_string(bp, orig_name, o_namesz);
+ bp = xdr_encode_YFSFid(bp, &new_dvnode->fid);
+ bp = xdr_encode_string(bp, new_name, n_namesz);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &orig_dvnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.StoreData64 operation.
+ */
+static int yfs_deliver_fs_store_data(struct afs_call *call)
+{
+ struct afs_vnode *vnode = call->reply[0];
+ const __be32 *bp;
+ int ret;
+
+ _enter("");
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSVolSync(&bp, NULL);
+
+ afs_pages_written_back(vnode, call);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.StoreData64 operation type.
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64 = {
+ .name = "YFS.StoreData64",
+ .op = yfs_FS_StoreData64,
+ .deliver = yfs_deliver_fs_store_data,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Store a set of pages to a large file.
+ */
+int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
+ pgoff_t first, pgoff_t last,
+ unsigned offset, unsigned to)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ loff_t size, pos, i_size;
+ __be32 *bp;
+
+ _enter(",%x,{%llx:%llu},,",
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+ size = (loff_t)to - (loff_t)offset;
+ if (first != last)
+ size += (loff_t)(last - first) << PAGE_SHIFT;
+ pos = (loff_t)first << PAGE_SHIFT;
+ pos += offset;
+
+ i_size = i_size_read(&vnode->vfs_inode);
+ if (pos + size > i_size)
+ i_size = size + pos;
+
+ _debug("size %llx, at %llx, i_size %llx",
+ (unsigned long long)size, (unsigned long long)pos,
+ (unsigned long long)i_size);
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64,
+ sizeof(__be32) +
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSStoreStatus) +
+ sizeof(struct yfs_xdr_u64) * 3,
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->mapping = mapping;
+ call->reply[0] = vnode;
+ call->first = first;
+ call->last = last;
+ call->first_offset = offset;
+ call->last_to = to;
+ call->send_pages = true;
+ call->expected_version = vnode->status.data_version + 1;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime);
+ bp = xdr_encode_u64(bp, pos);
+ bp = xdr_encode_u64(bp, size);
+ bp = xdr_encode_u64(bp, i_size);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.StoreStatus or status-only YFS.StoreData64 call.
+ */
+static int yfs_deliver_fs_store_status(struct afs_call *call)
+{
+ struct afs_vnode *vnode = call->reply[0];
+ const __be32 *bp;
+ int ret;
+
+ _enter("");
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSVolSync(&bp, NULL);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.StoreStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSStoreStatus = {
+ .name = "YFS.StoreStatus",
+ .op = yfs_FS_StoreStatus,
+ .deliver = yfs_deliver_fs_store_status,
+ .destructor = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
+ .name = "YFS.StoreData64",
+ .op = yfs_FS_StoreData64,
+ .deliver = yfs_deliver_fs_store_status,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 rather than
+ * YFS.StoreStatus so as to alter the file size also.
+ */
+static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+
+ _enter(",%x,{%llx:%llu},,",
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSStoreStatus) +
+ sizeof(struct yfs_xdr_u64) * 3,
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->expected_version = vnode->status.data_version + 1;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFS_StoreStatus(bp, attr);
+ bp = xdr_encode_u64(bp, 0); /* position of start of write */
+ bp = xdr_encode_u64(bp, 0); /* size of write */
+ bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 if there's a change in
+ * file size, and YFS.StoreStatus otherwise.
+ */
+int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+
+ if (attr->ia_valid & ATTR_SIZE)
+ return yfs_fs_setattr_size(fc, attr);
+
+ _enter(",%x,{%llx:%llu},,",
+ key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(struct yfs_xdr_YFSStoreStatus),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->expected_version = vnode->status.data_version;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSSTORESTATUS);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFS_StoreStatus(bp, attr);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.GetVolumeStatus operation.
+ */
+static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+ const __be32 *bp;
+ char *p;
+ u32 size;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ call->unmarshall++;
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+
+ /* extract the returned status record */
+ case 1:
+ _debug("extract status");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]);
+ call->unmarshall++;
+ afs_extract_to_tmp(call);
+
+ /* extract the volume name length */
+ case 2:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("volname length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_volname_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
+ call->unmarshall++;
+
+ /* extract the volume name */
+ case 3:
+ _debug("extract volname");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ p = call->reply[2];
+ p[call->count] = 0;
+ _debug("volname '%s'", p);
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* extract the offline message length */
+ case 4:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("offline msg length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_offline_msg_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
+ call->unmarshall++;
+
+ /* extract the offline message */
+ case 5:
+ _debug("extract offline");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ p = call->reply[2];
+ p[call->count] = 0;
+ _debug("offline '%s'", p);
+
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* extract the message of the day length */
+ case 6:
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ call->count = ntohl(call->tmp);
+ _debug("motd length: %u", call->count);
+ if (call->count >= AFSNAMEMAX)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_motd_len);
+ size = (call->count + 3) & ~3; /* It's padded */
+ afs_extract_begin(call, call->reply[2], size);
+ call->unmarshall++;
+
+ /* extract the message of the day */
+ case 7:
+ _debug("extract motd");
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ p = call->reply[2];
+ p[call->count] = 0;
+ _debug("motd '%s'", p);
+
+ call->unmarshall++;
+
+ case 8:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Destroy a YFS.GetVolumeStatus call.
+ */
+static void yfs_get_volume_status_call_destructor(struct afs_call *call)
+{
+ kfree(call->reply[2]);
+ call->reply[2] = NULL;
+ afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.GetVolumeStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
+ .name = "YFS.GetVolumeStatus",
+ .op = yfs_FS_GetVolumeStatus,
+ .deliver = yfs_deliver_fs_get_volume_status,
+ .destructor = yfs_get_volume_status_call_destructor,
+};
+
+/*
+ * fetch the status of a volume
+ */
+int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
+ struct afs_volume_status *vs)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+ void *tmpbuf;
+
+ _enter("");
+
+ tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_u64),
+ sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+ sizeof(__be32));
+ if (!call) {
+ kfree(tmpbuf);
+ return -ENOMEM;
+ }
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+ call->reply[1] = vs;
+ call->reply[2] = tmpbuf;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_u64(bp, vnode->fid.vid);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock call.
+ */
+static int yfs_deliver_fs_xxxx_lock(struct afs_call *call)
+{
+ struct afs_vnode *vnode = call->reply[0];
+ const __be32 *bp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSVolSync(&bp, NULL);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.SetLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSSetLock = {
+ .name = "YFS.SetLock",
+ .op = yfs_FS_SetLock,
+ .deliver = yfs_deliver_fs_xxxx_lock,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ExtendLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSExtendLock = {
+ .name = "YFS.ExtendLock",
+ .op = yfs_FS_ExtendLock,
+ .deliver = yfs_deliver_fs_xxxx_lock,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ReleaseLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSReleaseLock = {
+ .name = "YFS.ReleaseLock",
+ .op = yfs_FS_ReleaseLock,
+ .deliver = yfs_deliver_fs_xxxx_lock,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Set a lock on a file
+ */
+int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid) +
+ sizeof(__be32),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSSETLOCK);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_u32(bp, type);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * extend a lock on a file
+ */
+int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSEXTENDLOCK);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * release a lock on a file
+ */
+int yfs_fs_release_lock(struct afs_fs_cursor *fc)
+{
+ struct afs_vnode *vnode = fc->vnode;
+ struct afs_call *call;
+ struct afs_net *net = afs_v2net(vnode);
+ __be32 *bp;
+
+ _enter("");
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call)
+ return -ENOMEM;
+
+ call->key = fc->key;
+ call->reply[0] = vnode;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSRELEASELOCK);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ yfs_check_req(call, bp);
+
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &vnode->fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.FetchStatus with no vnode.
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+ struct afs_file_status *status = call->reply[1];
+ struct afs_callback *callback = call->reply[2];
+ struct afs_volsync *volsync = call->reply[3];
+ struct afs_vnode *vnode = call->reply[0];
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ _enter(""); /* reply[0] is NULL for a fetch-by-fid call; don't deref vnode */
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ ret = yfs_decode_status(call, &bp, status, vnode,
+ &call->expected_version, NULL);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSCallBack_raw(&bp, callback);
+ xdr_decode_YFSVolSync(&bp, volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus = {
+ .name = "YFS.FetchStatus",
+ .op = yfs_FS_FetchStatus,
+ .deliver = yfs_deliver_fs_fetch_status,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a fid without needing a vnode handle.
+ */
+int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
+ struct afs_net *net,
+ struct afs_fid *fid,
+ struct afs_file_status *status,
+ struct afs_callback *callback,
+ struct afs_volsync *volsync)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter(",%x,{%llx:%llu},,",
+ key_serial(fc->key), fid->vid, fid->vnode);
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus,
+ sizeof(__be32) * 2 +
+ sizeof(struct yfs_xdr_YFSFid),
+ sizeof(struct yfs_xdr_YFSFetchStatus) +
+ sizeof(struct yfs_xdr_YFSCallBack) +
+ sizeof(struct yfs_xdr_YFSVolSync));
+ if (!call) {
+ fc->ac.error = -ENOMEM;
+ return -ENOMEM;
+ }
+
+ call->key = fc->key;
+ call->reply[0] = NULL; /* vnode for fid[0] */
+ call->reply[1] = status;
+ call->reply[2] = callback;
+ call->reply[3] = volsync;
+ call->expected_version = 1; /* vnode->status.data_version */
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+ bp = xdr_encode_u32(bp, 0); /* RPC flags */
+ bp = xdr_encode_YFSFid(bp, fid);
+ yfs_check_req(call, bp);
+
+ call->cb_break = fc->cb_break;
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, fid);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.InlineBulkStatus call
+ */
+static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+ struct afs_file_status *statuses;
+ struct afs_callback *callbacks;
+ struct afs_vnode *vnode = call->reply[0];
+ const __be32 *bp;
+ u32 tmp;
+ int ret;
+
+ _enter("{%u}", call->unmarshall);
+
+ switch (call->unmarshall) {
+ case 0:
+ afs_extract_to_tmp(call);
+ call->unmarshall++;
+
+ /* Extract the file status count and array in two steps */
+ case 1:
+ _debug("extract status count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("status count: %u/%u", tmp, call->count2);
+ if (tmp != call->count2)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_ibulkst_count);
+
+ call->count = 0;
+ call->unmarshall++;
+ more_counts:
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
+
+ case 2:
+ _debug("extract status array %u", call->count);
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ statuses = call->reply[1];
+ ret = yfs_decode_status(call, &bp, &statuses[call->count],
+ call->count == 0 ? vnode : NULL,
+ NULL, NULL);
+ if (ret < 0)
+ return ret;
+
+ call->count++;
+ if (call->count < call->count2)
+ goto more_counts;
+
+ call->count = 0;
+ call->unmarshall++;
+ afs_extract_to_tmp(call);
+
+ /* Extract the callback count and array in two steps */
+ case 3:
+ _debug("extract CB count");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ tmp = ntohl(call->tmp);
+ _debug("CB count: %u", tmp);
+ if (tmp != call->count2)
+ return afs_protocol_error(call, -EBADMSG,
+ afs_eproto_ibulkst_cb_count);
+ call->count = 0;
+ call->unmarshall++;
+ more_cbs:
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
+
+ case 4:
+ _debug("extract CB array");
+ ret = afs_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ _debug("unmarshall CB array");
+ bp = call->buffer;
+ callbacks = call->reply[2];
+ xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]);
+ statuses = call->reply[1];
+ if (call->count == 0 && vnode && statuses[0].abort_code == 0) {
+ bp = call->buffer;
+ xdr_decode_YFSCallBack(call, vnode, &bp);
+ }
+ call->count++;
+ if (call->count < call->count2)
+ goto more_cbs;
+
+ afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
+ call->unmarshall++;
+
+ case 5:
+ ret = afs_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ xdr_decode_YFSVolSync(&bp, call->reply[3]);
+
+ call->unmarshall++;
+
+ case 6:
+ break;
+ }
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * YFS.InlineBulkStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
+ .name = "YFS.InlineBulkStatus",
+ .op = yfs_FS_InlineBulkStatus,
+ .deliver = yfs_deliver_fs_inline_bulk_status,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 1024 files
+ */
+int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
+ struct afs_net *net,
+ struct afs_fid *fids,
+ struct afs_file_status *statuses,
+ struct afs_callback *callbacks,
+ unsigned int nr_fids,
+ struct afs_volsync *volsync)
+{
+ struct afs_call *call;
+ __be32 *bp;
+ unsigned int i; /* same signedness as nr_fids */
+
+ _enter(",%x,{%llx:%llu},%u",
+ key_serial(fc->key), fids[0].vid, fids[0].vnode, nr_fids);
+
+ call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus,
+ sizeof(__be32) +
+ sizeof(__be32) +
+ sizeof(__be32) +
+ sizeof(struct yfs_xdr_YFSFid) * nr_fids,
+ sizeof(struct yfs_xdr_YFSFetchStatus));
+ if (!call) {
+ fc->ac.error = -ENOMEM;
+ return -ENOMEM;
+ }
+
+ call->key = fc->key;
+ call->reply[0] = NULL; /* vnode for fid[0] */
+ call->reply[1] = statuses;
+ call->reply[2] = callbacks;
+ call->reply[3] = volsync;
+ call->count2 = nr_fids;
+
+ /* marshall the parameters */
+ bp = call->request;
+ bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS);
+ bp = xdr_encode_u32(bp, 0); /* RPCFlags */
+ bp = xdr_encode_u32(bp, nr_fids);
+ for (i = 0; i < nr_fids; i++)
+ bp = xdr_encode_YFSFid(bp, &fids[i]);
+ yfs_check_req(call, bp);
+
+ call->cb_break = fc->cb_break;
+ afs_use_fs_server(call, fc->cbi);
+ trace_afs_make_fs_call(call, &fids[0]);
+ return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9a69392..d81c148 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = BFS_MAGIC;
- if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+ if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+ le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
printf("Superblock is corrupted\n");
goto out1;
}
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
sizeof(struct bfs_inode)
+ BFS_ROOT_INO - 1;
imap_len = (info->si_lasti / 8) + 1;
- info->si_imap = kzalloc(imap_len, GFP_KERNEL);
- if (!info->si_imap)
+ info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+ if (!info->si_imap) {
+ printf("Cannot allocate %u bytes\n", imap_len);
goto out1;
+ }
for (i = 0; i < BFS_ROOT_INO; i++)
set_bit(i, info->si_imap);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38b8ce0..a80b4f0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
dio->size = 0;
dio->multi_bio = false;
- dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+ dio->should_dirty = is_read && iter_is_iovec(iter);
blk_start_plug(&plug);
for (;;) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68ca41d..8095352 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3201,9 +3201,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
- struct file *dst_file, loff_t dst_loff,
- u64 olen);
/* file.c */
int __init btrfs_auto_defrag_init(void);
@@ -3233,8 +3230,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
+loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97c7a08..a3c22e1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3298,8 +3298,7 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
#endif
- .clone_file_range = btrfs_clone_file_range,
- .dedupe_file_range = btrfs_dedupe_file_range,
+ .remap_file_range = btrfs_remap_file_range,
};
void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a990a90..3ca6943 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3629,26 +3629,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
return ret;
}
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
- struct file *dst_file, loff_t dst_loff,
- u64 olen)
-{
- struct inode *src = file_inode(src_file);
- struct inode *dst = file_inode(dst_file);
- u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
- if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
- /*
- * Btrfs does not support blocksize < page_size. As a
- * result, btrfs_cmp_data() won't correctly handle
- * this situation without an update.
- */
- return -EINVAL;
- }
-
- return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
@@ -4350,10 +4330,34 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
return ret;
}
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
- struct file *dst_file, loff_t destoff, u64 len)
+loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
+ struct file *dst_file, loff_t destoff, loff_t len,
+ unsigned int remap_flags)
{
- return btrfs_clone_files(dst_file, src_file, off, len, destoff);
+ int ret;
+
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (remap_flags & REMAP_FILE_DEDUP) {
+ struct inode *src = file_inode(src_file);
+ struct inode *dst = file_inode(dst_file);
+ u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+
+ if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
+ /*
+ * Btrfs does not support blocksize < page_size. As a
+ * result, btrfs_cmp_data() won't correctly handle
+ * this situation without an update.
+ */
+ return -EINVAL;
+ }
+
+ ret = btrfs_extent_same(src, off, len, dst, destoff);
+ } else {
+ ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
+ }
+ return ret < 0 ? ret : len;
}
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
diff --git a/fs/buffer.c b/fs/buffer.c
index d60d61e..1286c2b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
*/
bio = bio_alloc(GFP_NOIO, 1);
+ if (wbc) {
+ wbc_init_bio(wbc, bio);
+ wbc_account_io(wbc, bh->b_page, bh->b_size);
+ }
+
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
- if (wbc) {
- wbc_init_bio(wbc, bio);
- wbc_account_io(wbc, bh->b_page, bh->b_size);
- }
-
submit_bio(bio);
return 0;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f788496..27cad84 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -615,7 +615,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
more = len < iov_iter_count(to);
- if (unlikely(to->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(to))) {
ret = iov_iter_get_pages_alloc(to, &pages, len,
&page_off);
if (ret <= 0) {
@@ -662,7 +662,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
ret += zlen;
}
- if (unlikely(to->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(to))) {
if (ret > 0) {
iov_iter_advance(to, ret);
off += ret;
@@ -815,7 +815,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
aio_req->total_len = rc + zlen;
}
- iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+ iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
osd_data->num_bvecs,
osd_data->bvec_pos.iter.bi_size);
iov_iter_advance(&i, rc);
@@ -1038,8 +1038,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int zlen = min_t(size_t, len - ret,
size - pos - ret);
- iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
- len);
+ iov_iter_bvec(&i, READ, bvecs, num_pages, len);
iov_iter_advance(&i, ret);
iov_iter_zero(zlen, &i);
ret += zlen;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 3e81242..ba178b09 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -145,6 +145,58 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
}
+static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+{
+ struct list_head *stmp, *tmp, *tmp1, *tmp2;
+ struct TCP_Server_Info *server;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+ struct cifsFileInfo *cfile;
+
+ seq_puts(m, "# Version:1\n");
+ seq_puts(m, "# Format:\n");
+ seq_puts(m, "# <tree id> <persistent fid> <flags> <count> <pid> <uid>");
+#ifdef CONFIG_CIFS_DEBUG2
+ seq_printf(m, " <filename> <mid>\n");
+#else
+ seq_printf(m, " <filename>\n");
+#endif /* CIFS_DEBUG2 */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(stmp, &cifs_tcp_ses_list) {
+ server = list_entry(stmp, struct TCP_Server_Info,
+ tcp_ses_list);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ list_for_each(tmp1, &ses->tcon_list) {
+ tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+ spin_lock(&tcon->open_file_lock);
+ list_for_each(tmp2, &tcon->openFileList) {
+ cfile = list_entry(tmp2, struct cifsFileInfo,
+ tlist);
+ seq_printf(m,
+ "0x%x 0x%llx 0x%x %d %d %d %s",
+ tcon->tid,
+ cfile->fid.persistent_fid,
+ cfile->f_flags,
+ cfile->count,
+ cfile->pid,
+ from_kuid(&init_user_ns, cfile->uid),
+ cfile->dentry->d_name.name);
+#ifdef CONFIG_CIFS_DEBUG2
+ seq_printf(m, " 0x%llx\n", cfile->fid.mid);
+#else
+ seq_printf(m, "\n");
+#endif /* CIFS_DEBUG2 */
+ }
+ spin_unlock(&tcon->open_file_lock);
+ }
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ seq_putc(m, '\n');
+ return 0;
+}
+
static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
{
struct list_head *tmp1, *tmp2, *tmp3;
@@ -565,6 +617,9 @@ cifs_proc_init(void)
proc_create_single("DebugData", 0, proc_fs_cifs,
cifs_debug_data_proc_show);
+ proc_create_single("open_files", 0400, proc_fs_cifs,
+ cifs_debug_files_proc_show);
+
proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
@@ -601,6 +656,7 @@ cifs_proc_clean(void)
return;
remove_proc_entry("DebugData", proc_fs_cifs);
+ remove_proc_entry("open_files", proc_fs_cifs);
remove_proc_entry("cifsFYI", proc_fs_cifs);
remove_proc_entry("traceSMB", proc_fs_cifs);
remove_proc_entry("Stats", proc_fs_cifs);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index b611fc2..7f01c6e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
sprintf(dp, ";sec=krb5");
else if (server->sec_mskerberos)
sprintf(dp, ";sec=mskrb5");
- else
- goto out;
+ else {
+ cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
+ sprintf(dp, ";sec=krb5");
+ }
dp = description + strlen(description);
sprintf(dp, ";uid=0x%x",
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7de9603..865706e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -992,17 +992,21 @@ const struct inode_operations cifs_symlink_inode_ops = {
.listxattr = cifs_listxattr,
};
-static int cifs_clone_file_range(struct file *src_file, loff_t off,
- struct file *dst_file, loff_t destoff, u64 len)
+static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
+ struct file *dst_file, loff_t destoff, loff_t len,
+ unsigned int remap_flags)
{
struct inode *src_inode = file_inode(src_file);
struct inode *target_inode = file_inode(dst_file);
struct cifsFileInfo *smb_file_src = src_file->private_data;
- struct cifsFileInfo *smb_file_target = dst_file->private_data;
- struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
+ struct cifsFileInfo *smb_file_target;
+ struct cifs_tcon *target_tcon;
unsigned int xid;
int rc;
+ if (remap_flags & ~REMAP_FILE_ADVISORY)
+ return -EINVAL;
+
cifs_dbg(FYI, "clone range\n");
xid = get_xid();
@@ -1013,6 +1017,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
goto out;
}
+ smb_file_target = dst_file->private_data;
+ target_tcon = tlink_tcon(smb_file_target->tlink);
+
/*
* Note: cifs case is easier than btrfs since server responsible for
* checks for proper open modes and file type and if it wants
@@ -1042,7 +1049,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
unlock_two_nondirectories(src_inode, target_inode);
out:
free_xid(xid);
- return rc;
+ return rc < 0 ? rc : len;
}
ssize_t cifs_file_copychunk_range(unsigned int xid,
@@ -1151,7 +1158,7 @@ const struct file_operations cifs_file_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -1170,15 +1177,14 @@ const struct file_operations cifs_file_strict_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_ops = {
- /* BB reevaluate whether they can be done with directio, no cache */
- .read_iter = cifs_user_readv,
- .write_iter = cifs_user_writev,
+ .read_iter = cifs_direct_readv,
+ .write_iter = cifs_direct_writev,
.open = cifs_open,
.release = cifs_close,
.lock = cifs_lock,
@@ -1189,7 +1195,7 @@ const struct file_operations cifs_file_direct_ops = {
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
@@ -1208,7 +1214,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
@@ -1226,15 +1232,14 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
const struct file_operations cifs_file_direct_nobrl_ops = {
- /* BB reevaluate whether they can be done with directio, no cache */
- .read_iter = cifs_user_readv,
- .write_iter = cifs_user_writev,
+ .read_iter = cifs_direct_readv,
+ .write_iter = cifs_direct_writev,
.open = cifs_open,
.release = cifs_close,
.fsync = cifs_fsync,
@@ -1244,7 +1249,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.splice_write = iter_file_splice_write,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.llseek = cifs_llseek,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
@@ -1256,7 +1261,7 @@ const struct file_operations cifs_dir_ops = {
.read = generic_read_dir,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
- .clone_file_range = cifs_clone_file_range,
+ .remap_file_range = cifs_remap_file_range,
.llseek = generic_file_llseek,
.fsync = cifs_dir_fsync,
};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 24e265a5..4c3b5cf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,8 +101,10 @@ extern int cifs_open(struct inode *inode, struct file *file);
extern int cifs_close(struct inode *inode, struct file *file);
extern int cifs_closedir(struct inode *inode, struct file *file);
extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
extern int cifs_lock(struct file *, int, struct file_lock *);
extern int cifs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ed1e0fc..38ab0fc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1125,6 +1125,9 @@ struct cifs_fid {
__u8 create_guid[16];
struct cifs_pending_open *pending_open;
unsigned int epoch;
+#ifdef CONFIG_CIFS_DEBUG2
+ __u64 mid;
+#endif /* CIFS_DEBUG2 */
bool purge_cache;
};
@@ -1183,6 +1186,11 @@ struct cifs_aio_ctx {
unsigned int len;
unsigned int total_len;
bool should_dirty;
+ /*
+ * Indicates if this aio_ctx is for direct_io,
+ * If yes, iter is a copy of the user passed iov_iter
+ */
+ bool direct_io;
};
struct cifs_readdata;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 1ce733f..79d842e 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1539,6 +1539,9 @@ struct reparse_symlink_data {
char PathBuffer[0];
} __attribute__((packed));
+/* Flag above */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
/* For IO_REPARSE_TAG_NFS */
#define NFS_SPECFILE_LNK 0x00000000014B4E4C
#define NFS_SPECFILE_CHR 0x0000000000524843
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d82f0cc..6f24f12 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -589,7 +589,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
{
struct msghdr smb_msg;
struct kvec iov = {.iov_base = buf, .iov_len = to_read};
- iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read);
+ iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -601,7 +601,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
struct msghdr smb_msg;
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
- iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
+ iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c620d4b..74c33d5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1005,7 +1005,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
* Set the byte-range lock (mandatory style). Returns:
* 1) 0, if we set the lock and don't need to request to the server;
* 2) 1, if no locks prevent us but we need to request to the server;
- * 3) -EACCESS, if there is a lock that prevents us and wait is false.
+ * 3) -EACCES, if there is a lock that prevents us and wait is false.
*/
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
@@ -2538,6 +2538,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
}
+/*
+ * Reissue a complete, already-built write request (used by the direct I/O
+ * retry path) without splitting it into smaller segments.
+ *
+ * @wdata:      write request to resend; its reference is dropped on failure
+ * @wdata_list: list the request is appended to once it has been sent
+ * @ctx:        aio context of the request (not used in this function)
+ *
+ * Returns 0 when the write was queued, the error returned by
+ * wait_mtu_credits(), -EBUSY when enough credits for the whole request
+ * could not be obtained in three attempts, or the error from reopening
+ * the file handle / sending the request.
+ */
static int
+cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
+	struct cifs_aio_ctx *ctx)
+{
+	int wait_retry = 0;
+	unsigned int wsize, credits;
+	int rc;
+	struct TCP_Server_Info *server =
+		tlink_tcon(wdata->cfile->tlink)->ses->server;
+
+	/*
+	 * Try to resend this wdata, waiting for credits up to 3 seconds.
+	 * Note: we are attempting to resend the whole wdata not in segments
+	 */
+	do {
+		rc = server->ops->wait_mtu_credits(
+			server, wdata->bytes, &wsize, &credits);
+
+		/*
+		 * On failure wsize/credits may be unset: report rc as-is
+		 * rather than comparing an indeterminate wsize below.
+		 */
+		if (rc)
+			goto out;
+
+		if (wsize < wdata->bytes) {
+			/* not enough for the whole request: hand them back */
+			add_credits_and_wake_if(server, credits, 0);
+			msleep(1000);
+			wait_retry++;
+		}
+	} while (wsize < wdata->bytes && wait_retry < 3);
+
+	if (wsize < wdata->bytes) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	/* reopen an invalidated handle if needed; loop while asked to retry */
+	rc = -EAGAIN;
+	while (rc == -EAGAIN) {
+		rc = 0;
+		if (wdata->cfile->invalidHandle)
+			rc = cifs_reopen_file(wdata->cfile, false);
+		if (!rc)
+			rc = server->ops->async_writev(wdata,
+					cifs_uncached_writedata_release);
+	}
+
+	if (!rc) {
+		list_add_tail(&wdata->list, wdata_list);
+		return 0;
+	}
+
+	add_credits_and_wake_if(server, wdata->credits, 0);
+out:
+	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+
+	return rc;
+}
+
+static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
struct cifsFileInfo *open_file,
struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
loff_t saved_offset = offset;
pid_t pid;
struct TCP_Server_Info *server;
+ struct page **pagevec;
+ size_t start;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
@@ -2567,38 +2624,79 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
if (rc)
break;
- nr_pages = get_numpages(wsize, len, &cur_len);
- wdata = cifs_writedata_alloc(nr_pages,
+ if (ctx->direct_io) {
+ ssize_t result;
+
+ result = iov_iter_get_pages_alloc(
+ from, &pagevec, wsize, &start);
+ if (result < 0) {
+ cifs_dbg(VFS,
+ "direct_writev couldn't get user pages "
+ "(rc=%zd) iter type %d iov_offset %zd "
+ "count %zd\n",
+ result, from->type,
+ from->iov_offset, from->count);
+ dump_stack();
+ break;
+ }
+ cur_len = (size_t)result;
+ iov_iter_advance(from, cur_len);
+
+ nr_pages =
+ (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ wdata = cifs_writedata_direct_alloc(pagevec,
cifs_uncached_writev_complete);
- if (!wdata) {
- rc = -ENOMEM;
- add_credits_and_wake_if(server, credits, 0);
- break;
- }
+ if (!wdata) {
+ rc = -ENOMEM;
+ add_credits_and_wake_if(server, credits, 0);
+ break;
+ }
- rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
- if (rc) {
- kfree(wdata);
- add_credits_and_wake_if(server, credits, 0);
- break;
- }
- num_pages = nr_pages;
- rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
- if (rc) {
- for (i = 0; i < nr_pages; i++)
- put_page(wdata->pages[i]);
- kfree(wdata);
- add_credits_and_wake_if(server, credits, 0);
- break;
- }
+ wdata->page_offset = start;
+ wdata->tailsz =
+ nr_pages > 1 ?
+ cur_len - (PAGE_SIZE - start) -
+ (nr_pages - 2) * PAGE_SIZE :
+ cur_len;
+ } else {
+ nr_pages = get_numpages(wsize, len, &cur_len);
+ wdata = cifs_writedata_alloc(nr_pages,
+ cifs_uncached_writev_complete);
+ if (!wdata) {
+ rc = -ENOMEM;
+ add_credits_and_wake_if(server, credits, 0);
+ break;
+ }
- /*
- * Bring nr_pages down to the number of pages we actually used,
- * and free any pages that we didn't use.
- */
- for ( ; nr_pages > num_pages; nr_pages--)
- put_page(wdata->pages[nr_pages - 1]);
+ rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+ if (rc) {
+ kfree(wdata);
+ add_credits_and_wake_if(server, credits, 0);
+ break;
+ }
+
+ num_pages = nr_pages;
+ rc = wdata_fill_from_iovec(
+ wdata, from, &cur_len, &num_pages);
+ if (rc) {
+ for (i = 0; i < nr_pages; i++)
+ put_page(wdata->pages[i]);
+ kfree(wdata);
+ add_credits_and_wake_if(server, credits, 0);
+ break;
+ }
+
+ /*
+ * Bring nr_pages down to the number of pages we
+ * actually used, and free any pages that we didn't use.
+ */
+ for ( ; nr_pages > num_pages; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+
+ wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+ }
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
@@ -2607,7 +2705,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
wdata->pid = pid;
wdata->bytes = cur_len;
wdata->pagesz = PAGE_SIZE;
- wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
wdata->credits = credits;
wdata->ctx = ctx;
kref_get(&ctx->refcount);
@@ -2682,13 +2779,18 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
INIT_LIST_HEAD(&tmp_list);
list_del_init(&wdata->list);
- iov_iter_advance(&tmp_from,
+ if (ctx->direct_io)
+ rc = cifs_resend_wdata(
+ wdata, &tmp_list, ctx);
+ else {
+ iov_iter_advance(&tmp_from,
wdata->offset - ctx->pos);
- rc = cifs_write_from_iter(wdata->offset,
+ rc = cifs_write_from_iter(wdata->offset,
wdata->bytes, &tmp_from,
ctx->cfile, cifs_sb, &tmp_list,
ctx);
+ }
list_splice(&tmp_list, &ctx->list);
@@ -2701,8 +2803,9 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- for (i = 0; i < ctx->npages; i++)
- put_page(ctx->bv[i].bv_page);
+ if (!ctx->direct_io)
+ for (i = 0; i < ctx->npages; i++)
+ put_page(ctx->bv[i].bv_page);
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -2717,7 +2820,8 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
complete(&ctx->done);
}
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t __cifs_writev(
+ struct kiocb *iocb, struct iov_iter *from, bool direct)
{
struct file *file = iocb->ki_filp;
ssize_t total_written = 0;
@@ -2726,13 +2830,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
struct cifs_sb_info *cifs_sb;
struct cifs_aio_ctx *ctx;
struct iov_iter saved_from = *from;
+ size_t len = iov_iter_count(from);
int rc;
/*
- * BB - optimize the way when signing is disabled. We can drop this
- * extra memory-to-memory copying and use iovec buffers for constructing
- * write request.
+ * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
+ * In this case, fall back to non-direct write function.
+ * this could be improved by getting pages directly in ITER_KVEC
*/
+ if (direct && from->type & ITER_KVEC) {
+ cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
+ direct = false;
+ }
rc = generic_write_checks(iocb, from);
if (rc <= 0)
@@ -2756,10 +2865,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
ctx->pos = iocb->ki_pos;
- rc = setup_aio_ctx_iter(ctx, from, WRITE);
- if (rc) {
- kref_put(&ctx->refcount, cifs_aio_ctx_release);
- return rc;
+ if (direct) {
+ ctx->direct_io = true;
+ ctx->iter = *from;
+ ctx->len = len;
+ } else {
+ rc = setup_aio_ctx_iter(ctx, from, WRITE);
+ if (rc) {
+ kref_put(&ctx->refcount, cifs_aio_ctx_release);
+ return rc;
+ }
}
/* grab a lock here due to read response handlers can access ctx */
@@ -2809,6 +2924,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
return total_written;
}
+/* Write entry point that requests the direct-I/O path of __cifs_writev(). */
+ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	const bool direct = true;
+
+	return __cifs_writev(iocb, from, direct);
+}
+
+/* Write entry point that requests the non-direct path of __cifs_writev(). */
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	const bool direct = false;
+
+	return __cifs_writev(iocb, from, direct);
+}
+
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
@@ -2979,7 +3104,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
for (i = 0; i < rdata->nr_pages; i++) {
put_page(rdata->pages[i]);
- rdata->pages[i] = NULL;
}
cifs_readdata_release(refcount);
}
@@ -3004,7 +3128,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
size_t copy = min_t(size_t, remaining, PAGE_SIZE);
size_t written;
- if (unlikely(iter->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(iter))) {
void *addr = kmap_atomic(page);
written = copy_to_iter(addr, copy, iter);
@@ -3106,6 +3230,67 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
return uncached_fill_pages(server, rdata, iter, iter->count);
}
+/*
+ * Reissue a complete, already-built read request (used by the direct I/O
+ * retry path) without splitting it into smaller segments.
+ *
+ * @rdata:      read request to resend; its reference is dropped on failure
+ * @rdata_list: list the request is appended to once it has been sent
+ * @ctx:        aio context of the request (not used in this function)
+ *
+ * Returns 0 when the read was queued, the error returned by
+ * wait_mtu_credits(), -EBUSY when enough credits for the whole request
+ * could not be obtained in three attempts, or the error from reopening
+ * the file handle / sending the request.
+ */
+static int cifs_resend_rdata(struct cifs_readdata *rdata,
+			struct list_head *rdata_list,
+			struct cifs_aio_ctx *ctx)
+{
+	int wait_retry = 0;
+	unsigned int rsize, credits;
+	int rc;
+	struct TCP_Server_Info *server =
+		tlink_tcon(rdata->cfile->tlink)->ses->server;
+
+	/*
+	 * Try to resend this rdata, waiting for credits up to 3 seconds.
+	 * Note: we are attempting to resend the whole rdata not in segments
+	 */
+	do {
+		rc = server->ops->wait_mtu_credits(server, rdata->bytes,
+						&rsize, &credits);
+
+		/*
+		 * On failure rsize/credits may be unset: report rc as-is
+		 * rather than comparing an indeterminate rsize below.
+		 */
+		if (rc)
+			goto out;
+
+		if (rsize < rdata->bytes) {
+			/* not enough for the whole request: hand them back */
+			add_credits_and_wake_if(server, credits, 0);
+			msleep(1000);
+			wait_retry++;
+		}
+	} while (rsize < rdata->bytes && wait_retry < 3);
+
+	/*
+	 * If we can't find enough credits to send this rdata
+	 * release the rdata and return failure, this will pass
+	 * whatever I/O amount we have finished to VFS.
+	 */
+	if (rsize < rdata->bytes) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	/* reopen an invalidated handle if needed; loop while asked to retry */
+	rc = -EAGAIN;
+	while (rc == -EAGAIN) {
+		rc = 0;
+		if (rdata->cfile->invalidHandle)
+			rc = cifs_reopen_file(rdata->cfile, true);
+		if (!rc)
+			rc = server->ops->async_readv(rdata);
+	}
+
+	if (!rc) {
+		/* Add to aio pending list */
+		list_add_tail(&rdata->list, rdata_list);
+		return 0;
+	}
+
+	add_credits_and_wake_if(server, rdata->credits, 0);
+out:
+	kref_put(&rdata->refcount,
+		cifs_uncached_readdata_release);
+
+	return rc;
+}
+
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
@@ -3117,6 +3302,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
int rc;
pid_t pid;
struct TCP_Server_Info *server;
+ struct page **pagevec;
+ size_t start;
+ struct iov_iter direct_iov = ctx->iter;
server = tlink_tcon(open_file->tlink)->ses->server;
@@ -3125,6 +3313,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
else
pid = current->tgid;
+ if (ctx->direct_io)
+ iov_iter_advance(&direct_iov, offset - ctx->pos);
+
do {
rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
&rsize, &credits);
@@ -3132,20 +3323,59 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
break;
cur_len = min_t(const size_t, len, rsize);
- npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
- /* allocate a readdata struct */
- rdata = cifs_readdata_alloc(npages,
+ if (ctx->direct_io) {
+ ssize_t result;
+
+ result = iov_iter_get_pages_alloc(
+ &direct_iov, &pagevec,
+ cur_len, &start);
+ if (result < 0) {
+ cifs_dbg(VFS,
+ "couldn't get user pages (cur_len=%zd)"
+ " iter type %d"
+ " iov_offset %zd count %zd\n",
+ result, direct_iov.type,
+ direct_iov.iov_offset,
+ direct_iov.count);
+ dump_stack();
+ break;
+ }
+ cur_len = (size_t)result;
+ iov_iter_advance(&direct_iov, cur_len);
+
+ rdata = cifs_readdata_direct_alloc(
+ pagevec, cifs_uncached_readv_complete);
+ if (!rdata) {
+ add_credits_and_wake_if(server, credits, 0);
+ rc = -ENOMEM;
+ break;
+ }
+
+ npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
+ rdata->page_offset = start;
+ rdata->tailsz = npages > 1 ?
+ cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
+ cur_len;
+
+ } else {
+
+ npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
+ /* allocate a readdata struct */
+ rdata = cifs_readdata_alloc(npages,
cifs_uncached_readv_complete);
- if (!rdata) {
- add_credits_and_wake_if(server, credits, 0);
- rc = -ENOMEM;
- break;
- }
+ if (!rdata) {
+ add_credits_and_wake_if(server, credits, 0);
+ rc = -ENOMEM;
+ break;
+ }
- rc = cifs_read_allocate_pages(rdata, npages);
- if (rc)
- goto error;
+ rc = cifs_read_allocate_pages(rdata, npages);
+ if (rc)
+ goto error;
+
+ rdata->tailsz = PAGE_SIZE;
+ }
rdata->cfile = cifsFileInfo_get(open_file);
rdata->nr_pages = npages;
@@ -3153,7 +3383,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->bytes = cur_len;
rdata->pid = pid;
rdata->pagesz = PAGE_SIZE;
- rdata->tailsz = PAGE_SIZE;
rdata->read_into_pages = cifs_uncached_read_into_pages;
rdata->copy_into_pages = cifs_uncached_copy_into_pages;
rdata->credits = credits;
@@ -3167,9 +3396,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
if (rc) {
add_credits_and_wake_if(server, rdata->credits, 0);
kref_put(&rdata->refcount,
- cifs_uncached_readdata_release);
- if (rc == -EAGAIN)
+ cifs_uncached_readdata_release);
+ if (rc == -EAGAIN) {
+ iov_iter_revert(&direct_iov, cur_len);
continue;
+ }
break;
}
@@ -3225,45 +3456,62 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
* reading.
*/
if (got_bytes && got_bytes < rdata->bytes) {
- rc = cifs_readdata_to_iov(rdata, to);
+ rc = 0;
+ if (!ctx->direct_io)
+ rc = cifs_readdata_to_iov(rdata, to);
if (rc) {
kref_put(&rdata->refcount,
- cifs_uncached_readdata_release);
+ cifs_uncached_readdata_release);
continue;
}
}
- rc = cifs_send_async_read(
+ if (ctx->direct_io) {
+ /*
+ * Re-use rdata as this is a
+ * direct I/O
+ */
+ rc = cifs_resend_rdata(
+ rdata,
+ &tmp_list, ctx);
+ } else {
+ rc = cifs_send_async_read(
rdata->offset + got_bytes,
rdata->bytes - got_bytes,
rdata->cfile, cifs_sb,
&tmp_list, ctx);
+ kref_put(&rdata->refcount,
+ cifs_uncached_readdata_release);
+ }
+
list_splice(&tmp_list, &ctx->list);
- kref_put(&rdata->refcount,
- cifs_uncached_readdata_release);
goto again;
} else if (rdata->result)
rc = rdata->result;
- else
+ else if (!ctx->direct_io)
rc = cifs_readdata_to_iov(rdata, to);
/* if there was a short read -- discard anything left */
if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
rc = -ENODATA;
+
+ ctx->total_len += rdata->got_bytes;
}
list_del_init(&rdata->list);
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
- for (i = 0; i < ctx->npages; i++) {
- if (ctx->should_dirty)
- set_page_dirty(ctx->bv[i].bv_page);
- put_page(ctx->bv[i].bv_page);
- }
+ if (!ctx->direct_io) {
+ for (i = 0; i < ctx->npages; i++) {
+ if (ctx->should_dirty)
+ set_page_dirty(ctx->bv[i].bv_page);
+ put_page(ctx->bv[i].bv_page);
+ }
- ctx->total_len = ctx->len - iov_iter_count(to);
+ ctx->total_len = ctx->len - iov_iter_count(to);
+ }
cifs_stats_bytes_read(tcon, ctx->total_len);
@@ -3281,18 +3529,28 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
complete(&ctx->done);
}
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t __cifs_readv(
+ struct kiocb *iocb, struct iov_iter *to, bool direct)
{
- struct file *file = iocb->ki_filp;
- ssize_t rc;
size_t len;
- ssize_t total_read = 0;
- loff_t offset = iocb->ki_pos;
+ struct file *file = iocb->ki_filp;
struct cifs_sb_info *cifs_sb;
- struct cifs_tcon *tcon;
struct cifsFileInfo *cfile;
+ struct cifs_tcon *tcon;
+ ssize_t rc, total_read = 0;
+ loff_t offset = iocb->ki_pos;
struct cifs_aio_ctx *ctx;
+ /*
+ * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
+ * fall back to data copy read path
+ * this could be improved by getting pages directly in ITER_KVEC
+ */
+ if (direct && to->type & ITER_KVEC) {
+ cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
+ direct = false;
+ }
+
len = iov_iter_count(to);
if (!len)
return 0;
@@ -3316,17 +3574,23 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
if (!is_sync_kiocb(iocb))
ctx->iocb = iocb;
- if (to->type == ITER_IOVEC)
+ if (iter_is_iovec(to))
ctx->should_dirty = true;
- rc = setup_aio_ctx_iter(ctx, to, READ);
- if (rc) {
- kref_put(&ctx->refcount, cifs_aio_ctx_release);
- return rc;
+ if (direct) {
+ ctx->pos = offset;
+ ctx->direct_io = true;
+ ctx->iter = *to;
+ ctx->len = len;
+ } else {
+ rc = setup_aio_ctx_iter(ctx, to, READ);
+ if (rc) {
+ kref_put(&ctx->refcount, cifs_aio_ctx_release);
+ return rc;
+ }
+ len = ctx->len;
}
- len = ctx->len;
-
/* grab a lock here due to read response handlers can access ctx */
mutex_lock(&ctx->aio_mutex);
@@ -3368,6 +3632,16 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
return rc;
}
+/* Read entry point that requests the direct-I/O path of __cifs_readv(). */
+ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	const bool direct = true;
+
+	return __cifs_readv(iocb, to, direct);
+}
+
+/* Read entry point that requests the non-direct path of __cifs_readv(). */
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	const bool direct = false;
+
+	return __cifs_readv(iocb, to, direct);
+}
+
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1023d78..a81a9df 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1320,8 +1320,8 @@ cifs_drop_nlink(struct inode *inode)
/*
* If d_inode(dentry) is null (usually meaning the cached dentry
* is a negative dentry) then we would attempt a standard SMB delete, but
- * if that fails we can not attempt the fall back mechanisms on EACCESS
- * but will return the EACCESS to the caller. Note that the VFS does not call
+ * if that fails we can not attempt the fall back mechanisms on EACCES
+ * but will return the EACCES to the caller. Note that the VFS does not call
* unlink on negative dentries currently.
*/
int cifs_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index fc43d5d..8a41f4e 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -788,7 +788,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
struct page **pages = NULL;
struct bio_vec *bv = NULL;
- if (iter->type & ITER_KVEC) {
+ if (iov_iter_is_kvec(iter)) {
memcpy(&ctx->iter, iter, sizeof(struct iov_iter));
ctx->len = count;
iov_iter_advance(iter, count);
@@ -859,7 +859,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
ctx->bv = bv;
ctx->len = saved_len - count;
ctx->npages = npages;
- iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len);
+ iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len);
return 0;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f85fc5a..225fec1 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -747,6 +747,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
int rc = 0;
unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0;
char *name, *value;
+ size_t buf_size = dst_size;
size_t name_len, value_len, user_name_len;
while (src_size > 0) {
@@ -782,9 +783,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
/* 'user.' plus a terminating null */
user_name_len = 5 + 1 + name_len;
- rc += user_name_len;
-
- if (dst_size >= user_name_len) {
+ if (buf_size == 0) {
+ /* skip copy - calc size only */
+ rc += user_name_len;
+ } else if (dst_size >= user_name_len) {
dst_size -= user_name_len;
memcpy(dst, "user.", 5);
dst += 5;
@@ -792,8 +794,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
dst += name_len;
*dst = 0;
++dst;
- } else if (dst_size == 0) {
- /* skip copy - calc size only */
+ rc += user_name_len;
} else {
/* stop before overrun buffer */
rc = -ERANGE;
@@ -1078,6 +1079,9 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cfile->fid.persistent_fid = fid->persistent_fid;
cfile->fid.volatile_fid = fid->volatile_fid;
+#ifdef CONFIG_CIFS_DEBUG2
+ cfile->fid.mid = fid->mid;
+#endif /* CIFS_DEBUG2 */
server->ops->set_oplock_level(cinode, oplock, fid->epoch,
&fid->purge_cache);
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
@@ -3152,13 +3156,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
return 0;
}
- iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len);
+ iov_iter_bvec(&iter, WRITE, bvec, npages, data_len);
} else if (buf_len >= data_offset + data_len) {
/* read response payload is in buf */
WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
iov.iov_base = buf + data_offset;
iov.iov_len = data_len;
- iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len);
+ iov_iter_kvec(&iter, WRITE, &iov, 1, data_len);
} else {
/* read response payload cannot be in both buf and pages */
WARN_ONCE(1, "buf can not contain only a part of read data");
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7d7b016..27f8653 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1512,7 +1512,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
cifs_small_buf_release(req);
rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
-
+ trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
if (rc != 0) {
if (tcon) {
cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
@@ -1559,6 +1559,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
if (tcon->ses->server->ops->validate_negotiate)
rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
tcon_exit:
+
free_rsp_buf(resp_buftype, rsp);
kfree(unc_path);
return rc;
@@ -2308,6 +2309,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
atomic_inc(&tcon->num_remote_opens);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
+#ifdef CONFIG_CIFS_DEBUG2
+ oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId);
+#endif /* CIFS_DEBUG2 */
if (buf) {
memcpy(buf, &rsp->CreationTime, 32);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f753f42..5671d5e 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -842,6 +842,41 @@ struct fsctl_get_integrity_information_rsp {
/* Integrity flags for above */
#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
+/* Reparse structures - see MS-FSCC 2.1.2 */
+
+/* struct fsctl_reparse_info_req is empty, only response structs (see below) */
+
+struct reparse_data_buffer { /* packed reparse buffer: fixed header + variable-length data */
+ __le32 ReparseTag; /* little-endian 32-bit tag */
+ __le16 ReparseDataLength; /* presumably the byte length of DataBuffer - confirm against MS-FSCC 2.1.2 */
+ __u16 Reserved;
+ __u8 DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_guid_data_buffer { /* same header as reparse_data_buffer plus a 16-byte GUID before the data */
+ __le32 ReparseTag; /* little-endian 32-bit tag */
+ __le16 ReparseDataLength; /* presumably the byte length of DataBuffer - confirm against MS-FSCC 2.1.2 */
+ __u16 Reserved;
+ __u8 ReparseGuid[16]; /* raw 16-byte GUID */
+ __u8 DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_mount_point_data_buffer { /* packed reparse buffer carrying two names in PathBuffer */
+ __le32 ReparseTag; /* little-endian 32-bit tag */
+ __le16 ReparseDataLength; /* NOTE(review): length semantics not shown here - confirm against MS-FSCC 2.1.2 */
+ __u16 Reserved;
+ __le16 SubstituteNameOffset; /* presumably an offset into PathBuffer - TODO confirm */
+ __le16 SubstituteNameLength; /* presumably in bytes - TODO confirm */
+ __le16 PrintNameOffset; /* presumably an offset into PathBuffer - TODO confirm */
+ __le16 PrintNameLength; /* presumably in bytes - TODO confirm */
+ __u8 PathBuffer[0]; /* Variable Length */
+} __packed;
+
+/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+
+/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
+
+
/* See MS-DFSC 2.2.2 */
struct fsctl_get_dfs_referral_req {
__le16 MaxReferralLevel;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5e28236..e94a8d1 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
info->smbd_recv_pending++;
- switch (msg->msg_iter.type) {
- case READ | ITER_KVEC:
+ if (iov_iter_rw(&msg->msg_iter) == WRITE) {
+ /* It's a bug in upper layer to get there */
+ cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n",
+ iov_iter_rw(&msg->msg_iter));
+ rc = -EINVAL;
+ goto out;
+ }
+
+ switch (iov_iter_type(&msg->msg_iter)) {
+ case ITER_KVEC:
buf = msg->msg_iter.kvec->iov_base;
to_read = msg->msg_iter.kvec->iov_len;
rc = smbd_recv_buf(info, buf, to_read);
break;
- case READ | ITER_BVEC:
+ case ITER_BVEC:
page = msg->msg_iter.bvec->bv_page;
page_offset = msg->msg_iter.bvec->bv_offset;
to_read = msg->msg_iter.bvec->bv_len;
@@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
default:
/* It's a bug in upper layer to get there */
cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
- msg->msg_iter.type);
+ iov_iter_type(&msg->msg_iter));
rc = -EINVAL;
}
+out:
info->smbd_recv_pending--;
wake_up(&info->wait_smbd_recv_pending);
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index cce8414..fb04980 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -374,6 +374,48 @@ DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
/*
+ * For SMB2/SMB3 tree connect
+ */
+
+DECLARE_EVENT_CLASS(smb3_tcon_class,
+ TP_PROTO(unsigned int xid,
+ __u32 tid,
+ __u64 sesid,
+ const char *unc_name,
+ int rc),
+ TP_ARGS(xid, tid, sesid, unc_name, rc),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(const char *, unc_name)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->unc_name = unc_name;
+ __entry->rc = rc;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d",
+ __entry->xid, __entry->sesid, __entry->tid,
+ __entry->unc_name, __entry->rc)
+)
+
+#define DEFINE_SMB3_TCON_EVENT(name) \
+DEFINE_EVENT(smb3_tcon_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u32 tid, \
+ __u64 sesid, \
+ const char *unc_name, \
+ int rc), \
+ TP_ARGS(xid, tid, sesid, unc_name, rc))
+
+DEFINE_SMB3_TCON_EVENT(tcon);
+
+
+/*
* For smb2/smb3 open call
*/
DECLARE_EVENT_CLASS(smb3_open_err_class,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index f811243..83ff0c2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -316,8 +316,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
.iov_base = &rfc1002_marker,
.iov_len = 4
};
- iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov,
- 1, 4);
+ iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
goto uncork;
@@ -338,8 +337,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
size += iov[i].iov_len;
}
- iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC,
- iov, n_vec, size);
+ iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
@@ -355,7 +353,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
&bvec.bv_offset);
- iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
+ iov_iter_bvec(&smb_msg.msg_iter, WRITE,
&bvec, 1, bvec.bv_len);
rc = smb_send_kvec(server, &smb_msg, &sent);
if (rc < 0)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54..722d17c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
spin_lock_init(&dio->bio_lock);
dio->refcount = 1;
- dio->should_dirty = (iter->type == ITER_IOVEC);
+ dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
sdio.iter = iter;
sdio.final_block_in_request = end >> blkbits;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a5e4a22..76976d6 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con)
nvec = 2;
}
len = iov[0].iov_len + iov[1].iov_len;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len);
+ iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
if (ret <= 0)
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 41cf2fb..906839a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -101,6 +101,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
token = match_token(p, tokens, args);
switch (token) {
case Opt_name:
+ kfree(opts->dev_name);
opts->dev_name = match_strdup(&args[0]);
if (unlikely(!opts->dev_name)) {
EXOFS_ERR("Error allocating dev_name");
@@ -117,7 +118,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
EXOFS_MIN_PID);
return -EINVAL;
}
- s_pid = 1;
+ s_pid = true;
break;
case Opt_to:
if (match_int(&args[0], &option))
@@ -866,8 +867,10 @@ static struct dentry *exofs_mount(struct file_system_type *type,
int ret;
ret = parse_options(data, &opts);
- if (ret)
+ if (ret) {
+ kfree(opts.dev_name);
return ERR_PTR(ret);
+ }
if (!opts.dev_name)
opts.dev_name = dev_name;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 12f90d4..3f89d0a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -45,15 +45,6 @@
#include <linux/compiler.h>
-/* Until this gets included into linux/compiler-gcc.h */
-#ifndef __nonstring
-#if defined(GCC_VERSION) && (GCC_VERSION >= 80000)
-#define __nonstring __attribute__((nonstring))
-#else
-#define __nonstring
-#endif
-#endif
-
/*
* The fourth extended filesystem constants/structures
*/
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2addcb8..014f6a6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1216,7 +1216,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (IS_ERR(bitmap_bh))
- return (struct inode *) bitmap_bh;
+ return ERR_CAST(bitmap_bh);
/* Having the inode bit set should be a 100% indicator that this
* is a valid orphan (no e2fsck run on fs). Orphans also include
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 67a3853..17adcb1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1556,7 +1556,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
- return (struct dentry *) bh;
+ return ERR_CAST(bh);
inode = NULL;
if (bh) {
__u32 ino = le32_to_cpu(de->inode);
@@ -1600,7 +1600,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
if (IS_ERR(bh))
- return (struct dentry *) bh;
+ return ERR_CAST(bh);
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d5..db75901 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
+ wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
- wbc_init_bio(io->io_wbc, bio);
return 0;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 58dbc39..cc2121b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1275,7 +1275,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
ssize_t ret = 0;
/* Special case for kernel I/O: can copy directly into the buffer */
- if (ii->type & ITER_KVEC) {
+ if (iov_iter_is_kvec(ii)) {
unsigned long user_addr = fuse_get_user_addr(ii);
size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 2005529..d64f622 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
u64 off, u64 olen, u64 destoff)
{
struct fd src_file = fdget(srcfd);
+ loff_t cloned;
int ret;
if (!src_file.file)
@@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
goto fdput;
- ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+ cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
+ olen, 0);
+ if (cloned < 0)
+ ret = cloned;
+ else if (olen && cloned != olen)
+ ret = -EINVAL;
+ else
+ ret = 0;
fdput:
fdput(src_file);
return ret;
@@ -669,6 +677,9 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
return ioctl_fiemap(filp, arg);
case FIGETBSZ:
+ /* anon_bdev filesystems may not have a block size */
+ if (!inode->i_sb->s_blocksize)
+ return -EINVAL;
return put_user(inode->i_sb->s_blocksize, argp);
case FICLONE:
diff --git a/fs/iomap.c b/fs/iomap.c
index 90c2feb..64ce240 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -30,7 +30,6 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/dax.h>
#include <linux/sched/signal.h>
-#include <linux/swap.h>
#include "internal.h"
@@ -1795,7 +1794,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (pos >= dio->i_size)
goto out_free_dio;
- if (iter->type == ITER_IOVEC)
+ if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ)
dio->flags |= IOMAP_DIO_DIRTY;
} else {
flags |= IOMAP_WRITE;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4288a6e..46d691b 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
return nfs42_proc_allocate(filep, offset, len);
}
-static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
- struct file *dst_file, loff_t dst_off, u64 count)
+static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+ struct file *dst_file, loff_t dst_off, loff_t count,
+ unsigned int remap_flags)
{
struct inode *dst_inode = file_inode(dst_file);
struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
bool same_inode = false;
int ret;
+ if (remap_flags & ~REMAP_FILE_ADVISORY)
+ return -EINVAL;
+
/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
@@ -240,7 +244,7 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
inode_unlock(src_inode);
}
out:
- return ret;
+ return ret < 0 ? ret : count;
}
#endif /* CONFIG_NFS_V4_2 */
@@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
.copy_file_range = nfs4_copy_file_range,
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
- .clone_file_range = nfs42_clone_file_range,
+ .remap_file_range = nfs42_remap_file_range,
#else
.llseek = nfs_file_llseek,
#endif
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2751976..eb67098 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
- return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
- count));
+ loff_t cloned;
+
+ cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+ if (count && cloned != count)
+ cloned = -EINVAL;
+ return nfserrno(cloned < 0 ? cloned : 0);
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -923,7 +927,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
int host_err;
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+ iov_iter_kvec(&iter, READ, vec, vlen, *count);
host_err = vfs_iter_read(file, &iter, &offset, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
}
@@ -999,7 +1003,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (stable && !use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
host_err = vfs_iter_write(file, &iter, &pos, flags);
if (host_err < 0)
goto out_nfserr;
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 4690cd7..3986c7a 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -312,7 +312,7 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
/* Get the mft record of the inode belonging to the child dentry. */
mrec = map_mft_record(ni);
if (IS_ERR(mrec))
- return (struct dentry *)mrec;
+ return ERR_CAST(mrec);
/* Find the first file name attribute in the mft record. */
ctx = ntfs_attr_get_search_ctx(ni, mrec);
if (unlikely(!ctx)) {
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 1d098c3..4ebbd57 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -99,25 +99,34 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
return ret;
}
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[])
{
int status = 0;
unsigned int i;
struct buffer_head *bh;
+ int new_bh = 0;
trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
if (!nr)
goto bail;
+ /* Don't put buffer head and re-assign it to NULL if it is allocated
+ * outside since the caller can't be aware of this alteration!
+ */
+ new_bh = (bhs[0] == NULL);
+
for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(osb->sb, block++);
if (bhs[i] == NULL) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ break;
}
}
bh = bhs[i];
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
submit_bh(REQ_OP_READ, 0, bh);
}
+read_failure:
for (i = nr; i > 0; i--) {
bh = bhs[i - 1];
+ if (unlikely(status)) {
+ if (new_bh && bh) {
+ /* If middle bh fails, let previous bh
+ * finish its read and then put it to
+ * avoid bh leak
+ */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
+ put_bh(bh);
+ bhs[i - 1] = NULL;
+ } else if (bh && buffer_uptodate(bh)) {
+ clear_buffer_uptodate(bh);
+ }
+ continue;
+ }
+
/* No need to wait on the buffer if it's managed by JBD. */
if (!buffer_jbd(bh))
wait_on_buffer(bh);
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
* so we can safely record this and loop back
* to cleanup the other buffers. */
status = -EIO;
- put_bh(bh);
- bhs[i - 1] = NULL;
+ goto read_failure;
}
}
@@ -179,6 +204,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
return status;
}
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
int i, ignore_cache = 0;
struct buffer_head *bh;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+ int new_bh = 0;
trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
goto bail;
}
+ /* Don't put buffer head and re-assign it to NULL if it is allocated
+ * outside since the caller can't be aware of this alteration!
+ */
+ new_bh = (bhs[0] == NULL);
+
ocfs2_metadata_cache_io_lock(ci);
for (i = 0 ; i < nr ; i++) {
if (bhs[i] == NULL) {
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ /* Don't forget to put previous bh! */
+ break;
}
}
bh = bhs[i];
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
}
}
- status = 0;
-
+read_failure:
for (i = (nr - 1); i >= 0; i--) {
bh = bhs[i];
if (!(flags & OCFS2_BH_READAHEAD)) {
- if (status) {
- /* Clear the rest of the buffers on error */
- put_bh(bh);
- bhs[i] = NULL;
+ if (unlikely(status)) {
+ /* Clear the buffers on error including those
+ * ever succeeded in reading
+ */
+ if (new_bh && bh) {
+ /* If middle bh fails, let previous bh
+ * finish its read and then put it to
+ * aovoid bh leak
+ */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
+ put_bh(bh);
+ bhs[i] = NULL;
+ } else if (bh && buffer_uptodate(bh)) {
+ clear_buffer_uptodate(bh);
+ }
continue;
}
/* We know this can't have changed as we hold the
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
* uptodate. */
status = -EIO;
clear_buffer_needs_validate(bh);
- put_bh(bh);
- bhs[i] = NULL;
- continue;
+ goto read_failure;
}
if (buffer_needs_validate(bh)) {
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
BUG_ON(buffer_jbd(bh));
clear_buffer_needs_validate(bh);
status = validate(sb, bh);
- if (status) {
- put_bh(bh);
- bhs[i] = NULL;
- continue;
- }
+ if (status)
+ goto read_failure;
}
}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 7d9eea7..e9f236a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
{
struct kvec vec = { .iov_len = len, .iov_base = data, };
struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b048d4f..c121abb 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
- brelse(bh);
- continue;
+ break;
}
if (le64_to_cpu(de->inode)) {
unsigned char d_type = DT_UNKNOWN;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 933aac5..7c83582 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
/* LVB only has room for 64 bits of time here so we pack it for
* now. */
-static u64 ocfs2_pack_timespec(struct timespec *spec)
+static u64 ocfs2_pack_timespec(struct timespec64 *spec)
{
u64 res;
- u64 sec = spec->tv_sec;
+ u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
u32 nsec = spec->tv_nsec;
res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
- struct timespec ts;
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
lvb->lvb_imode = cpu_to_be16(inode->i_mode);
lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
- ts = timespec64_to_timespec(inode->i_atime);
lvb->lvb_iatime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&ts));
- ts = timespec64_to_timespec(inode->i_ctime);
+ cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
lvb->lvb_ictime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&ts));
- ts = timespec64_to_timespec(inode->i_mtime);
+ cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
lvb->lvb_imtime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&ts));
+ cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2180,7 +2176,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
mlog_meta_lvb(0, lockres);
}
-static void ocfs2_unpack_timespec(struct timespec *spec,
+static void ocfs2_unpack_timespec(struct timespec64 *spec,
u64 packed_time)
{
spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
{
- struct timespec ts;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
inode->i_mode = be16_to_cpu(lvb->lvb_imode);
set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
- ocfs2_unpack_timespec(&ts,
+ ocfs2_unpack_timespec(&inode->i_atime,
be64_to_cpu(lvb->lvb_iatime_packed));
- inode->i_atime = timespec_to_timespec64(ts);
- ocfs2_unpack_timespec(&ts,
+ ocfs2_unpack_timespec(&inode->i_mtime,
be64_to_cpu(lvb->lvb_imtime_packed));
- inode->i_mtime = timespec_to_timespec64(ts);
- ocfs2_unpack_timespec(&ts,
+ ocfs2_unpack_timespec(&inode->i_ctime,
be64_to_cpu(lvb->lvb_ictime_packed));
- inode->i_ctime = timespec_to_timespec64(ts);
spin_unlock(&oi->ip_lock);
}
@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
* we can recover correctly from node failure. Otherwise, we may get
* invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
*/
- if (!ocfs2_is_o2cb_active() &&
+ if (ocfs2_userspace_stack(osb) &&
lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
lvb = 1;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fa35cb..d640c5f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
written = __generic_file_write_iter(iocb, from);
/* buffered aio wouldn't have proper lock coverage today */
- BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+ BUG_ON(written == -EIOCBQUEUED && !direct_io);
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
trace_generic_file_read_iter_ret(ret);
/* buffered aio wouldn't have proper lock coverage today */
- BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+ BUG_ON(ret == -EIOCBQUEUED && !direct_io);
/* see ocfs2_file_write_iter */
if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
@@ -2527,24 +2527,79 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
return offset;
}
-static int ocfs2_file_clone_range(struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
+static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
{
- return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
-}
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+ struct buffer_head *in_bh = NULL, *out_bh = NULL;
+ bool same_inode = (inode_in == inode_out);
+ loff_t remapped = 0;
+ ssize_t ret;
-static int ocfs2_file_dedupe_range(struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
-{
- return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, true);
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ return -EINVAL;
+ if (!ocfs2_refcount_tree(osb))
+ return -EOPNOTSUPP;
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
+ /* Lock both files against IO */
+ ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+ if (ret)
+ return ret;
+
+ /* Check file eligibility and prepare for block sharing. */
+ ret = -EINVAL;
+ if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+ (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+ goto out_unlock;
+
+ ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+ &len, remap_flags);
+ if (ret < 0 || len == 0)
+ goto out_unlock;
+
+ /* Lock out changes to the allocation maps and remap. */
+ down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+ SINGLE_DEPTH_NESTING);
+
+ /* Zap any page cache for the destination file's range. */
+ truncate_inode_pages_range(&inode_out->i_data,
+ round_down(pos_out, PAGE_SIZE),
+ round_up(pos_out + len, PAGE_SIZE) - 1);
+
+ remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
+ inode_out, out_bh, pos_out, len);
+ up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+ if (remapped < 0) {
+ ret = remapped;
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ /*
+ * Empty the extent map so that we may get the right extent
+ * record from the disk.
+ */
+ ocfs2_extent_map_trunc(inode_in, 0);
+ ocfs2_extent_map_trunc(inode_out, 0);
+
+ ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+out_unlock:
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return remapped > 0 ? remapped : ret;
}
const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2641,7 @@ const struct file_operations ocfs2_fops = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
- .clone_file_range = ocfs2_file_clone_range,
- .dedupe_file_range = ocfs2_file_dedupe_range,
+ .remap_file_range = ocfs2_remap_file_range,
};
const struct file_operations ocfs2_dops = {
@@ -2633,8 +2687,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
- .clone_file_range = ocfs2_file_clone_range,
- .dedupe_file_range = ocfs2_file_dedupe_range,
+ .remap_file_range = ocfs2_remap_file_range,
};
const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bd34756..b63c97f4 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
int rm_quota_used = 0, i;
struct ocfs2_quota_recovery *qrec;
+ /* Whether quota is supported. */
+ int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+ || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
+
status = ocfs2_wait_on_mount(osb);
if (status < 0) {
goto bail;
}
- rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
- if (!rm_quota) {
- status = -ENOMEM;
- goto bail;
+ if (quota_enabled) {
+ rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
+ if (!rm_quota) {
+ status = -ENOMEM;
+ goto bail;
+ }
}
restart:
status = ocfs2_super_lock(osb, 1);
@@ -1422,9 +1430,14 @@ static int __ocfs2_recovery_thread(void *arg)
* then quota usage would be out of sync until some node takes
* the slot. So we remember which nodes need quota recovery
* and when everything else is done, we recover quotas. */
- for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
- if (i == rm_quota_used)
- rm_quota[rm_quota_used++] = slot_num;
+ if (quota_enabled) {
+ for (i = 0; i < rm_quota_used
+ && rm_quota[i] != slot_num; i++)
+ ;
+
+ if (i == rm_quota_used)
+ rm_quota[rm_quota_used++] = slot_num;
+ }
status = ocfs2_recover_node(osb, node_num, slot_num);
skip_recovery:
@@ -1452,16 +1465,19 @@ static int __ocfs2_recovery_thread(void *arg)
/* Now it is right time to recover quotas... We have to do this under
* superblock lock so that no one can start using the slot (and crash)
* before we recover it */
- for (i = 0; i < rm_quota_used; i++) {
- qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
- if (IS_ERR(qrec)) {
- status = PTR_ERR(qrec);
- mlog_errno(status);
- continue;
+ if (quota_enabled) {
+ for (i = 0; i < rm_quota_used; i++) {
+ qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+ if (IS_ERR(qrec)) {
+ status = PTR_ERR(qrec);
+ mlog_errno(status);
+ continue;
+ }
+ ocfs2_queue_recovery_completion(osb->journal,
+ rm_quota[i],
+ NULL, NULL, qrec,
+ ORPHAN_NEED_TRUNCATE);
}
- ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
- NULL, NULL, qrec,
- ORPHAN_NEED_TRUNCATE);
}
ocfs2_super_unlock(osb, 1);
@@ -1483,7 +1499,8 @@ static int __ocfs2_recovery_thread(void *arg)
mutex_unlock(&osb->recovery_lock);
- kfree(rm_quota);
+ if (quota_enabled)
+ kfree(rm_quota);
/* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 7eb3b0a..3f1685d 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -25,6 +25,7 @@
#include "ocfs2_ioctl.h"
#include "alloc.h"
+#include "localalloc.h"
#include "aops.h"
#include "dlmglue.h"
#include "extent_map.h"
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
struct ocfs2_refcount_tree *ref_tree = NULL;
u32 new_phys_cpos, new_len;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+ int need_free = 0;
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
BUG_ON(!ocfs2_is_refcount_inode(inode));
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
if (!partial) {
context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
ret = -ENOSPC;
+ need_free = 1;
goto out_commit;
}
}
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
mlog_errno(ret);
out_commit:
+ if (need_free && context->data_ac) {
+ struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+ if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ new_phys_cpos, new_len);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+ new_len);
+ }
+
ocfs2_commit_trans(osb, handle);
out_unlock_mutex:
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 1114ef0..a35259e 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,9 +4466,9 @@ int ocfs2_reflink_ioctl(struct inode *inode,
}
/* Update destination inode size, if necessary. */
-static int ocfs2_reflink_update_dest(struct inode *dest,
- struct buffer_head *d_bh,
- loff_t newlen)
+int ocfs2_reflink_update_dest(struct inode *dest,
+ struct buffer_head *d_bh,
+ loff_t newlen)
{
handle_t *handle;
int ret;
@@ -4505,14 +4505,14 @@ static int ocfs2_reflink_update_dest(struct inode *dest,
}
/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
-static int ocfs2_reflink_remap_extent(struct inode *s_inode,
- struct buffer_head *s_bh,
- loff_t pos_in,
- struct inode *t_inode,
- struct buffer_head *t_bh,
- loff_t pos_out,
- loff_t len,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
{
struct ocfs2_extent_tree s_et;
struct ocfs2_extent_tree t_et;
@@ -4520,8 +4520,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *ref_tree;
struct ocfs2_super *osb;
+ loff_t remapped_bytes = 0;
loff_t pstart, plen;
- u32 p_cluster, num_clusters, slast, spos, tpos;
+ u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
unsigned int ext_flags;
int ret = 0;
@@ -4603,30 +4604,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
next_loop:
spos += num_clusters;
tpos += num_clusters;
+ remapped_clus += num_clusters;
}
-out:
- return ret;
+ goto out;
out_unlock_refcount:
ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
brelse(ref_root_bh);
- return ret;
+out:
+ remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
+ remapped_bytes = min_t(loff_t, len, remapped_bytes);
+
+ return remapped_bytes > 0 ? remapped_bytes : ret;
}
/* Set up refcount tree and remap s_inode to t_inode. */
-static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
- struct buffer_head *s_bh,
- loff_t pos_in,
- struct inode *t_inode,
- struct buffer_head *t_bh,
- loff_t pos_out,
- loff_t len)
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len)
{
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_super *osb;
struct ocfs2_dinode *dis;
struct ocfs2_dinode *dit;
- int ret;
+ loff_t ret;
osb = OCFS2_SB(s_inode->i_sb);
dis = (struct ocfs2_dinode *)s_bh->b_data;
@@ -4698,7 +4703,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
/* Actually remap extents now. */
ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
pos_out, len, &dealloc);
- if (ret) {
+ if (ret < 0) {
mlog_errno(ret);
goto out;
}
@@ -4713,10 +4718,10 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
}
/* Lock an inode and grab a bh pointing to the inode. */
-static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
- struct buffer_head **bh1,
- struct inode *t_inode,
- struct buffer_head **bh2)
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+ struct buffer_head **bh1,
+ struct inode *t_inode,
+ struct buffer_head **bh2)
{
struct inode *inode1;
struct inode *inode2;
@@ -4801,10 +4806,10 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
}
/* Unlock both inodes and release buffers. */
-static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
- struct buffer_head *s_bh,
- struct inode *t_inode,
- struct buffer_head *t_bh)
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ struct inode *t_inode,
+ struct buffer_head *t_bh)
{
ocfs2_inode_unlock(s_inode, 1);
ocfs2_rw_unlock(s_inode, 1);
@@ -4816,82 +4821,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
}
unlock_two_nondirectories(s_inode, t_inode);
}
-
-/* Link a range of blocks from one file to another. */
-int ocfs2_reflink_remap_range(struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len,
- bool is_dedupe)
-{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
- struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
- struct buffer_head *in_bh = NULL, *out_bh = NULL;
- bool same_inode = (inode_in == inode_out);
- ssize_t ret;
-
- if (!ocfs2_refcount_tree(osb))
- return -EOPNOTSUPP;
- if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
- return -EROFS;
-
- /* Lock both files against IO */
- ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
- if (ret)
- return ret;
-
- /* Check file eligibility and prepare for block sharing. */
- ret = -EINVAL;
- if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
- (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
- goto out_unlock;
-
- ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
- &len, is_dedupe);
- if (ret <= 0)
- goto out_unlock;
-
- /* Lock out changes to the allocation maps and remap. */
- down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
- if (!same_inode)
- down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
- SINGLE_DEPTH_NESTING);
-
- ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
- out_bh, pos_out, len);
-
- /* Zap any page cache for the destination file's range. */
- if (!ret)
- truncate_inode_pages_range(&inode_out->i_data, pos_out,
- PAGE_ALIGN(pos_out + len) - 1);
-
- up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
- if (!same_inode)
- up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
- if (ret) {
- mlog_errno(ret);
- goto out_unlock;
- }
-
- /*
- * Empty the extent map so that we may get the right extent
- * record from the disk.
- */
- ocfs2_extent_map_trunc(inode_in, 0);
- ocfs2_extent_map_trunc(inode_out, 0);
-
- ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
- if (ret) {
- mlog_errno(ret);
- goto out_unlock;
- }
-
- ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
- return 0;
-
-out_unlock:
- ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
- return ret;
-}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 4af55bf..e9e862b 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,11 +115,23 @@ int ocfs2_reflink_ioctl(struct inode *inode,
const char __user *oldname,
const char __user *newname,
bool preserve);
-int ocfs2_reflink_remap_range(struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len,
- bool is_dedupe);
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len);
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+ struct buffer_head **bh1,
+ struct inode *t_inode,
+ struct buffer_head **bh2);
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ struct inode *t_inode,
+ struct buffer_head *t_bh);
+int ocfs2_reflink_update_dest(struct inode *dest,
+ struct buffer_head *d_bh,
+ loff_t newlen);
#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d6c350b..c4b029c 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
*/
static struct ocfs2_stack_plugin *active_stack;
-inline int ocfs2_is_o2cb_active(void)
-{
- return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{
struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index e3036e1..f2dce10 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
extern struct kset *ocfs2_kset;
#endif /* STACKGLUE_H */
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 5e65d81..fe53381 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -25,7 +25,7 @@ static int read_one_page(struct page *page)
struct iov_iter to;
struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
- iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+ iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE);
gossip_debug(GOSSIP_INODE_DEBUG,
"orangefs_readpage called with page %p\n",
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1cc797a..9e62dcf 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
struct file *new_file;
loff_t old_pos = 0;
loff_t new_pos = 0;
+ loff_t cloned;
int error = 0;
if (len == 0)
@@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
}
/* Try to use clone_file_range to clone up within the same fs */
- error = do_clone_file_range(old_file, 0, new_file, 0, len);
- if (!error)
+ cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+ if (cloned == len)
goto out;
/* Couldn't clone, so now we try to copy the data */
- error = 0;
/* FIXME: copy up sparse files efficiently */
while (len) {
@@ -395,7 +395,6 @@ struct ovl_copy_up_ctx {
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
- bool tmpfile;
bool origin;
bool indexed;
bool metacopy;
@@ -440,63 +439,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
return err;
}
-static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
- struct dentry **newdentry)
-{
- int err;
- struct dentry *upper;
- struct inode *udir = d_inode(c->destdir);
-
- upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
- if (IS_ERR(upper))
- return PTR_ERR(upper);
-
- if (c->tmpfile)
- err = ovl_do_link(temp, udir, upper);
- else
- err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
-
- if (!err)
- *newdentry = dget(c->tmpfile ? upper : temp);
- dput(upper);
-
- return err;
-}
-
-static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
-{
- int err;
- struct dentry *temp;
- const struct cred *old_creds = NULL;
- struct cred *new_creds = NULL;
- struct ovl_cattr cattr = {
- /* Can't properly set mode on creation because of the umask */
- .mode = c->stat.mode & S_IFMT,
- .rdev = c->stat.rdev,
- .link = c->link
- };
-
- err = security_inode_copy_up(c->dentry, &new_creds);
- temp = ERR_PTR(err);
- if (err < 0)
- goto out;
-
- if (new_creds)
- old_creds = override_creds(new_creds);
-
- if (c->tmpfile)
- temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
- else
- temp = ovl_create_temp(c->workdir, &cattr);
-out:
- if (new_creds) {
- revert_creds(old_creds);
- put_cred(new_creds);
- }
-
- return temp;
-}
-
static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
{
int err;
@@ -548,51 +490,148 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
return err;
}
-static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+/*
+ * Credential pair used around creation of the copy-up target.
+ * 'new' is filled in by security_inode_copy_up() and 'old' holds the value
+ * returned by override_creds() when 'new' was installed. Both are reset to
+ * NULL at the start of ovl_prep_cu_creds().
+ */
+struct ovl_cu_creds {
+ const struct cred *old;
+ struct cred *new;
+};
+
+/*
+ * Set up credentials for creating the copy-up target of @dentry.
+ *
+ * Calls security_inode_copy_up(), which may hand back a credential set in
+ * cc->new. When it does, those credentials are installed with
+ * override_creds() and the value it returns is kept in cc->old so that
+ * ovl_revert_cu_creds() can restore it.
+ *
+ * Returns 0 on success or the negative error from security_inode_copy_up().
+ */
+static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
{
- struct inode *udir = c->destdir->d_inode;
- struct inode *inode;
- struct dentry *newdentry = NULL;
- struct dentry *temp;
int err;
- temp = ovl_get_tmpfile(c);
+ cc->old = cc->new = NULL;
+ err = security_inode_copy_up(dentry, &cc->new);
+ if (err < 0)
+ return err;
+
+ if (cc->new)
+ cc->old = override_creds(cc->new);
+
+ return 0;
+}
+
+/*
+ * Undo ovl_prep_cu_creds(): when credentials were supplied in cc->new,
+ * restore cc->old with revert_creds() and drop cc->new with put_cred().
+ * Does nothing when cc->new is NULL.
+ */
+static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
+{
+ if (cc->new) {
+ revert_creds(cc->old);
+ put_cred(cc->new);
+ }
+}
+
+/*
+ * Copyup using workdir to prepare temp file. Used when copying up directories,
+ * special files or when upper fs doesn't support O_TMPFILE.
+ *
+ * Sequence, performed with workdir and destdir locked by
+ * ovl_lock_rename_workdir():
+ * 1. create a temp entry in workdir with ovl_create_temp() while the
+ * credentials from ovl_prep_cu_creds() are in effect;
+ * 2. fill it in with ovl_copy_up_inode() and, for an indexed directory,
+ * create the index entry with ovl_create_index();
+ * 3. look up the destination name in destdir and ovl_do_rename() the temp
+ * entry to it;
+ * 4. on success call ovl_set_upperdata() unless this is a metacopy, pass
+ * temp to ovl_inode_update() (temp is not dput() here on success), and
+ * set OVL_WHITEOUTS on directories.
+ *
+ * Any failure once the temp entry exists goes through the cleanup label,
+ * which removes it from workdir with ovl_cleanup() and drops the reference.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
+{
+ struct inode *inode;
+ struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+ struct dentry *temp, *upper;
+ struct ovl_cu_creds cc;
+ int err;
+ struct ovl_cattr cattr = {
+ /* Can't properly set mode on creation because of the umask */
+ .mode = c->stat.mode & S_IFMT,
+ .rdev = c->stat.rdev,
+ .link = c->link
+ };
+
+ err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+ if (err)
+ return err;
+
+ err = ovl_prep_cu_creds(c->dentry, &cc);
+ if (err)
+ goto unlock;
+
+ temp = ovl_create_temp(c->workdir, &cattr);
+ ovl_revert_cu_creds(&cc);
+
+ err = PTR_ERR(temp);
+ if (IS_ERR(temp))
+ goto unlock;
+
+ err = ovl_copy_up_inode(c, temp);
+ if (err)
+ goto cleanup;
+
+ if (S_ISDIR(c->stat.mode) && c->indexed) {
+ err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+ if (err)
+ goto cleanup;
+ }
+
+ upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto cleanup;
+
+ err = ovl_do_rename(wdir, temp, udir, upper, 0);
+ dput(upper);
+ if (err)
+ goto cleanup;
+
+ if (!c->metacopy)
+ ovl_set_upperdata(d_inode(c->dentry));
+ inode = d_inode(c->dentry);
+ ovl_inode_update(inode, temp);
+ if (S_ISDIR(inode->i_mode))
+ ovl_set_flag(OVL_WHITEOUTS, inode);
+unlock:
+ unlock_rename(c->workdir, c->destdir);
+
+ return err;
+
+cleanup:
+ ovl_cleanup(wdir, temp);
+ dput(temp);
+ goto unlock;
+}
+
+/*
+ * Copyup using O_TMPFILE which does not require cross dir locking.
+ *
+ * The temp file is created in workdir with ovl_do_tmpfile() while the
+ * credentials from ovl_prep_cu_creds() are in effect, filled in by
+ * ovl_copy_up_inode(), and then linked to the destination name with
+ * ovl_do_link() while only the destination directory inode is locked.
+ *
+ * On success temp is passed to ovl_inode_update() and 0 is returned; on
+ * failure the temp dentry is dropped with dput() and a negative error code
+ * is returned.
+ */
+static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
+{
+ struct inode *udir = d_inode(c->destdir);
+ struct dentry *temp, *upper;
+ struct ovl_cu_creds cc;
+ int err;
+
+ err = ovl_prep_cu_creds(c->dentry, &cc);
+ if (err)
+ return err;
+
+ temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+ ovl_revert_cu_creds(&cc);
+
if (IS_ERR(temp))
return PTR_ERR(temp);
err = ovl_copy_up_inode(c, temp);
if (err)
- goto out;
+ goto out_dput;
- if (S_ISDIR(c->stat.mode) && c->indexed) {
- err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
- if (err)
- goto out;
- }
+ inode_lock_nested(udir, I_MUTEX_PARENT);
- if (c->tmpfile) {
- inode_lock_nested(udir, I_MUTEX_PARENT);
- err = ovl_install_temp(c, temp, &newdentry);
- inode_unlock(udir);
- } else {
- err = ovl_install_temp(c, temp, &newdentry);
+ upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ err = PTR_ERR(upper);
+ if (!IS_ERR(upper)) {
+ err = ovl_do_link(temp, udir, upper);
+ dput(upper);
}
+ inode_unlock(udir);
+
if (err)
- goto out;
+ goto out_dput;
if (!c->metacopy)
ovl_set_upperdata(d_inode(c->dentry));
- inode = d_inode(c->dentry);
- ovl_inode_update(inode, newdentry);
- if (S_ISDIR(inode->i_mode))
- ovl_set_flag(OVL_WHITEOUTS, inode);
+ ovl_inode_update(d_inode(c->dentry), temp);
-out:
- if (err && !c->tmpfile)
- ovl_cleanup(d_inode(c->workdir), temp);
+ return 0;
+
+out_dput:
dput(temp);
return err;
-
}
/*
@@ -646,18 +685,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
}
/* Should we copyup with O_TMPFILE or with workdir? */
- if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
- c->tmpfile = true;
- err = ovl_copy_up_locked(c);
- } else {
- err = ovl_lock_rename_workdir(c->workdir, c->destdir);
- if (!err) {
- err = ovl_copy_up_locked(c);
- unlock_rename(c->workdir, c->destdir);
- }
- }
-
-
+ if (S_ISREG(c->stat.mode) && ofs->tmpfile)
+ err = ovl_copy_up_tmpfile(c);
+ else
+ err = ovl_copy_up_workdir(c);
if (err)
goto out;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 276914a..c628914 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -414,13 +414,12 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
return 0;
- size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+ size = posix_acl_xattr_size(acl->a_count);
buffer = kmalloc(size, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
- size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
- err = size;
+ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
if (err < 0)
goto out_free;
@@ -463,6 +462,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (IS_ERR(upper))
goto out_unlock;
+ err = -ESTALE;
+ if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+ goto out_dput;
+
newdentry = ovl_create_temp(workdir, cattr);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
@@ -652,7 +655,6 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
int err;
- bool locked = false;
struct inode *inode;
err = ovl_want_write(old);
@@ -663,13 +665,17 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ err = ovl_copy_up(new->d_parent);
+ if (err)
+ goto out_drop_write;
+
if (ovl_is_metacopy_dentry(old)) {
err = ovl_set_redirect(old, false);
if (err)
goto out_drop_write;
}
- err = ovl_nlink_start(old, &locked);
+ err = ovl_nlink_start(old);
if (err)
goto out_drop_write;
@@ -682,7 +688,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
- ovl_nlink_end(old, locked);
+ ovl_nlink_end(old);
out_drop_write:
ovl_drop_write(old);
out:
@@ -807,7 +813,6 @@ static bool ovl_pure_upper(struct dentry *dentry)
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
int err;
- bool locked = false;
const struct cred *old_cred;
struct dentry *upperdentry;
bool lower_positive = ovl_lower_positive(dentry);
@@ -828,7 +833,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (err)
goto out_drop_write;
- err = ovl_nlink_start(dentry, &locked);
+ err = ovl_nlink_start(dentry);
if (err)
goto out_drop_write;
@@ -844,7 +849,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
else
drop_nlink(dentry->d_inode);
}
- ovl_nlink_end(dentry, locked);
+ ovl_nlink_end(dentry);
/*
* Copy ctime
@@ -1008,7 +1013,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unsigned int flags)
{
int err;
- bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
@@ -1017,6 +1021,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
bool old_opaque;
bool new_opaque;
bool cleanup_whiteout = false;
+ bool update_nlink = false;
bool overwrite = !(flags & RENAME_EXCHANGE);
bool is_dir = d_is_dir(old);
bool new_is_dir = d_is_dir(new);
@@ -1074,10 +1079,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
- } else {
- err = ovl_nlink_start(new, &locked);
+ } else if (d_inode(new)) {
+ err = ovl_nlink_start(new);
if (err)
goto out_drop_write;
+
+ update_nlink = true;
}
old_cred = ovl_override_creds(old->d_sb);
@@ -1206,7 +1213,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
revert_creds(old_cred);
- ovl_nlink_end(new, locked);
+ if (update_nlink)
+ ovl_nlink_end(new);
out_drop_write:
ovl_drop_write(old);
out:
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 986313d..84dd957 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -434,14 +434,14 @@ enum ovl_copyop {
OVL_DEDUPE,
};
-static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
- u64 len, unsigned int flags, enum ovl_copyop op)
+ loff_t len, unsigned int flags, enum ovl_copyop op)
{
struct inode *inode_out = file_inode(file_out);
struct fd real_in, real_out;
const struct cred *old_cred;
- ssize_t ret;
+ loff_t ret;
ret = ovl_real_fdget(file_out, &real_out);
if (ret)
@@ -462,12 +462,13 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
case OVL_CLONE:
ret = vfs_clone_file_range(real_in.file, pos_in,
- real_out.file, pos_out, len);
+ real_out.file, pos_out, len, flags);
break;
case OVL_DEDUPE:
ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
- real_out.file, pos_out, len);
+ real_out.file, pos_out, len,
+ flags);
break;
}
revert_creds(old_cred);
@@ -489,26 +490,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
OVL_COPY);
}
-static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len)
+/*
+ * ->remap_file_range() for overlayfs files; handles both clone and dedupe.
+ *
+ * Rejects any flag other than REMAP_FILE_DEDUP and REMAP_FILE_ADVISORY with
+ * -EINVAL, selects OVL_DEDUPE when REMAP_FILE_DEDUP is set and OVL_CLONE
+ * otherwise, and passes the request on to ovl_copyfile() together with
+ * remap_flags. A dedupe request fails with -EPERM when either file has no
+ * upper inode.
+ *
+ * Returns whatever ovl_copyfile() returns for the request.
+ */
+static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
{
- return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
- OVL_CLONE);
-}
+ enum ovl_copyop op;
-static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len)
-{
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (remap_flags & REMAP_FILE_DEDUP)
+ op = OVL_DEDUPE;
+ else
+ op = OVL_CLONE;
+
/*
* Don't copy up because of a dedupe request, this wouldn't make sense
* most of the time (data would be duplicated instead of deduplicated).
*/
- if (!ovl_inode_upper(file_inode(file_in)) ||
- !ovl_inode_upper(file_inode(file_out)))
+ if (op == OVL_DEDUPE &&
+ (!ovl_inode_upper(file_inode(file_in)) ||
+ !ovl_inode_upper(file_inode(file_out))))
return -EPERM;
- return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
- OVL_DEDUPE);
+ return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
+ remap_flags, op);
}
const struct file_operations ovl_file_operations = {
@@ -525,6 +531,5 @@ const struct file_operations ovl_file_operations = {
.compat_ioctl = ovl_compat_ioctl,
.copy_file_range = ovl_copy_file_range,
- .clone_file_range = ovl_clone_file_range,
- .dedupe_file_range = ovl_dedupe_file_range,
+ .remap_file_range = ovl_remap_file_range,
};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b7ed5d..6bcc9de 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -286,13 +286,22 @@ int ovl_permission(struct inode *inode, int mask)
if (err)
return err;
- old_cred = ovl_override_creds(inode->i_sb);
- if (!upperinode &&
- !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+ /* No need to do any access on underlying for special files */
+ if (special_file(realinode->i_mode))
+ return 0;
+
+ /* No need to access underlying for execute */
+ mask &= ~MAY_EXEC;
+ if ((mask & (MAY_READ | MAY_WRITE)) == 0)
+ return 0;
+
+ /* Lower files get copied up, so turn write access into read */
+ if (!upperinode && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
- /* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
}
+
+ old_cred = ovl_override_creds(inode->i_sb);
err = inode_permission(realinode, mask);
revert_creds(old_cred);
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 9c0ca6a..efd3723 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -422,8 +422,10 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
fh = ovl_encode_real_fh(real, is_upper);
err = PTR_ERR(fh);
- if (IS_ERR(fh))
+ if (IS_ERR(fh)) {
+ fh = NULL;
goto fail;
+ }
err = ovl_verify_fh(dentry, name, fh);
if (set && err == -ENODATA)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index a3c0d95..5e45cb3 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -271,8 +271,8 @@ bool ovl_test_flag(unsigned long flag, struct inode *inode);
bool ovl_inuse_trylock(struct dentry *dentry);
void ovl_inuse_unlock(struct dentry *dentry);
bool ovl_need_index(struct dentry *dentry);
-int ovl_nlink_start(struct dentry *dentry, bool *locked);
-void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_nlink_start(struct dentry *dentry);
+void ovl_nlink_end(struct dentry *dentry);
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
int ovl_check_metacopy_xattr(struct dentry *dentry);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
@@ -290,6 +290,16 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
return ofs->xino_bits;
}
+/*
+ * Take the overlay inode mutex, OVL_I(inode)->lock, interruptibly.
+ * Returns the result of mutex_lock_interruptible(): 0 once the lock is held,
+ * non-zero if the lock was not taken.
+ */
+static inline int ovl_inode_lock(struct inode *inode)
+{
+ return mutex_lock_interruptible(&OVL_I(inode)->lock);
+}
+
+/* Release the overlay inode mutex, OVL_I(inode)->lock. */
+static inline void ovl_inode_unlock(struct inode *inode)
+{
+ mutex_unlock(&OVL_I(inode)->lock);
+}
+
/* namei.c */
int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 30adc9d..0116735 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -472,6 +472,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
int err;
+ bool metacopy_opt = false, redirect_opt = false;
config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
if (!config->redirect_mode)
@@ -516,6 +517,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->redirect_mode = match_strdup(&args[0]);
if (!config->redirect_mode)
return -ENOMEM;
+ redirect_opt = true;
break;
case OPT_INDEX_ON:
@@ -548,6 +550,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
case OPT_METACOPY_ON:
config->metacopy = true;
+ metacopy_opt = true;
break;
case OPT_METACOPY_OFF:
@@ -572,13 +575,32 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
if (err)
return err;
- /* metacopy feature with upper requires redirect_dir=on */
- if (config->upperdir && config->metacopy && !config->redirect_dir) {
- pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
- config->metacopy = false;
- } else if (config->metacopy && !config->redirect_follow) {
- pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
- config->metacopy = false;
+ /*
+ * This is to make the logic below simpler. It doesn't make any other
+ * difference, since config->redirect_dir is only used for upper.
+ */
+ if (!config->upperdir && config->redirect_follow)
+ config->redirect_dir = true;
+
+ /* Resolve metacopy -> redirect_dir dependency */
+ if (config->metacopy && !config->redirect_dir) {
+ if (metacopy_opt && redirect_opt) {
+ pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+ config->redirect_mode);
+ return -EINVAL;
+ }
+ if (redirect_opt) {
+ /*
+ * There was an explicit redirect_dir=... that resulted
+ * in this conflict.
+ */
+ pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+ config->redirect_mode);
+ config->metacopy = false;
+ } else {
+ /* Automatically enable redirect otherwise. */
+ config->redirect_follow = config->redirect_dir = true;
+ }
}
return 0;
@@ -1175,10 +1197,30 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
return err;
}
-/* Get a unique fsid for the layer */
-static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+/*
+ * Check whether a lower fs with the given uuid can be used without making
+ * the uuid-based association of file handles with lower layers ambiguous.
+ *
+ * Always returns true when neither nfs_export nor (index together with an
+ * upper mount) is configured. Otherwise returns false if a lower fs already
+ * recorded in ofs->lower_fs[] has an equal uuid, and true if none does.
+ */
+static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
+
+ if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+ return true;
+
+ for (i = 0; i < ofs->numlowerfs; i++) {
+ /*
+ * We use uuid to associate an overlay lower file handle with a
+ * lower layer, so we can accept lower fs with null uuid as long
+ * as all lower layers with null uuid are on the same fs.
+ */
+ if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+ return false;
+ }
+ return true;
+}
+
+/* Get a unique fsid for the layer */
+static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
+{
+ struct super_block *sb = path->mnt->mnt_sb;
+ unsigned int i;
dev_t dev;
int err;
@@ -1191,6 +1233,14 @@ static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
return i + 1;
}
+ if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
+ ofs->config.index = false;
+ ofs->config.nfs_export = false;
+ pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+ uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
+ path->dentry);
+ }
+
err = get_anon_bdev(&dev);
if (err) {
pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
@@ -1225,7 +1275,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
struct vfsmount *mnt;
int fsid;
- err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+ err = fsid = ovl_get_fsid(ofs, &stack[i]);
if (err < 0)
goto out;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ace4fe4..7c01327 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -65,8 +65,7 @@ struct super_block *ovl_same_sb(struct super_block *sb)
*/
int ovl_can_decode_fh(struct super_block *sb)
{
- if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
- uuid_is_null(&sb->s_uuid))
+ if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
return 0;
return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
@@ -522,13 +521,13 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
int ovl_copy_up_start(struct dentry *dentry, int flags)
{
- struct ovl_inode *oi = OVL_I(d_inode(dentry));
+ struct inode *inode = d_inode(dentry);
int err;
- err = mutex_lock_interruptible(&oi->lock);
+ err = ovl_inode_lock(inode);
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
err = 1; /* Already copied up */
- mutex_unlock(&oi->lock);
+ ovl_inode_unlock(inode);
}
return err;
@@ -536,7 +535,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
void ovl_copy_up_end(struct dentry *dentry)
{
- mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+ ovl_inode_unlock(d_inode(dentry));
}
bool ovl_check_origin_xattr(struct dentry *dentry)
@@ -739,14 +738,14 @@ static void ovl_cleanup_index(struct dentry *dentry)
* Operations that change overlay inode and upper inode nlink need to be
* synchronized with copy up for persistent nlink accounting.
*/
-int ovl_nlink_start(struct dentry *dentry, bool *locked)
+int ovl_nlink_start(struct dentry *dentry)
{
- struct ovl_inode *oi = OVL_I(d_inode(dentry));
+ struct inode *inode = d_inode(dentry);
const struct cred *old_cred;
int err;
- if (!d_inode(dentry))
- return 0;
+ if (WARN_ON(!inode))
+ return -ENOENT;
/*
* With inodes index is enabled, we store the union overlay nlink
@@ -768,11 +767,11 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
return err;
}
- err = mutex_lock_interruptible(&oi->lock);
+ err = ovl_inode_lock(inode);
if (err)
return err;
- if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
goto out;
old_cred = ovl_override_creds(dentry->d_sb);
@@ -787,27 +786,24 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
out:
if (err)
- mutex_unlock(&oi->lock);
- else
- *locked = true;
+ ovl_inode_unlock(inode);
return err;
}
-void ovl_nlink_end(struct dentry *dentry, bool locked)
+void ovl_nlink_end(struct dentry *dentry)
{
- if (locked) {
- if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
- d_inode(dentry)->i_nlink == 0) {
- const struct cred *old_cred;
+ struct inode *inode = d_inode(dentry);
- old_cred = ovl_override_creds(dentry->d_sb);
- ovl_cleanup_index(dentry);
- revert_creds(old_cred);
- }
+ if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
+ const struct cred *old_cred;
- mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+ old_cred = ovl_override_creds(dentry->d_sb);
+ ovl_cleanup_index(dentry);
+ revert_creds(old_cred);
}
+
+ ovl_inode_unlock(inode);
}
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7e9f07bf..ce34654 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2905,6 +2905,21 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
}
#endif /* CONFIG_LIVEPATCH */
+#ifdef CONFIG_STACKLEAK_METRICS
+/*
+ * Show function for the per-task "stack_depth" proc entry.
+ *
+ * Prints two values, one derived from task->prev_lowest_stack and one from
+ * task->lowest_stack. Each is THREAD_SIZE minus the field's offset within a
+ * THREAD_SIZE-sized region (field & (THREAD_SIZE - 1)).
+ * NOTE(review): this presumably relies on the task stack being
+ * THREAD_SIZE-aligned - confirm.
+ *
+ * @ns and @pid are not used here. Always returns 0.
+ */
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ unsigned long prev_depth = THREAD_SIZE -
+ (task->prev_lowest_stack & (THREAD_SIZE - 1));
+ unsigned long depth = THREAD_SIZE -
+ (task->lowest_stack & (THREAD_SIZE - 1));
+
+ seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
+ prev_depth, depth);
+ return 0;
+}
+#endif /* CONFIG_STACKLEAK_METRICS */
+
/*
* Thread groups
*/
@@ -3006,6 +3021,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_STACKLEAK_METRICS
+ ONE("stack_depth", S_IRUGO, proc_stack_depth),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/read_write.c b/fs/read_write.c
index 603794b..bfcb4ce 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1407,7 +1407,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
goto fput_in;
if (!(out.file->f_mode & FMODE_WRITE))
goto fput_out;
- retval = -EINVAL;
in_inode = file_inode(in.file);
out_inode = file_inode(out.file);
out_pos = out.file->f_pos;
@@ -1588,11 +1587,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* Try cloning first, this is supported by more file systems, and
* more efficient if both clone and copy are supported (e.g. NFS).
*/
- if (file_in->f_op->clone_file_range) {
- ret = file_in->f_op->clone_file_range(file_in, pos_in,
- file_out, pos_out, len);
- if (ret == 0) {
- ret = len;
+ if (file_in->f_op->remap_file_range) {
+ loff_t cloned;
+
+ cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out,
+ min_t(loff_t, MAX_RW_COUNT, len),
+ REMAP_FILE_CAN_SHORTEN);
+ if (cloned > 0) {
+ ret = cloned;
goto done;
}
}
@@ -1686,11 +1689,12 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
return ret;
}
-static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+ bool write)
{
struct inode *inode = file_inode(file);
- if (unlikely(pos < 0))
+ if (unlikely(pos < 0 || len < 0))
return -EINVAL;
if (unlikely((loff_t) (pos + len) < 0))
@@ -1708,178 +1712,44 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
-
/*
- * Check that the two inodes are eligible for cloning, the ranges make
- * sense, and then flush all dirty data. Caller must ensure that the
- * inodes have been locked against any other modifications.
+ * Ensure that we don't remap a partial EOF block in the middle of something
+ * else. Assume that the offsets have already been checked for block
+ * alignment.
*
- * Returns: 0 for "nothing to clone", 1 for "something to clone", or
- * the usual negative error code.
+ * For deduplication we always scale down to the previous block because we
+ * can't meaningfully compare post-EOF contents.
+ *
+ * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
*/
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
- struct inode *inode_out, loff_t pos_out,
- u64 *len, bool is_dedupe)
+static int generic_remap_check_len(struct inode *inode_in,
+ struct inode *inode_out,
+ loff_t pos_out,
+ loff_t *len,
+ unsigned int remap_flags)
{
- loff_t bs = inode_out->i_sb->s_blocksize;
- loff_t blen;
- loff_t isize;
- bool same_inode = (inode_in == inode_out);
- int ret;
+ u64 blkmask = i_blocksize(inode_in) - 1;
+ loff_t new_len = *len;
- /* Don't touch certain kinds of inodes */
- if (IS_IMMUTABLE(inode_out))
- return -EPERM;
-
- if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
- return -ETXTBSY;
-
- /* Don't reflink dirs, pipes, sockets... */
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- /* Are we going all the way to the end? */
- isize = i_size_read(inode_in);
- if (isize == 0)
+ if ((*len & blkmask) == 0)
return 0;
- /* Zero length dedupe exits immediately; reflink goes to EOF. */
- if (*len == 0) {
- if (is_dedupe || pos_in == isize)
- return 0;
- if (pos_in > isize)
- return -EINVAL;
- *len = isize - pos_in;
+ if ((remap_flags & REMAP_FILE_DEDUP) ||
+ pos_out + *len < i_size_read(inode_out))
+ new_len &= ~blkmask;
+
+ if (new_len == *len)
+ return 0;
+
+ if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+ *len = new_len;
+ return 0;
}
- /* Ensure offsets don't wrap and the input is inside i_size */
- if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
- pos_in + *len > isize)
- return -EINVAL;
-
- /* Don't allow dedupe past EOF in the dest file */
- if (is_dedupe) {
- loff_t disize;
-
- disize = i_size_read(inode_out);
- if (pos_out >= disize || pos_out + *len > disize)
- return -EINVAL;
- }
-
- /* If we're linking to EOF, continue to the block boundary. */
- if (pos_in + *len == isize)
- blen = ALIGN(isize, bs) - pos_in;
- else
- blen = *len;
-
- /* Only reflink if we're aligned to block boundaries */
- if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
- !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
- return -EINVAL;
-
- /* Don't allow overlapped reflink within the same file */
- if (same_inode) {
- if (pos_out + blen > pos_in && pos_out < pos_in + blen)
- return -EINVAL;
- }
-
- /* Wait for the completion of any pending IOs on both files */
- inode_dio_wait(inode_in);
- if (!same_inode)
- inode_dio_wait(inode_out);
-
- ret = filemap_write_and_wait_range(inode_in->i_mapping,
- pos_in, pos_in + *len - 1);
- if (ret)
- return ret;
-
- ret = filemap_write_and_wait_range(inode_out->i_mapping,
- pos_out, pos_out + *len - 1);
- if (ret)
- return ret;
-
- /*
- * Check that the extents are the same.
- */
- if (is_dedupe) {
- bool is_same = false;
-
- ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
- inode_out, pos_out, *len, &is_same);
- if (ret)
- return ret;
- if (!is_same)
- return -EBADE;
- }
-
- return 1;
+ return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
}
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
-
-int do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len)
-{
- struct inode *inode_in = file_inode(file_in);
- struct inode *inode_out = file_inode(file_out);
- int ret;
-
- if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
- return -EISDIR;
- if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
- return -EINVAL;
-
- /*
- * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
- * the same mount. Practically, they only need to be on the same file
- * system.
- */
- if (inode_in->i_sb != inode_out->i_sb)
- return -EXDEV;
-
- if (!(file_in->f_mode & FMODE_READ) ||
- !(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND))
- return -EBADF;
-
- if (!file_in->f_op->clone_file_range)
- return -EOPNOTSUPP;
-
- ret = clone_verify_area(file_in, pos_in, len, false);
- if (ret)
- return ret;
-
- ret = clone_verify_area(file_out, pos_out, len, true);
- if (ret)
- return ret;
-
- if (pos_in + len > i_size_read(inode_in))
- return -EINVAL;
-
- ret = file_in->f_op->clone_file_range(file_in, pos_in,
- file_out, pos_out, len);
- if (!ret) {
- fsnotify_access(file_in);
- fsnotify_modify(file_out);
- }
-
- return ret;
-}
-EXPORT_SYMBOL(do_clone_file_range);
-
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len)
-{
- int ret;
-
- file_start_write(file_out);
- ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
- file_end_write(file_out);
-
- return ret;
-}
-EXPORT_SYMBOL(vfs_clone_file_range);
/*
* Read a page's worth of file data into the page cache. Return the page
@@ -1887,13 +1757,9 @@ EXPORT_SYMBOL(vfs_clone_file_range);
*/
static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
{
- struct address_space *mapping;
struct page *page;
- pgoff_t n;
- n = offset >> PAGE_SHIFT;
- mapping = inode->i_mapping;
- page = read_mapping_page(mapping, n, NULL);
+ page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
if (IS_ERR(page))
return page;
if (!PageUptodate(page)) {
@@ -1908,9 +1774,9 @@ static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
* Compare extents of two files to see if they are the same.
* Caller must have locked both inodes to prevent write races.
*/
-int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t destoff,
- loff_t len, bool *is_same)
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same)
{
loff_t src_poff;
loff_t dest_poff;
@@ -1975,23 +1841,211 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
out_error:
return error;
}
-EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
-int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
- struct file *dst_file, loff_t dst_pos, u64 len)
+/*
+ * Check that the two inodes are eligible for cloning, the ranges make
+ * sense, and then flush all dirty data. Caller must ensure that the
+ * inodes have been locked against any other modifications.
+ *
+ * If there's an error, then the usual negative error code is returned.
+ * Otherwise returns 0 with *len set to the request length.
+ */
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags)
{
- s64 ret;
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ bool same_inode = (inode_in == inode_out);
+ int ret;
+
+ /* Don't touch certain kinds of inodes */
+ if (IS_IMMUTABLE(inode_out))
+ return -EPERM;
+
+ if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+ return -ETXTBSY;
+
+ /* Don't reflink dirs, pipes, sockets... */
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ /* Zero length dedupe exits immediately; reflink goes to EOF. */
+ if (*len == 0) {
+ loff_t isize = i_size_read(inode_in);
+
+ if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
+ return 0;
+ if (pos_in > isize)
+ return -EINVAL;
+ *len = isize - pos_in;
+ if (*len == 0)
+ return 0;
+ }
+
+ /* Check that we don't violate system file offset limits. */
+ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+ remap_flags);
+ if (ret)
+ return ret;
+
+ /* Wait for the completion of any pending IOs on both files */
+ inode_dio_wait(inode_in);
+ if (!same_inode)
+ inode_dio_wait(inode_out);
+
+ ret = filemap_write_and_wait_range(inode_in->i_mapping,
+ pos_in, pos_in + *len - 1);
+ if (ret)
+ return ret;
+
+ ret = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + *len - 1);
+ if (ret)
+ return ret;
+
+ /*
+ * Check that the extents are the same.
+ */
+ if (remap_flags & REMAP_FILE_DEDUP) {
+ bool is_same = false;
+
+ ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
+ inode_out, pos_out, *len, &is_same);
+ if (ret)
+ return ret;
+ if (!is_same)
+ return -EBADE;
+ }
+
+ ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
+ remap_flags);
+ if (ret)
+ return ret;
+
+ /* Dedupe can't alter file contents; only other remaps need the updates below. */
+ if (!(remap_flags & REMAP_FILE_DEDUP)) {
+ /* Update the timestamps, since we can alter file contents. */
+ if (!(file_out->f_mode & FMODE_NOCMTIME)) {
+ ret = file_update_time(file_out);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Clear the security bits if the process is not being run by
+ * root. This keeps people from modifying setuid and setgid
+ * binaries.
+ */
+ ret = file_remove_privs(file_out);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(generic_remap_file_range_prep);
+
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ loff_t ret;
+
+ WARN_ON_ONCE(remap_flags);
+
+ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+ return -EISDIR;
+ if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+ return -EINVAL;
+
+ /*
+ * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
+ * the same mount. Practically, they only need to be on the same file
+ * system.
+ */
+ if (inode_in->i_sb != inode_out->i_sb)
+ return -EXDEV;
+
+ if (!(file_in->f_mode & FMODE_READ) ||
+ !(file_out->f_mode & FMODE_WRITE) ||
+ (file_out->f_flags & O_APPEND))
+ return -EBADF;
+
+ if (!file_in->f_op->remap_file_range)
+ return -EOPNOTSUPP;
+
+ ret = remap_verify_area(file_in, pos_in, len, false);
+ if (ret)
+ return ret;
+
+ ret = remap_verify_area(file_out, pos_out, len, true);
+ if (ret)
+ return ret;
+
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
+ file_out, pos_out, len, remap_flags);
+ if (ret < 0)
+ return ret;
+
+ fsnotify_access(file_in);
+ fsnotify_modify(file_out);
+ return ret;
+}
+EXPORT_SYMBOL(do_clone_file_range);
+
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags)
+{
+ loff_t ret;
+
+ file_start_write(file_out);
+ ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+ remap_flags);
+ file_end_write(file_out);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
+
+/* Check whether we are allowed to dedupe the destination file */
+static bool allow_file_dedupe(struct file *file)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+ if (file->f_mode & FMODE_WRITE)
+ return true;
+ if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+ return true;
+ if (!inode_permission(file_inode(file), MAY_WRITE))
+ return true;
+ return false;
+}
+
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos,
+ loff_t len, unsigned int remap_flags)
+{
+ loff_t ret;
+
+ WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+ REMAP_FILE_CAN_SHORTEN));
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
- ret = clone_verify_area(dst_file, dst_pos, len, true);
+ ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret < 0)
goto out_drop_write;
- ret = -EINVAL;
- if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+ ret = -EPERM;
+ if (!allow_file_dedupe(dst_file))
goto out_drop_write;
ret = -EXDEV;
@@ -2003,11 +2057,16 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
goto out_drop_write;
ret = -EINVAL;
- if (!dst_file->f_op->dedupe_file_range)
+ if (!dst_file->f_op->remap_file_range)
goto out_drop_write;
- ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
- dst_file, dst_pos, len);
+ if (len == 0) {
+ ret = 0;
+ goto out_drop_write;
+ }
+
+ ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+ dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
out_drop_write:
mnt_drop_write_file(dst_file);
@@ -2024,7 +2083,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
int i;
int ret;
u16 count = same->dest_count;
- int deduped;
+ loff_t deduped;
if (!(file->f_mode & FMODE_READ))
return -EINVAL;
@@ -2043,7 +2102,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
if (!S_ISREG(src->i_mode))
goto out;
- ret = clone_verify_area(file, off, len, false);
+ ret = remap_verify_area(file, off, len, false);
if (ret < 0)
goto out;
ret = 0;
@@ -2075,7 +2134,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
}
deduped = vfs_dedupe_file_range_one(file, off, dst_file,
- info->dest_offset, len);
+ info->dest_offset, len,
+ REMAP_FILE_CAN_SHORTEN);
if (deduped == -EBADE)
info->status = FILE_DEDUPE_RANGE_DIFFERS;
else if (deduped < 0)
diff --git a/fs/splice.c b/fs/splice.c
index b3daa97..3553f19 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct kiocb kiocb;
int idx, ret;
- iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+ iov_iter_pipe(&to, READ, pipe, len);
idx = to.idx;
init_sync_kiocb(&kiocb, in);
kiocb.ki_pos = *ppos;
@@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
*/
offset = *ppos & ~PAGE_MASK;
- iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+ iov_iter_pipe(&to, READ, pipe, len + offset);
res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
if (res <= 0)
@@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
left -= this_len;
}
- iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
- sd.total_len - left);
+ iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
ret = vfs_iter_write(out, &from, &sd.pos, 0);
if (ret <= 0)
break;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2..53c9ab8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -919,28 +919,67 @@ xfs_file_fallocate(
return error;
}
-STATIC int
-xfs_file_clone_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
-{
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
-}
-STATIC int
-xfs_file_dedupe_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
+loff_t
+xfs_file_remap_range(
+ struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ loff_t len,
+ unsigned int remap_flags)
{
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, true);
+ struct inode *inode_in = file_inode(file_in);
+ struct xfs_inode *src = XFS_I(inode_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct xfs_inode *dest = XFS_I(inode_out);
+ struct xfs_mount *mp = src->i_mount;
+ loff_t remapped = 0;
+ xfs_extlen_t cowextsize;
+ int ret;
+
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ /* Prepare and then clone file data. */
+ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
+ &len, remap_flags);
+ if (ret < 0 || len == 0)
+ return ret;
+
+ trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+ ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
+ &remapped);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Carry the cowextsize hint from src to dest if we're sharing the
+ * entire source file to the entire destination file, the source file
+ * has a cowextsize hint, and the destination file does not.
+ */
+ cowextsize = 0;
+ if (pos_in == 0 && len == i_size_read(inode_in) &&
+ (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+ pos_out == 0 && len >= i_size_read(inode_out) &&
+ !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+ cowextsize = src->i_d.di_cowextsize;
+
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ remap_flags);
+
+out_unlock:
+ xfs_reflink_remap_unlock(file_in, file_out);
+ if (ret)
+ trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+ return remapped > 0 ? remapped : ret;
}
STATIC int
@@ -1175,8 +1214,7 @@ const struct file_operations xfs_file_operations = {
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
- .clone_file_range = xfs_file_clone_range,
- .dedupe_file_range = xfs_file_dedupe_range,
+ .remap_file_range = xfs_file_remap_range,
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8eaeec9..ecdb086 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -913,18 +913,18 @@ xfs_reflink_set_inode_flag(
/*
* Update destination inode size & cowextsize hint, if necessary.
*/
-STATIC int
+int
xfs_reflink_update_dest(
struct xfs_inode *dest,
xfs_off_t newlen,
xfs_extlen_t cowextsize,
- bool is_dedupe)
+ unsigned int remap_flags)
{
struct xfs_mount *mp = dest->i_mount;
struct xfs_trans *tp;
int error;
- if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+ if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
return 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -945,10 +945,6 @@ xfs_reflink_update_dest(
dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
}
- if (!is_dedupe) {
- xfs_trans_ichgtime(tp, dest,
- XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- }
xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
error = xfs_trans_commit(tp);
@@ -1112,19 +1108,28 @@ xfs_reflink_remap_extent(
/*
* Iteratively remap one file's extents (and holes) to another's.
*/
-STATIC int
+int
xfs_reflink_remap_blocks(
struct xfs_inode *src,
- xfs_fileoff_t srcoff,
+ loff_t pos_in,
struct xfs_inode *dest,
- xfs_fileoff_t destoff,
- xfs_filblks_t len,
- xfs_off_t new_isize)
+ loff_t pos_out,
+ loff_t remap_len,
+ loff_t *remapped)
{
struct xfs_bmbt_irec imap;
+ xfs_fileoff_t srcoff;
+ xfs_fileoff_t destoff;
+ xfs_filblks_t len;
+ xfs_filblks_t range_len;
+ xfs_filblks_t remapped_len = 0;
+ xfs_off_t new_isize = pos_out + remap_len;
int nimaps;
int error = 0;
- xfs_filblks_t range_len;
+
+ destoff = XFS_B_TO_FSBT(src->i_mount, pos_out);
+ srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in);
+ len = XFS_B_TO_FSB(src->i_mount, remap_len);
/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
while (len) {
@@ -1139,7 +1144,7 @@ xfs_reflink_remap_blocks(
error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
xfs_iunlock(src, lock_mode);
if (error)
- goto err;
+ break;
ASSERT(nimaps == 1);
trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
@@ -1153,23 +1158,24 @@ xfs_reflink_remap_blocks(
error = xfs_reflink_remap_extent(dest, &imap, destoff,
new_isize);
if (error)
- goto err;
+ break;
if (fatal_signal_pending(current)) {
error = -EINTR;
- goto err;
+ break;
}
/* Advance drange/srange */
srcoff += range_len;
destoff += range_len;
len -= range_len;
+ remapped_len += range_len;
}
- return 0;
-
-err:
- trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+ if (error)
+ trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+ *remapped = min_t(loff_t, remap_len,
+ XFS_FSB_TO_B(src->i_mount, remapped_len));
return error;
}
@@ -1218,7 +1224,7 @@ xfs_iolock_two_inodes_and_break_layout(
}
/* Unlock both inodes after they've been prepped for a range clone. */
-STATIC void
+void
xfs_reflink_remap_unlock(
struct file *file_in,
struct file *file_out)
@@ -1286,21 +1292,20 @@ xfs_reflink_zero_posteof(
* stale data in the destination file. Hence we reject these clone attempts with
* -EINVAL in this case.
*/
-STATIC int
+int
xfs_reflink_remap_prep(
struct file *file_in,
loff_t pos_in,
struct file *file_out,
loff_t pos_out,
- u64 *len,
- bool is_dedupe)
+ loff_t *len,
+ unsigned int remap_flags)
{
struct inode *inode_in = file_inode(file_in);
struct xfs_inode *src = XFS_I(inode_in);
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
bool same_inode = (inode_in == inode_out);
- u64 blkmask = i_blocksize(inode_in) - 1;
ssize_t ret;
/* Lock both files against IO */
@@ -1323,29 +1328,11 @@ xfs_reflink_remap_prep(
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
- ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
- len, is_dedupe);
- if (ret <= 0)
+ ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+ len, remap_flags);
+ if (ret < 0 || *len == 0)
goto out_unlock;
- /*
- * If the dedupe data matches, chop off the partial EOF block
- * from the source file so we don't try to dedupe the partial
- * EOF block.
- */
- if (is_dedupe) {
- *len &= ~blkmask;
- } else if (*len & blkmask) {
- /*
- * The user is attempting to share a partial EOF block,
- * if it's inside the destination EOF then reject it.
- */
- if (pos_out + *len < i_size_read(inode_out)) {
- ret = -EINVAL;
- goto out_unlock;
- }
- }
-
/* Attach dquots to dest inode before changing block map */
ret = xfs_qm_dqattach(dest);
if (ret)
@@ -1365,31 +1352,9 @@ xfs_reflink_remap_prep(
goto out_unlock;
/* Zap any page cache for the destination file's range. */
- truncate_inode_pages_range(&inode_out->i_data, pos_out,
- PAGE_ALIGN(pos_out + *len) - 1);
-
- /* If we're altering the file contents... */
- if (!is_dedupe) {
- /*
- * ...update the timestamps (which will grab the ilock again
- * from xfs_fs_dirty_inode, so we have to call it before we
- * take the ilock).
- */
- if (!(file_out->f_mode & FMODE_NOCMTIME)) {
- ret = file_update_time(file_out);
- if (ret)
- goto out_unlock;
- }
-
- /*
- * ...clear the security bits if the process is not being run
- * by root. This keeps people from modifying setuid and setgid
- * binaries.
- */
- ret = file_remove_privs(file_out);
- if (ret)
- goto out_unlock;
- }
+ truncate_inode_pages_range(&inode_out->i_data,
+ round_down(pos_out, PAGE_SIZE),
+ round_up(pos_out + *len, PAGE_SIZE) - 1);
return 1;
out_unlock:
@@ -1398,72 +1363,6 @@ xfs_reflink_remap_prep(
}
/*
- * Link a range of blocks from one file to another.
- */
-int
-xfs_reflink_remap_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len,
- bool is_dedupe)
-{
- struct inode *inode_in = file_inode(file_in);
- struct xfs_inode *src = XFS_I(inode_in);
- struct inode *inode_out = file_inode(file_out);
- struct xfs_inode *dest = XFS_I(inode_out);
- struct xfs_mount *mp = src->i_mount;
- xfs_fileoff_t sfsbno, dfsbno;
- xfs_filblks_t fsblen;
- xfs_extlen_t cowextsize;
- ssize_t ret;
-
- if (!xfs_sb_version_hasreflink(&mp->m_sb))
- return -EOPNOTSUPP;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- /* Prepare and then clone file data. */
- ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
- &len, is_dedupe);
- if (ret <= 0)
- return ret;
-
- trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
-
- dfsbno = XFS_B_TO_FSBT(mp, pos_out);
- sfsbno = XFS_B_TO_FSBT(mp, pos_in);
- fsblen = XFS_B_TO_FSB(mp, len);
- ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
- pos_out + len);
- if (ret)
- goto out_unlock;
-
- /*
- * Carry the cowextsize hint from src to dest if we're sharing the
- * entire source file to the entire destination file, the source file
- * has a cowextsize hint, and the destination file does not.
- */
- cowextsize = 0;
- if (pos_in == 0 && len == i_size_read(inode_in) &&
- (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
- pos_out == 0 && len >= i_size_read(inode_out) &&
- !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
- cowextsize = src->i_d.di_cowextsize;
-
- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
- is_dedupe);
-
-out_unlock:
- xfs_reflink_remap_unlock(file_in, file_out);
- if (ret)
- trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
- return ret;
-}
-
-/*
* The user wants to preemptively CoW all shared blocks in this file,
* which enables us to turn off the reflink flag. Iterate all
* extents which are not prealloc/delalloc to see which ranges are
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 7f47202..6d73dae 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -27,13 +27,24 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, loff_t len,
+ unsigned int remap_flags);
extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
struct xfs_inode *ip, bool *has_shared);
extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
struct xfs_trans **tpp);
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
+extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out, loff_t *len,
+ unsigned int remap_flags);
+extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
+ struct xfs_inode *dest, loff_t pos_out, loff_t remap_len,
+ loff_t *remapped);
+extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
+ xfs_extlen_t cowextsize, unsigned int remap_flags);
+extern void xfs_reflink_remap_unlock(struct file *file_in,
+ struct file *file_out);
#endif /* __XFS_REFLINK_H */
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
new file mode 100644
index 0000000..48198c3
--- /dev/null
+++ b/include/crypto/asym_tpm_subtype.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
+#define _LINUX_ASYM_TPM_SUBTYPE_H
+
+#include <linux/keyctl.h>
+
+struct tpm_key {
+ void *blob;
+ u32 blob_len;
+ uint16_t key_len; /* Size in bits of the key */
+ const void *pub_key; /* pointer inside blob to the public key bytes */
+ uint16_t pub_key_len; /* length of the public key */
+};
+
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
+
+extern struct asymmetric_key_subtype asym_tpm_subtype;
+
+#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index e0b681a..be626ea 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -14,6 +14,8 @@
#ifndef _LINUX_PUBLIC_KEY_H
#define _LINUX_PUBLIC_KEY_H
+#include <linux/keyctl.h>
+
/*
* Cryptographic data for the public-key subtype of the asymmetric key type.
*
@@ -23,6 +25,7 @@
struct public_key {
void *key;
u32 keylen;
+ bool key_is_private;
const char *id_type;
const char *pkey_algo;
};
@@ -40,6 +43,7 @@ struct public_key_signature {
u8 digest_size; /* Number of bytes in digest */
const char *pkey_algo;
const char *hash_algo;
+ const char *encoding;
};
extern void public_key_signature_free(struct public_key_signature *sig);
@@ -65,8 +69,14 @@ extern int restrict_link_by_key_or_keyring_chain(struct key *trust_keyring,
const union key_payload *payload,
struct key *trusted);
-extern int verify_signature(const struct key *key,
- const struct public_key_signature *sig);
+extern int query_asymmetric_key(const struct kernel_pkey_params *,
+ struct kernel_pkey_query *);
+
+extern int encrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int decrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int create_signature(struct kernel_pkey_params *, const void *, void *);
+extern int verify_signature(const struct key *,
+ const struct public_key_signature *);
int public_key_verify_signature(const struct public_key *pkey,
const struct public_key_signature *sig);
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 91a877f..9ccad6b 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -82,6 +82,53 @@ enum drm_connector_status {
connector_status_unknown = 3,
};
+/**
+ * enum drm_connector_registration_state - userspace registration state for
+ * a &drm_connector
+ *
+ * This enum is used to track the status of initializing a connector and
+ * registering it with userspace, so that DRM can prevent bogus modesets on
+ * connectors that no longer exist.
+ */
+enum drm_connector_registration_state {
+ /**
+ * @DRM_CONNECTOR_INITIALIZING: The connector has just been created,
+ * but has yet to be exposed to userspace. There should be no
+ * additional restrictions to how the state of this connector may be
+ * modified.
+ */
+ DRM_CONNECTOR_INITIALIZING = 0,
+
+ /**
+ * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized
+ * and registered with sysfs, as such it has been exposed to
+ * userspace. There should be no additional restrictions to how the
+ * state of this connector may be modified.
+ */
+ DRM_CONNECTOR_REGISTERED = 1,
+
+ /**
+ * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed
+ * to userspace and has since been unregistered and removed from
+ * userspace, or the connector was unregistered before it had a chance
+ * to be exposed to userspace (e.g. still in the
+ * @DRM_CONNECTOR_INITIALIZING state). When a connector is
+ * unregistered, there are additional restrictions to how its state
+ * may be modified:
+ *
+ * - An unregistered connector may only have its DPMS changed from
+ * On->Off. Once DPMS is changed to Off, it may not be switched back
+ * to On.
+ * - Modesets are not allowed on unregistered connectors, unless they
+ * would result in disabling its assigned CRTCs. This means
+ * disabling a CRTC on an unregistered connector is OK, but enabling
+ * one is not.
+ * - Removing a CRTC from an unregistered connector is OK, but new
+ * CRTCs may never be assigned to an unregistered connector.
+ */
+ DRM_CONNECTOR_UNREGISTERED = 2,
+};
+
enum subpixel_order {
SubPixelUnknown = 0,
SubPixelHorizontalRGB,
@@ -853,10 +900,12 @@ struct drm_connector {
bool ycbcr_420_allowed;
/**
- * @registered: Is this connector exposed (registered) with userspace?
+ * @registration_state: Is this connector initializing, exposed
+ * (registered) with userspace, or unregistered?
+ *
* Protected by @mutex.
*/
- bool registered;
+ enum drm_connector_registration_state registration_state;
/**
* @modes:
@@ -1166,6 +1215,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector)
drm_connector_put(connector);
}
+/**
+ * drm_connector_is_unregistered - has the connector been unregistered from
+ * userspace?
+ * @connector: DRM connector
+ *
+ * Checks whether or not @connector has been unregistered from userspace.
+ *
+ * Returns:
+ * True if the connector was unregistered, false if the connector is
+ * registered or has not yet been registered with userspace.
+ */
+static inline bool
+drm_connector_is_unregistered(struct drm_connector *connector)
+{
+ return READ_ONCE(connector->registration_state) ==
+ DRM_CONNECTOR_UNREGISTERED;
+}
+
const char *drm_get_connector_status_name(enum drm_connector_status status);
const char *drm_get_subpixel_order_name(enum subpixel_order order);
const char *drm_get_dpms_name(int val);
diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h
index e0a9c23..9ce2f0f 100644
--- a/include/keys/asymmetric-subtype.h
+++ b/include/keys/asymmetric-subtype.h
@@ -17,6 +17,8 @@
#include <linux/seq_file.h>
#include <keys/asymmetric-type.h>
+struct kernel_pkey_query;
+struct kernel_pkey_params;
struct public_key_signature;
/*
@@ -34,6 +36,13 @@ struct asymmetric_key_subtype {
/* Destroy a key of this subtype */
void (*destroy)(void *payload_crypto, void *payload_auth);
+ int (*query)(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info);
+
+ /* Encrypt/decrypt/sign data */
+ int (*eds_op)(struct kernel_pkey_params *params,
+ const void *in, void *out);
+
/* Verify the signature on a key of this subtype (optional) */
int (*verify_signature)(const struct key *key,
const struct public_key_signature *sig);
diff --git a/security/keys/trusted.h b/include/keys/trusted.h
similarity index 86%
rename from security/keys/trusted.h
rename to include/keys/trusted.h
index 8d5fe9e..adbcb68 100644
--- a/security/keys/trusted.h
+++ b/include/keys/trusted.h
@@ -3,7 +3,7 @@
#define __TRUSTED_KEY_H
/* implementation specific TPM constants */
-#define MAX_BUF_SIZE 512
+#define MAX_BUF_SIZE 1024
#define TPM_GETRANDOM_SIZE 14
#define TPM_OSAP_SIZE 36
#define TPM_OIAP_SIZE 10
@@ -36,6 +36,18 @@ enum {
SRK_keytype = 4
};
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+ unsigned int keylen, unsigned char *h1,
+ unsigned char *h2, unsigned char h3, ...);
+int TSS_checkhmac1(unsigned char *buffer,
+ const uint32_t command,
+ const unsigned char *ononce,
+ const unsigned char *key,
+ unsigned int keylen, ...);
+
+int trusted_tpm_send(unsigned char *cmd, size_t buflen);
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce);
+
#define TPM_DEBUG 0
#if TPM_DEBUG
diff --git a/include/linux/adxl.h b/include/linux/adxl.h
index 2a629ac..2d29f55 100644
--- a/include/linux/adxl.h
+++ b/include/linux/adxl.h
@@ -7,7 +7,12 @@
#ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H
+#ifdef CONFIG_ACPI_ADXL
const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]);
+#else
+static inline const char * const *adxl_get_component_names(void) { return NULL; }
+static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; }
+#endif
#endif /* _LINUX_ADXL_H */
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2c9756b..b248805 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -62,13 +62,19 @@
/* Error Codes */
enum virtchnl_status_code {
VIRTCHNL_STATUS_SUCCESS = 0,
- VIRTCHNL_ERR_PARAM = -5,
+ VIRTCHNL_STATUS_ERR_PARAM = -5,
+ VIRTCHNL_STATUS_ERR_NO_MEMORY = -18,
VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH = -38,
VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR = -39,
VIRTCHNL_STATUS_ERR_INVALID_VF_ID = -40,
- VIRTCHNL_STATUS_NOT_SUPPORTED = -64,
+ VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR = -53,
+ VIRTCHNL_STATUS_ERR_NOT_SUPPORTED = -64,
};
+/* Backward compatibility */
+#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
+#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
+
#define VIRTCHNL_LINK_SPEED_100MB_SHIFT 0x1
#define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2
#define VIRTCHNL_LINK_SPEED_10GB_SHIFT 0x3
@@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
default:
- return VIRTCHNL_ERR_PARAM;
+ return VIRTCHNL_STATUS_ERR_PARAM;
}
/* few more checks */
if (err_msg_format || valid_len != msglen)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f7..056fb62 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do { \
disk_devt((bio)->bi_disk)
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
#else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
- struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+ struct page *page) { return 0; }
#endif
#ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
- struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
- struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
- struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+ struct cgroup_subsys_state *blkcg_css) { return 0; }
static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
- struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+ struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76cee..6d766a1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
struct request_list rl;
/* reference count */
- struct percpu_ref refcnt;
+ atomic_t refcnt;
/* is this blkg online? protected by both blkcg and q locks */
bool online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
- struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
- struct cgroup_subsys_state *css;
-
- css = kthread_blkcg();
- if (css)
- return css;
- return task_css(current, io_cgrp_id);
-}
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function. In particular, this was
- * used in a broken paradigm where association was called on the given css. It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task. So it is improper to assume
- * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return NULL;
+ struct cgroup_subsys_state *css;
+
+ if (bio && bio->bi_css)
+ return css_to_blkcg(bio->bi_css);
+ css = kthread_blkcg();
+ if (css)
+ return css_to_blkcg(css);
+ return css_to_blkcg(task_css(current, io_cgrp_id));
}
static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
*/
static inline void blkg_get(struct blkcg_gq *blkg)
{
- percpu_ref_get(&blkg->refcnt);
+ WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+ atomic_inc(&blkg->refcnt);
}
/**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
* @blkg: blkg to get
*
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
* of freeing this blkg, so we can only use it if the refcnt is not zero.
*/
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
- return percpu_ref_tryget(&blkg->refcnt);
+ if (atomic_inc_not_zero(&blkg->refcnt))
+ return blkg;
+ return NULL;
}
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
- while (!percpu_ref_tryget(&blkg->refcnt))
- blkg = blkg->parent;
- return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
/**
* blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
*/
static inline void blkg_put(struct blkcg_gq *blkg)
{
- percpu_ref_put(&blkg->refcnt);
+ WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+ if (atomic_dec_and_test(&blkg->refcnt))
+ call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}
/**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
rcu_read_lock();
- if (bio && bio->bi_blkg) {
- blkcg = bio->bi_blkg->blkcg;
- if (blkcg == &blkcg_root)
- goto rl_use_root;
+ blkcg = bio_blkcg(bio);
- blkg_get(bio->bi_blkg);
- rcu_read_unlock();
- return &bio->bi_blkg->rl;
- }
-
- blkcg = css_to_blkcg(blkcg_css());
+ /* bypass blkg lookup and use @q->root_rl directly for root */
if (blkcg == &blkcg_root)
- goto rl_use_root;
-
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg))
- blkg = __blkg_lookup_create(blkcg, q);
-
- if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
- goto rl_use_root;
-
- rcu_read_unlock();
- return &blkg->rl;
+ goto root_rl;
/*
- * Each blkg has its own request_list, however, the root blkcg
- * uses the request_queue's root_rl. This is to avoid most
- * overhead for the root blkcg.
+ * Try to use blkg->rl. blkg lookup may fail under memory pressure
+ * or if either the blkcg or queue is going away. Fall back to
+ * root_rl in such cases.
*/
-rl_use_root:
+ blkg = blkg_lookup(blkcg, q);
+ if (unlikely(!blkg))
+ goto root_rl;
+
+ blkg_get(blkg);
+ rcu_read_unlock();
+ return &blkg->rl;
+root_rl:
rcu_read_unlock();
return &q->root_rl;
}
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
- bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio)
{
+ struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool throtl = false;
rcu_read_lock();
+ blkcg = bio_blkcg(bio);
- bio_associate_create_blkg(q, bio);
- blkg = bio->bi_blkg;
+ /* associate blkcg if bio hasn't attached one */
+ bio_associate_blkcg(bio, &blkcg->css);
+
+ blkg = blkg_lookup(blkcg, q);
+ if (unlikely(!blkg)) {
+ spin_lock_irq(q->queue_lock);
+ blkg = blkg_lookup_create(blkcg, q);
+ if (IS_ERR(blkg))
+ blkg = NULL;
+ spin_unlock_irq(q->queue_lock);
+ }
throtl = blk_throtl_bio(q, blkg, bio);
if (!throtl) {
+ blkg = blkg ?: q->root_blkg;
/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
- blkcg_bio_issue_init(bio);
-
rcu_read_unlock();
return !throtl;
}
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818..1dcf652 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
* release. Read comment on top of bio_associate_current().
*/
struct io_context *bi_ioc;
+ struct cgroup_subsys_state *bi_css;
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056e..d93e897 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -51,6 +51,9 @@ struct bpf_reg_state {
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
+
+ /* Max size from any of the above. */
+ unsigned long raw;
};
/* Fixed part of pointer offset, pointer types only */
s32 off;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9968332..9d12757 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
- struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index b1ce500..3e7dafb 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,8 +21,6 @@
#define __SANITIZE_ADDRESS__
#endif
-#define __no_sanitize_address __attribute__((no_sanitize("address")))
-
/*
* Not all versions of clang implement the the type-generic versions
* of the builtin overflow checkers. Fortunately, clang implements
@@ -41,6 +39,3 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#define __assume_aligned(a, ...) \
- __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 90ddfef..c0f5db3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,31 +68,20 @@
*/
#define uninitialized_var(x) x = x
-#ifdef __CHECKER__
-#define __must_be_array(a) 0
-#else
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
-
#ifdef RETPOLINE
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#define __noretpoline __attribute__((__indirect_branch__("keep")))
#endif
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
-#define __optimize(level) __attribute__((__optimize__(level)))
-
#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#ifndef __CHECKER__
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message) __attribute__((error(message)))
+#define __compiletime_warning(message) __attribute__((__warning__(message)))
+#define __compiletime_error(message) __attribute__((__error__(message)))
-#ifdef LATENT_ENTROPY_PLUGIN
+#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
#define __latent_entropy __attribute__((latent_entropy))
#endif
-#endif /* __CHECKER__ */
/*
* calling noreturn functions, __builtin_unreachable() and __builtin_trap()
@@ -107,10 +96,6 @@
* Mark a position in code as unreachable. This can be used to
* suppress control flow warnings after asm blocks that transfer
* control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased. Really, we need to have autoconf for the kernel.
*/
#define unreachable() \
do { \
@@ -119,9 +104,6 @@
__builtin_unreachable(); \
} while (0)
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
-
#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
#define __randomize_layout __attribute__((randomize_layout))
#define __no_randomize_layout __attribute__((no_randomize_layout))
@@ -131,32 +113,6 @@
#endif
/*
- * When used with Link Time Optimization, gcc can optimize away C functions or
- * variables which are referenced only from assembly code. __visible tells the
- * optimizer that something else uses this function or variable, thus preventing
- * this.
- */
-#define __visible __attribute__((externally_visible))
-
-/* gcc version specific checks */
-
-#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
-/*
- * __assume_aligned(n, k): Tell the optimizer that the returned
- * pointer can be assumed to be k modulo n. The second argument is
- * optional (default 0), so we use a variadic macro to make the
- * shorthand.
- *
- * Beware: Do not apply this to functions which may return
- * ERR_PTRs. Also, it is probably unwise to apply it to functions
- * returning extra information in the low bits (but in that case the
- * compiler should see some alignment anyway, when the return value is
- * massaged by 'flags = ptr & 3; ptr &= ~3;').
- */
-#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#endif
-
-/*
* GCC 'asm goto' miscompiles certain code sequences:
*
* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
@@ -187,39 +143,22 @@
#define KASAN_ABI_VERSION 3
#endif
-#if GCC_VERSION >= 40902
/*
- * Tell the compiler that address safety instrumentation (KASAN)
- * should not be applied to that function.
- * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
+ * Because __no_sanitize_address conflicts with inlining:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
+ * we do one or the other.
*/
-#define __no_sanitize_address __attribute__((no_sanitize_address))
#ifdef CONFIG_KASAN
#define __no_sanitize_address_or_inline \
__no_sanitize_address __maybe_unused notrace
#else
#define __no_sanitize_address_or_inline inline
#endif
-#endif
#if GCC_VERSION >= 50100
-/*
- * Mark structures as requiring designated initializers.
- * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
- */
-#define __designated_init __attribute__((designated_init))
#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
#endif
-#if !defined(__noclone)
-#define __noclone /* not needed */
-#endif
-
-#if !defined(__no_sanitize_address)
-#define __no_sanitize_address
-#define __no_sanitize_address_or_inline inline
-#endif
-
/*
* Turn individual warnings and errors on and off locally, depending
* on version.
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 4c7f9be..517bd14 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,17 +29,8 @@
*/
#define OPTIMIZER_HIDE_VAR(var) barrier()
-/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
-#define __must_be_array(a) 0
-
#endif
/* icc has this, but it's called _bswap16 */
#define __HAVE_BUILTIN_BSWAP16__
#define __builtin_bswap16 _bswap16
-
-/* The following are for compatibility with GCC, from compiler-gcc.h,
- * and may be redefined here because they should not be shared with other
- * compilers, like clang.
- */
-#define __visible __attribute__((externally_visible))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4170fce..18c80cf 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -23,8 +23,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#define __branch_check__(x, expect, is_constant) ({ \
long ______r; \
static struct ftrace_likely_data \
- __attribute__((__aligned__(4))) \
- __attribute__((section("_ftrace_annotated_branch"))) \
+ __aligned(4) \
+ __section("_ftrace_annotated_branch") \
______f = { \
.data.func = __func__, \
.data.file = __FILE__, \
@@ -59,8 +59,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
({ \
int ______r; \
static struct ftrace_branch_data \
- __attribute__((__aligned__(4))) \
- __attribute__((section("_ftrace_branch"))) \
+ __aligned(4) \
+ __section("_ftrace_branch") \
______f = { \
.func = __func__, \
.file = __FILE__, \
@@ -115,7 +115,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define ASM_UNREACHABLE
#endif
#ifndef unreachable
-# define unreachable() do { annotate_reachable(); do { } while (1); } while (0)
+# define unreachable() do { \
+ annotate_unreachable(); \
+ __builtin_unreachable(); \
+} while (0)
#endif
/*
@@ -137,7 +140,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
extern typeof(sym) sym; \
static const unsigned long __kentry_##sym \
__used \
- __attribute__((section("___kentry" "+" #sym ), used)) \
+ __section("___kentry" "+" #sym ) \
= (unsigned long)&sym;
#endif
@@ -278,7 +281,7 @@ unsigned long read_word_at_a_time(const void *addr)
* visible to the compiler.
*/
#define __ADDRESSABLE(sym) \
- static void * __attribute__((section(".discard.addressable"), used)) \
+ static void * __section(".discard.addressable") __used \
__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
/**
@@ -331,10 +334,6 @@ static inline void *offset_to_ptr(const int *off)
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
-#ifndef __optimize
-# define __optimize(level)
-#endif
-
/* Compile time object size, -1 for unknown */
#ifndef __compiletime_object_size
# define __compiletime_object_size(obj) -1
@@ -376,4 +375,7 @@ static inline void *offset_to_ptr(const int *off)
compiletime_assert(__native_word(t), \
"Need native word sized stores/loads for atomicity.")
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+
#endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
new file mode 100644
index 0000000..6b28c1b
--- /dev/null
+++ b/include/linux/compiler_attributes.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_ATTRIBUTES_H
+#define __LINUX_COMPILER_ATTRIBUTES_H
+
+/*
+ * The attributes in this file are unconditionally defined and they directly
+ * map to compiler attribute(s) -- except those that are optional.
+ *
+ * Any other "attributes" (i.e. those that depend on a configuration option,
+ * on a compiler, on an architecture, on plugins, on other attributes...)
+ * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ *
+ * This file is meant to be sorted (by actual attribute name,
+ * not by #define identifier). Use the __attribute__((__name__)) syntax
+ * (i.e. with underscores) to avoid future collisions with other macros.
+ * If an attribute is optional, state the reason in the comment.
+ */
+
+/*
+ * To check for optional attributes, we use __has_attribute, which is supported
+ * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
+ * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ *
+ * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
+ * depending on the compiler used to build it; however, these attributes have
+ * no semantic effects for sparse, so it does not matter. Also note that,
+ * in order to avoid sparse's warnings, even the unsupported ones must be
+ * defined to 0.
+ */
+#ifndef __has_attribute
+# define __has_attribute(x) __GCC4_has_attribute_##x
+# define __GCC4_has_attribute___assume_aligned__ (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___designated_init__ 0
+# define __GCC4_has_attribute___externally_visible__ 1
+# define __GCC4_has_attribute___noclone__ 1
+# define __GCC4_has_attribute___optimize__ 1
+# define __GCC4_has_attribute___nonstring__ 0
+# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+#endif
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
+ */
+#define __alias(symbol) __attribute__((__alias__(#symbol)))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-aligned-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-aligned-variable-attribute
+ */
+#define __aligned(x) __attribute__((__aligned__(x)))
+#define __aligned_largest __attribute__((__aligned__))
+
+/*
+ * Note: users of __always_inline currently do not write "inline" themselves,
+ * although gcc's docs suggest it is required for the attribute to apply
+ * (without it, gcc also emits "warning: always_inline function might not
+ * be inlinable [-Wattributes]"), so the macro supplies "inline" itself.
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-always_005finline-function-attribute
+ * clang: mentioned
+ */
+#define __always_inline inline __attribute__((__always_inline__))
+
+/*
+ * The second argument is optional (default 0), so we use a variadic macro
+ * to make the shorthand.
+ *
+ * Beware: Do not apply this to functions which may return
+ * ERR_PTRs. Also, it is probably unwise to apply it to functions
+ * returning extra information in the low bits (but in that case the
+ * compiler should see some alignment anyway, when the return value is
+ * massaged by 'flags = ptr & 3; ptr &= ~3;').
+ *
+ * Optional: only supported since gcc >= 4.9
+ * Optional: not supported by icc
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
+ */
+#if __has_attribute(__assume_aligned__)
+# define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+#else
+# define __assume_aligned(a, ...)
+#endif
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+ */
+#define __cold __attribute__((__cold__))
+
+/*
+ * Note the long name.
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute
+ */
+#define __attribute_const__ __attribute__((__const__))
+
+/*
+ * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated'
+ * attribute warnings entirely and for good") for more information.
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-deprecated-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-deprecated-type-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-deprecated-variable-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html#index-deprecated-enumerator-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#deprecated
+ */
+#define __deprecated
+
+/*
+ * Optional: only supported since gcc >= 5.1
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
+ */
+#if __has_attribute(__designated_init__)
+# define __designated_init __attribute__((__designated_init__))
+#else
+# define __designated_init
+#endif
+
+/*
+ * Optional: not supported by clang
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
+ */
+#if __has_attribute(__externally_visible__)
+# define __visible __attribute__((__externally_visible__))
+#else
+# define __visible
+#endif
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-format-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#format
+ */
+#define __printf(a, b) __attribute__((__format__(printf, a, b)))
+#define __scanf(a, b) __attribute__((__format__(scanf, a, b)))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-gnu_005finline-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#gnu-inline
+ */
+#define __gnu_inline __attribute__((__gnu_inline__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
+ */
+#define __malloc __attribute__((__malloc__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-mode-type-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-mode-variable-attribute
+ */
+#define __mode(x) __attribute__((__mode__(x)))
+
+/*
+ * Optional: not supported by clang
+ * Note: icc does not recognize gcc's no-tracer
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute
+ */
+#if __has_attribute(__noclone__)
+# if __has_attribute(__optimize__)
+# define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
+# else
+# define __noclone __attribute__((__noclone__))
+# endif
+#else
+# define __noclone
+#endif
+
+/*
+ * Note the missing underscores.
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
+ * clang: mentioned
+ */
+#define noinline __attribute__((__noinline__))
+
+/*
+ * Optional: only supported since gcc >= 8
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
+ */
+#if __has_attribute(__nonstring__)
+# define __nonstring __attribute__((__nonstring__))
+#else
+# define __nonstring
+#endif
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#id1
+ */
+#define __noreturn __attribute__((__noreturn__))
+
+/*
+ * Optional: only supported since gcc >= 4.8
+ * Optional: not supported by icc
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis
+ */
+#if __has_attribute(__no_sanitize_address__)
+# define __no_sanitize_address __attribute__((__no_sanitize_address__))
+#else
+# define __no_sanitize_address
+#endif
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute
+ */
+#define __packed __attribute__((__packed__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute
+ */
+#define __pure __attribute__((__pure__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-section-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate
+ */
+#define __section(S) __attribute__((__section__(#S)))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-unused-label-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#maybe-unused-unused
+ */
+#define __always_unused __attribute__((__unused__))
+#define __maybe_unused __attribute__((__unused__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-used-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-used-variable-attribute
+ */
+#define __used __attribute__((__used__))
+
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
+ */
+#define __weak __attribute__((__weak__))
+
+#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 97cfe29..3439d7d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_COMPILER_TYPES_H
#define __LINUX_COMPILER_TYPES_H
@@ -54,6 +55,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
#ifdef __KERNEL__
+/* Attributes */
+#include <linux/compiler_attributes.h>
+
/* Compiler specific macros. */
#ifdef __clang__
#include <linux/compiler-clang.h>
@@ -78,12 +82,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
#include <asm/compiler.h>
#endif
-/*
- * Generic compiler-independent macros required for kernel
- * build go below this comment. Actual compiler/compiler version
- * specific implementations come from the above header files
- */
-
struct ftrace_branch_data {
const char *func;
const char *file;
@@ -106,10 +104,6 @@ struct ftrace_likely_data {
unsigned long constant;
};
-/* Don't. Just don't. */
-#define __deprecated
-#define __deprecated_for_modules
-
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
@@ -119,10 +113,6 @@ struct ftrace_likely_data {
* compilers. We don't consider that to be an error, so set them to nothing.
* For example, some of them are for compiler specific plugins.
*/
-#ifndef __designated_init
-# define __designated_init
-#endif
-
#ifndef __latent_entropy
# define __latent_entropy
#endif
@@ -140,17 +130,6 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif
-#ifndef __visible
-#define __visible
-#endif
-
-/*
- * Assume alignment of return value.
- */
-#ifndef __assume_aligned
-#define __assume_aligned(a, ...)
-#endif
-
/* Are two types/vars the same type (ignoring qualifiers)? */
#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
@@ -159,14 +138,6 @@ struct ftrace_likely_data {
(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
-#ifndef __attribute_const__
-#define __attribute_const__ __attribute__((__const__))
-#endif
-
-#ifndef __noclone
-#define __noclone
-#endif
-
/* Helpers for emitting diagnostics in pragmas. */
#ifndef __diag
#define __diag(string)
@@ -186,43 +157,16 @@ struct ftrace_likely_data {
#define __diag_error(compiler, version, option, comment) \
__diag_ ## compiler(version, error, option)
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables. Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure __attribute__((pure))
-#define __aligned(x) __attribute__((aligned(x)))
-#define __printf(a, b) __attribute__((format(printf, a, b)))
-#define __scanf(a, b) __attribute__((format(scanf, a, b)))
-#define __maybe_unused __attribute__((unused))
-#define __always_unused __attribute__((unused))
-#define __mode(x) __attribute__((mode(x)))
-#define __malloc __attribute__((__malloc__))
-#define __used __attribute__((__used__))
-#define __noreturn __attribute__((noreturn))
-#define __packed __attribute__((packed))
-#define __weak __attribute__((weak))
-#define __alias(symbol) __attribute__((alias(#symbol)))
-#define __cold __attribute__((cold))
-#define __section(S) __attribute__((__section__(#S)))
-
-
#ifdef CONFIG_ENABLE_MUST_CHECK
-#define __must_check __attribute__((warn_unused_result))
+#define __must_check __attribute__((__warn_unused_result__))
#else
#define __must_check
#endif
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#if defined(CC_USING_HOTPATCH)
#define notrace __attribute__((hotpatch(0, 0)))
#else
-#define notrace __attribute__((no_instrument_function))
+#define notrace __attribute__((__no_instrument_function__))
#endif
/*
@@ -231,23 +175,11 @@ struct ftrace_likely_data {
* stack and frame pointer being set up and there is no chance to
* restore the lr register to the value before mcount was called.
*/
-#define __naked __attribute__((naked)) notrace
+#define __naked __attribute__((__naked__)) notrace
#define __compiler_offsetof(a, b) __builtin_offsetof(a, b)
/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline __attribute__((gnu_inline))
-#else
-# define __gnu_inline
-#endif
-
-/*
* Force always-inline if the user requests it so via the .config.
* GCC does not warn about unused static inline functions for
* -Wunused-function. This turns out to avoid the need for complex #ifdef
@@ -258,22 +190,20 @@ struct ftrace_likely_data {
* semantics rather than c99. This prevents multiple symbol definition errors
* of extern inline functions at link time.
* A lot of inline functions can cause havoc with function tracing.
+ * Do not use __always_inline here, since currently it expands to inline again
+ * (which would break users of __always_inline).
*/
#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
!defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
- inline __attribute__((always_inline, unused)) notrace __gnu_inline
+#define inline inline __attribute__((__always_inline__)) __gnu_inline \
+ __maybe_unused notrace
#else
-#define inline inline __attribute__((unused)) notrace __gnu_inline
+#define inline inline __gnu_inline \
+ __maybe_unused notrace
#endif
#define __inline__ inline
-#define __inline inline
-#define noinline __attribute__((noinline))
-
-#ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline))
-#endif
+#define __inline inline
/*
* Rather then using noinline to prevent stack consumption, use
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8252df3..c95c080 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1752,6 +1752,25 @@ struct block_device_operations;
#define NOMMU_VMFLAGS \
(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ * If it is called with len == 0 that means "remap to end of source file".
+ * See Documentation/filesystems/vfs.txt for more details about this call.
+ *
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
+ */
+#define REMAP_FILE_DEDUP (1 << 0)
+#define REMAP_FILE_CAN_SHORTEN (1 << 1)
+
+/*
+ * These flags signal that the caller is ok with altering various aspects of
+ * the behavior of the remap operation. The changes must be made by the
+ * implementation; the vfs remap helper functions can take advantage of them.
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
+ * btrfs clone/dedupe ioctls.
+ */
+#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
@@ -1790,10 +1809,9 @@ struct file_operations {
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
- int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
- u64);
- int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
- u64);
+ loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
@@ -1856,21 +1874,21 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
- struct inode *inode_out, loff_t pos_out,
- u64 *len, bool is_dedupe);
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t destoff,
- loff_t len, bool *is_same);
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *count,
+ unsigned int remap_flags);
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
extern int vfs_dedupe_file_range(struct file *file,
struct file_dedupe_range *same);
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
- struct file *dst_file, loff_t dst_pos,
- u64 len);
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos,
+ loff_t len, unsigned int remap_flags);
struct super_operations {
@@ -2998,6 +3016,9 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *count, unsigned int remap_flags);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5ee..76f8db0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr,
- int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
- alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+ int node);
#else
#define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
- alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+ alloc_pages_vma(gfp_mask, 0, vma, addr, node)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c759d1c..a64f21a9 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -37,7 +37,9 @@ struct in_device {
unsigned long mr_v1_seen;
unsigned long mr_v2_seen;
unsigned long mr_maxdelay;
- unsigned char mr_qrv;
+ unsigned long mr_qi; /* Query Interval */
+ unsigned long mr_qri; /* Query Response Interval */
+ unsigned char mr_qrv; /* Query Robustness Variable */
unsigned char mr_gq_running;
unsigned char mr_ifc_count;
struct timer_list mr_gq_timer; /* general query timer */
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 05d8fb5..bc9af55 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -17,6 +17,9 @@
#ifdef CONFIG_KEYS
+struct kernel_pkey_query;
+struct kernel_pkey_params;
+
/*
* key under-construction record
* - passed to the request_key actor if supplied
@@ -155,6 +158,14 @@ struct key_type {
*/
struct key_restriction *(*lookup_restriction)(const char *params);
+ /* Asymmetric key accessor functions. */
+ int (*asym_query)(const struct kernel_pkey_params *params,
+ struct kernel_pkey_query *info);
+ int (*asym_eds_op)(struct kernel_pkey_params *params,
+ const void *in, void *out);
+ int (*asym_verify_signature)(struct kernel_pkey_params *params,
+ const void *in, const void *in2);
+
/* internal fields */
struct list_head link; /* link in types list */
struct lock_class_key lock_class; /* key->sem lock class */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
new file mode 100644
index 0000000..c7c48c7
--- /dev/null
+++ b/include/linux/keyctl.h
@@ -0,0 +1,46 @@
+/* keyctl kernel bits
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef __LINUX_KEYCTL_H
+#define __LINUX_KEYCTL_H
+
+#include <uapi/linux/keyctl.h>
+
+struct kernel_pkey_query {
+ __u32 supported_ops; /* Which ops are supported */
+ __u32 key_size; /* Size of the key in bits */
+ __u16 max_data_size; /* Maximum size of raw data to sign in bytes */
+ __u16 max_sig_size; /* Maximum size of signature in bytes */
+ __u16 max_enc_size; /* Maximum size of encrypted blob in bytes */
+ __u16 max_dec_size; /* Maximum size of decrypted blob in bytes */
+};
+
+enum kernel_pkey_operation {
+ kernel_pkey_encrypt,
+ kernel_pkey_decrypt,
+ kernel_pkey_sign,
+ kernel_pkey_verify,
+};
+
+struct kernel_pkey_params {
+ struct key *key;
+ const char *encoding; /* Encoding (eg. "oaep" or "raw" for none) */
+ const char *hash_algo; /* Digest algorithm used (eg. "sha1") or NULL if N/A */
+ char *info; /* Modified info string to be released later */
+ __u32 in_len; /* Input data size */
+ union {
+ __u32 out_len; /* Output buffer size (enc/dec/sign) */
+ __u32 in2_len; /* 2nd input data size (verify) */
+ };
+ enum kernel_pkey_operation op : 8;
+};
+
+#endif /* __LINUX_KEYCTL_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62..bac395f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
struct mempolicy *get_task_policy(struct task_struct *p);
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+ unsigned long addr);
bool vma_policy_mof(struct vm_area_struct *vma);
extern void numa_default_policy(void);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f35c7bf..0096a05 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#ifdef CONFIG_TREE_SRCU
#define _SRCU_NOTIFIER_HEAD(name, mod) \
- static DEFINE_PER_CPU(struct srcu_data, \
- name##_head_srcu_data); \
+ static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
mod struct srcu_notifier_head name = \
SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
new file mode 100644
index 0000000..53dfc25
--- /dev/null
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PLATFORM_DATA_X86_ASUS_WMI_H
+#define __PLATFORM_DATA_X86_ASUS_WMI_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+/* WMI Methods */
+#define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */
+#define ASUS_WMI_METHODID_SFBD 0x44424653 /* Set First Boot Device */
+#define ASUS_WMI_METHODID_GLCD 0x44434C47 /* Get LCD status */
+#define ASUS_WMI_METHODID_GPID 0x44495047 /* Get Panel ID?? (Resol) */
+#define ASUS_WMI_METHODID_QMOD 0x444F4D51 /* Quiet MODe */
+#define ASUS_WMI_METHODID_SPLV 0x4C425053 /* Set Panel Light Value */
+#define ASUS_WMI_METHODID_AGFN 0x4E464741 /* FaN? */
+#define ASUS_WMI_METHODID_SFUN 0x4E554653 /* FUNCtionalities */
+#define ASUS_WMI_METHODID_SDSP 0x50534453 /* Set DiSPlay output */
+#define ASUS_WMI_METHODID_GDSP 0x50534447 /* Get DiSPlay output */
+#define ASUS_WMI_METHODID_DEVP 0x50564544 /* DEVice Policy */
+#define ASUS_WMI_METHODID_OSVR 0x5256534F /* OS VeRsion */
+#define ASUS_WMI_METHODID_DSTS 0x53544344 /* Device STatuS */
+#define ASUS_WMI_METHODID_DSTS2 0x53545344 /* Device STatuS #2 */
+#define ASUS_WMI_METHODID_BSTS 0x53545342 /* Bios STatuS ? */
+#define ASUS_WMI_METHODID_DEVS 0x53564544 /* DEVice Set */
+#define ASUS_WMI_METHODID_CFVS 0x53564643 /* CPU Frequency Volt Set */
+#define ASUS_WMI_METHODID_KBFT 0x5446424B /* KeyBoard FilTer */
+#define ASUS_WMI_METHODID_INIT 0x54494E49 /* INITialize */
+#define ASUS_WMI_METHODID_HKEY 0x59454B48 /* Hot KEY ?? */
+
+#define ASUS_WMI_UNSUPPORTED_METHOD 0xFFFFFFFE
+
+/* Wireless */
+#define ASUS_WMI_DEVID_HW_SWITCH 0x00010001
+#define ASUS_WMI_DEVID_WIRELESS_LED 0x00010002
+#define ASUS_WMI_DEVID_CWAP 0x00010003
+#define ASUS_WMI_DEVID_WLAN 0x00010011
+#define ASUS_WMI_DEVID_WLAN_LED 0x00010012
+#define ASUS_WMI_DEVID_BLUETOOTH 0x00010013
+#define ASUS_WMI_DEVID_GPS 0x00010015
+#define ASUS_WMI_DEVID_WIMAX 0x00010017
+#define ASUS_WMI_DEVID_WWAN3G 0x00010019
+#define ASUS_WMI_DEVID_UWB 0x00010021
+
+/* Leds */
+/* 0x000200XX and 0x000400XX */
+#define ASUS_WMI_DEVID_LED1 0x00020011
+#define ASUS_WMI_DEVID_LED2 0x00020012
+#define ASUS_WMI_DEVID_LED3 0x00020013
+#define ASUS_WMI_DEVID_LED4 0x00020014
+#define ASUS_WMI_DEVID_LED5 0x00020015
+#define ASUS_WMI_DEVID_LED6 0x00020016
+
+/* Backlight and Brightness */
+#define ASUS_WMI_DEVID_ALS_ENABLE 0x00050001 /* Ambient Light Sensor */
+#define ASUS_WMI_DEVID_BACKLIGHT 0x00050011
+#define ASUS_WMI_DEVID_BRIGHTNESS 0x00050012
+#define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021
+#define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */
+#define ASUS_WMI_DEVID_LIGHTBAR 0x00050025
+
+/* Misc */
+#define ASUS_WMI_DEVID_CAMERA 0x00060013
+
+/* Storage */
+#define ASUS_WMI_DEVID_CARDREADER 0x00080013
+
+/* Input */
+#define ASUS_WMI_DEVID_TOUCHPAD 0x00100011
+#define ASUS_WMI_DEVID_TOUCHPAD_LED 0x00100012
+
+/* Fan, Thermal */
+#define ASUS_WMI_DEVID_THERMAL_CTRL 0x00110011
+#define ASUS_WMI_DEVID_FAN_CTRL 0x00110012
+
+/* Power */
+#define ASUS_WMI_DEVID_PROCESSOR_STATE 0x00120012
+
+/* Deep S3 / Resume on LID open */
+#define ASUS_WMI_DEVID_LID_RESUME 0x00120031
+
+/* DSTS masks */
+#define ASUS_WMI_DSTS_STATUS_BIT 0x00000001
+#define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002
+#define ASUS_WMI_DSTS_PRESENCE_BIT 0x00010000
+#define ASUS_WMI_DSTS_USER_BIT 0x00020000
+#define ASUS_WMI_DSTS_BIOS_BIT 0x00040000
+#define ASUS_WMI_DSTS_BRIGHTNESS_MASK 0x000000FF
+#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK 0x0000FF00
+#define ASUS_WMI_DSTS_LIGHTBAR_MASK 0x0000000F
+
+#if IS_REACHABLE(CONFIG_ASUS_WMI)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval);
+#else /* !IS_REACHABLE(CONFIG_ASUS_WMI) */
+static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
+ u32 *retval)
+{
+ return -ENODEV; /* stub: always fails, *retval is left untouched */
+}
+#endif /* IS_REACHABLE(CONFIG_ASUS_WMI) */
+
+#endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8f8a541..a51c13c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1200,6 +1200,11 @@ struct task_struct {
void *security;
#endif
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ unsigned long lowest_stack;
+ unsigned long prev_lowest_stack;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
new file mode 100644
index 0000000..3d5c327
--- /dev/null
+++ b/include/linux/stackleak.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STACKLEAK_H
+#define _LINUX_STACKLEAK_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+/*
+ * Check that the poison value points to the unused hole in the
+ * virtual memory map for your platform.
+ */
+#define STACKLEAK_POISON -0xBEEF
+#define STACKLEAK_SEARCH_DEPTH 128
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#include <asm/stacktrace.h>
+
+static inline void stackleak_task_init(struct task_struct *t)
+{
+ t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+# ifdef CONFIG_STACKLEAK_METRICS
+ t->prev_lowest_stack = t->lowest_stack; /* both trackers start at the same value */
+# endif /* CONFIG_STACKLEAK_METRICS */
+}
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
+#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
+static inline void stackleak_task_init(struct task_struct *t) { } /* no-op stub */
+#endif /* CONFIG_GCC_PLUGIN_STACKLEAK */
+
+#endif /* _LINUX_STACKLEAK_H */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 422b1c0..55ce99d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,15 +21,16 @@ struct kvec {
size_t iov_len;
};
-enum {
+enum iter_type {
ITER_IOVEC = 0,
ITER_KVEC = 2,
ITER_BVEC = 4,
ITER_PIPE = 8,
+ ITER_DISCARD = 16,
};
struct iov_iter {
- int type;
+ unsigned int type;
size_t iov_offset;
size_t count;
union {
@@ -47,6 +48,41 @@ struct iov_iter {
};
};
+static inline enum iter_type iov_iter_type(const struct iov_iter *i)
+{
+ return i->type & ~(READ | WRITE); /* drop the READ/WRITE bits, leaving the ITER_* value */
+}
+
+static inline bool iter_is_iovec(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_IOVEC;
+}
+
+static inline bool iov_iter_is_kvec(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_KVEC;
+}
+
+static inline bool iov_iter_is_bvec(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_BVEC;
+}
+
+static inline bool iov_iter_is_pipe(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_PIPE;
+}
+
+static inline bool iov_iter_is_discard(const struct iov_iter *i)
+{
+ return iov_iter_type(i) == ITER_DISCARD;
+}
+
+static inline unsigned char iov_iter_rw(const struct iov_iter *i)
+{
+ return i->type & (READ | WRITE); /* keep only the READ/WRITE bits of ->type */
+}
+
/*
* Total number of bytes covered by an iovec.
*
@@ -74,7 +110,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
}
#define iov_for_each(iov, iter, start) \
- if (!((start).type & (ITER_BVEC | ITER_PIPE))) \
+ if (iov_iter_type(&(start)) == ITER_IOVEC || /* helper takes a pointer */ \
+ iov_iter_type(&(start)) == ITER_KVEC) \
for (iter = (start); \
(iter).count && \
((iov = iov_iter_iovec(&(iter))), 1); \
@@ -181,14 +218,15 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
unsigned long iov_iter_alignment(const struct iov_iter *i);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
size_t count);
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
size_t maxsize, unsigned maxpages, size_t *start);
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@ -202,19 +240,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i)
return i->count;
}
-static inline bool iter_is_iovec(const struct iov_iter *i)
-{
- return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
-}
-
-/*
- * Get one of READ or WRITE out of iter->type without any other flags OR'd in
- * with it.
- *
- * The ?: is just for type safety.
- */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
-
/*
* Cap the iov_iter by given limit; note that the second argument is
* *not* the new size - it's upper limit for such. Passing it a value
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c2..fdfd04e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
*
* @bio is a part of the writeback in progress controlled by @wbc. Perform
* writeback specific initialization. This is used to apply the cgroup
- * writeback context. Must be called after the bio has been associated with
- * a device.
+ * writeback context.
*/
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
* regular writeback instead of writing things out itself.
*/
if (wbc->wb)
- bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+ bio_associate_blkcg(bio, wbc->wb->blkcg_css);
}
#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index e2695c4..ddbba83 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
-struct sock *unix_peer_get(struct sock *);
+struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_SIZE 256
#define UNIX_HASH_BITS 8
@@ -40,7 +40,7 @@ struct unix_skb_parms {
u32 consumed;
} __randomize_layout;
-#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
+#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index d0a341b..33d2918 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -54,6 +54,35 @@ enum afs_fs_operation {
afs_FS_StoreData64 = 65538, /* AFS Store file data */
afs_FS_GiveUpAllCallBacks = 65539, /* AFS Give up all our callbacks on a server */
afs_FS_GetCapabilities = 65540, /* AFS Get FS server capabilities */
+
+ yfs_FS_FetchData = 130, /* YFS Fetch file data */
+ yfs_FS_FetchACL = 64131, /* YFS Fetch file ACL */
+ yfs_FS_FetchStatus = 64132, /* YFS Fetch file status */
+ yfs_FS_StoreACL = 64134, /* YFS Store file ACL */
+ yfs_FS_StoreStatus = 64135, /* YFS Store file status */
+ yfs_FS_RemoveFile = 64136, /* YFS Remove a file */
+ yfs_FS_CreateFile = 64137, /* YFS Create a file */
+ yfs_FS_Rename = 64138, /* YFS Rename or move a file or directory */
+ yfs_FS_Symlink = 64139, /* YFS Create a symbolic link */
+ yfs_FS_Link = 64140, /* YFS Create a hard link */
+ yfs_FS_MakeDir = 64141, /* YFS Create a directory */
+ yfs_FS_RemoveDir = 64142, /* YFS Remove a directory */
+ yfs_FS_GetVolumeStatus = 64149, /* YFS Get volume status information */
+ yfs_FS_SetVolumeStatus = 64150, /* YFS Set volume status information */
+ yfs_FS_SetLock = 64156, /* YFS Request a file lock */
+ yfs_FS_ExtendLock = 64157, /* YFS Extend a file lock */
+ yfs_FS_ReleaseLock = 64158, /* YFS Release a file lock */
+ yfs_FS_Lookup = 64161, /* YFS lookup file in directory */
+ yfs_FS_FlushCPS = 64165,
+ yfs_FS_FetchOpaqueACL = 64168,
+ yfs_FS_WhoAmI = 64170,
+ yfs_FS_RemoveACL = 64171,
+ yfs_FS_RemoveFile2 = 64173,
+ yfs_FS_StoreOpaqueACL2 = 64174,
+ yfs_FS_InlineBulkStatus = 64536, /* YFS Fetch multiple file statuses with errors */
+ yfs_FS_FetchData64 = 64537, /* YFS Fetch file data */
+ yfs_FS_StoreData64 = 64538, /* YFS Store file data */
+ yfs_FS_UpdateSymlink = 64540,
};
enum afs_vl_operation {
@@ -84,6 +113,44 @@ enum afs_edit_dir_reason {
afs_edit_dir_for_unlink,
};
+enum afs_eproto_cause {
+ afs_eproto_bad_status,
+ afs_eproto_cb_count,
+ afs_eproto_cb_fid_count,
+ afs_eproto_file_type,
+ afs_eproto_ibulkst_cb_count,
+ afs_eproto_ibulkst_count,
+ afs_eproto_motd_len,
+ afs_eproto_offline_msg_len,
+ afs_eproto_volname_len,
+ afs_eproto_yvl_fsendpt4_len,
+ afs_eproto_yvl_fsendpt6_len,
+ afs_eproto_yvl_fsendpt_num,
+ afs_eproto_yvl_fsendpt_type,
+ afs_eproto_yvl_vlendpt4_len,
+ afs_eproto_yvl_vlendpt6_len,
+ afs_eproto_yvl_vlendpt_type,
+};
+
+enum afs_io_error {
+ afs_io_error_cm_reply,
+ afs_io_error_extract,
+ afs_io_error_fs_probe_fail,
+ afs_io_error_vl_lookup_fail,
+ afs_io_error_vl_probe_fail,
+};
+
+enum afs_file_error {
+ afs_file_error_dir_bad_magic,
+ afs_file_error_dir_big,
+ afs_file_error_dir_missing_page,
+ afs_file_error_dir_over_end,
+ afs_file_error_dir_small,
+ afs_file_error_dir_unmarked_ext,
+ afs_file_error_mntpt,
+ afs_file_error_writeback_fail,
+};
+
#endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
/*
@@ -119,7 +186,34 @@ enum afs_edit_dir_reason {
EM(afs_FS_FetchData64, "FS.FetchData64") \
EM(afs_FS_StoreData64, "FS.StoreData64") \
EM(afs_FS_GiveUpAllCallBacks, "FS.GiveUpAllCallBacks") \
- E_(afs_FS_GetCapabilities, "FS.GetCapabilities")
+ EM(afs_FS_GetCapabilities, "FS.GetCapabilities") \
+ EM(yfs_FS_FetchACL, "YFS.FetchACL") \
+ EM(yfs_FS_FetchStatus, "YFS.FetchStatus") \
+ EM(yfs_FS_StoreACL, "YFS.StoreACL") \
+ EM(yfs_FS_StoreStatus, "YFS.StoreStatus") \
+ EM(yfs_FS_RemoveFile, "YFS.RemoveFile") \
+ EM(yfs_FS_CreateFile, "YFS.CreateFile") \
+ EM(yfs_FS_Rename, "YFS.Rename") \
+ EM(yfs_FS_Symlink, "YFS.Symlink") \
+ EM(yfs_FS_Link, "YFS.Link") \
+ EM(yfs_FS_MakeDir, "YFS.MakeDir") \
+ EM(yfs_FS_RemoveDir, "YFS.RemoveDir") \
+ EM(yfs_FS_GetVolumeStatus, "YFS.GetVolumeStatus") \
+ EM(yfs_FS_SetVolumeStatus, "YFS.SetVolumeStatus") \
+ EM(yfs_FS_SetLock, "YFS.SetLock") \
+ EM(yfs_FS_ExtendLock, "YFS.ExtendLock") \
+ EM(yfs_FS_ReleaseLock, "YFS.ReleaseLock") \
+ EM(yfs_FS_Lookup, "YFS.Lookup") \
+ EM(yfs_FS_FlushCPS, "YFS.FlushCPS") \
+ EM(yfs_FS_FetchOpaqueACL, "YFS.FetchOpaqueACL") \
+ EM(yfs_FS_WhoAmI, "YFS.WhoAmI") \
+ EM(yfs_FS_RemoveACL, "YFS.RemoveACL") \
+ EM(yfs_FS_RemoveFile2, "YFS.RemoveFile2") \
+ EM(yfs_FS_StoreOpaqueACL2, "YFS.StoreOpaqueACL2") \
+ EM(yfs_FS_InlineBulkStatus, "YFS.InlineBulkStatus") \
+ EM(yfs_FS_FetchData64, "YFS.FetchData64") \
+ EM(yfs_FS_StoreData64, "YFS.StoreData64") \
+ E_(yfs_FS_UpdateSymlink, "YFS.UpdateSymlink")
#define afs_vl_operations \
EM(afs_VL_GetEntryByNameU, "VL.GetEntryByNameU") \
@@ -146,6 +240,40 @@ enum afs_edit_dir_reason {
EM(afs_edit_dir_for_symlink, "Symlnk") \
E_(afs_edit_dir_for_unlink, "Unlink")
+#define afs_eproto_causes \
+ EM(afs_eproto_bad_status, "BadStatus") \
+ EM(afs_eproto_cb_count, "CbCount") \
+ EM(afs_eproto_cb_fid_count, "CbFidCount") \
+ EM(afs_eproto_file_type, "FileType") \
+ EM(afs_eproto_ibulkst_cb_count, "IBS.CbCount") \
+ EM(afs_eproto_ibulkst_count, "IBS.FidCount") \
+ EM(afs_eproto_motd_len, "MotdLen") \
+ EM(afs_eproto_offline_msg_len, "OfflineMsgLen") \
+ EM(afs_eproto_volname_len, "VolNameLen") \
+ EM(afs_eproto_yvl_fsendpt4_len, "YVL.FsEnd4Len") \
+ EM(afs_eproto_yvl_fsendpt6_len, "YVL.FsEnd6Len") \
+ EM(afs_eproto_yvl_fsendpt_num, "YVL.FsEndCount") \
+ EM(afs_eproto_yvl_fsendpt_type, "YVL.FsEndType") \
+ EM(afs_eproto_yvl_vlendpt4_len, "YVL.VlEnd4Len") \
+ EM(afs_eproto_yvl_vlendpt6_len, "YVL.VlEnd6Len") \
+ E_(afs_eproto_yvl_vlendpt_type, "YVL.VlEndType")
+
+#define afs_io_errors \
+ EM(afs_io_error_cm_reply, "CM_REPLY") \
+ EM(afs_io_error_extract, "EXTRACT") \
+ EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \
+ EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \
+ E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL")
+
+#define afs_file_errors \
+ EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \
+ EM(afs_file_error_dir_big, "DIR_BIG") \
+ EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \
+ EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \
+ EM(afs_file_error_dir_small, "DIR_SMALL") \
+ EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \
+ EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \
+ E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED")
/*
* Export enum symbols via userspace.
@@ -160,6 +288,9 @@ afs_fs_operations;
afs_vl_operations;
afs_edit_dir_ops;
afs_edit_dir_reasons;
+afs_eproto_causes;
+afs_io_errors;
+afs_file_errors;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -170,17 +301,16 @@ afs_edit_dir_reasons;
#define EM(a, b) { a, b },
#define E_(a, b) { a, b }
-TRACE_EVENT(afs_recv_data,
- TP_PROTO(struct afs_call *call, unsigned count, unsigned offset,
+TRACE_EVENT(afs_receive_data,
+ TP_PROTO(struct afs_call *call, struct iov_iter *iter,
bool want_more, int ret),
- TP_ARGS(call, count, offset, want_more, ret),
+ TP_ARGS(call, iter, want_more, ret),
TP_STRUCT__entry(
+ __field(loff_t, remain )
__field(unsigned int, call )
__field(enum afs_call_state, state )
- __field(unsigned int, count )
- __field(unsigned int, offset )
__field(unsigned short, unmarshall )
__field(bool, want_more )
__field(int, ret )
@@ -190,17 +320,18 @@ TRACE_EVENT(afs_recv_data,
__entry->call = call->debug_id;
__entry->state = call->state;
__entry->unmarshall = call->unmarshall;
- __entry->count = count;
- __entry->offset = offset;
+ __entry->remain = iov_iter_count(iter);
__entry->want_more = want_more;
__entry->ret = ret;
),
- TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
+ TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
__entry->call,
- __entry->state, __entry->unmarshall,
- __entry->offset, __entry->count,
- __entry->want_more, __entry->ret)
+ __entry->remain,
+ __entry->unmarshall,
+ __entry->want_more,
+ __entry->state,
+ __entry->ret)
);
TRACE_EVENT(afs_notify_call,
@@ -301,7 +432,7 @@ TRACE_EVENT(afs_make_fs_call,
}
),
- TP_printk("c=%08x %06x:%06x:%06x %s",
+ TP_printk("c=%08x %06llx:%06llx:%06x %s",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -555,24 +686,70 @@ TRACE_EVENT(afs_edit_dir,
);
TRACE_EVENT(afs_protocol_error,
- TP_PROTO(struct afs_call *call, int error, const void *where),
+ TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause),
+
+ TP_ARGS(call, error, cause),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call )
+ __field(int, error )
+ __field(enum afs_eproto_cause, cause )
+ ),
+
+ TP_fast_assign(
+ __entry->call = call ? call->debug_id : 0;
+ __entry->error = error;
+ __entry->cause = cause;
+ ),
+
+ TP_printk("c=%08x r=%d %s",
+ __entry->call, __entry->error,
+ __print_symbolic(__entry->cause, afs_eproto_causes))
+ );
+
+TRACE_EVENT(afs_io_error,
+ TP_PROTO(unsigned int call, int error, enum afs_io_error where),
TP_ARGS(call, error, where),
TP_STRUCT__entry(
__field(unsigned int, call )
__field(int, error )
- __field(const void *, where )
+ __field(enum afs_io_error, where )
),
TP_fast_assign(
- __entry->call = call ? call->debug_id : 0;
+ __entry->call = call;
__entry->error = error;
__entry->where = where;
),
- TP_printk("c=%08x r=%d sp=%pSR",
- __entry->call, __entry->error, __entry->where)
+ TP_printk("c=%08x r=%d %s",
+ __entry->call, __entry->error,
+ __print_symbolic(__entry->where, afs_io_errors))
+ );
+
+TRACE_EVENT(afs_file_error, /* tracepoint: an error hit on a vnode, tagged with the site that raised it */
+ TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where),
+
+ TP_ARGS(vnode, error, where),
+
+ TP_STRUCT__entry(
+ __field_struct(struct afs_fid, fid ) /* whole fid copied by value */
+ __field(int, error )
+ __field(enum afs_file_error, where )
+ ),
+
+ TP_fast_assign(
+ __entry->fid = vnode->fid; /* vnode is dereferenced unconditionally, so it must not be NULL */
+ __entry->error = error;
+ __entry->where = where;
+ ),
+ /* Output: "<vid>:<vnode>:<unique> r=<error> <label looked up in afs_file_errors>", ids in hex. */
+ TP_printk("%llx:%llx:%x r=%d %s",
+ __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+ __entry->error,
+ __print_symbolic(__entry->where, afs_file_errors))
+ );
TRACE_EVENT(afs_cm_no_server,
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 0f3cb13..f45ee0f 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -61,6 +61,11 @@
#define KEYCTL_INVALIDATE 21 /* invalidate a key */
#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */
#define KEYCTL_DH_COMPUTE 23 /* Compute Diffie-Hellman values */
+#define KEYCTL_PKEY_QUERY 24 /* Query public key parameters */
+#define KEYCTL_PKEY_ENCRYPT 25 /* Encrypt a blob using a public key */
+#define KEYCTL_PKEY_DECRYPT 26 /* Decrypt a blob using a public key */
+#define KEYCTL_PKEY_SIGN 27 /* Create a public key signature */
+#define KEYCTL_PKEY_VERIFY 28 /* Verify a public key signature */
#define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */
/* keyctl structures */
@@ -82,4 +87,29 @@ struct keyctl_kdf_params {
__u32 __spare[8];
};
+#define KEYCTL_SUPPORTS_ENCRYPT 0x01
+#define KEYCTL_SUPPORTS_DECRYPT 0x02
+#define KEYCTL_SUPPORTS_SIGN 0x04
+#define KEYCTL_SUPPORTS_VERIFY 0x08
+
+struct keyctl_pkey_query { /* NOTE(review): presumably the result block of KEYCTL_PKEY_QUERY -- confirm against the keyctl implementation */
+ __u32 supported_ops; /* Which ops are supported (presumably a mask of KEYCTL_SUPPORTS_* -- confirm) */
+ __u32 key_size; /* Size of the key in bits */
+ __u16 max_data_size; /* Maximum size of raw data to sign in bytes (16-bit field) */
+ __u16 max_sig_size; /* Maximum size of signature in bytes (16-bit field) */
+ __u16 max_enc_size; /* Maximum size of encrypted blob in bytes (16-bit field) */
+ __u16 max_dec_size; /* Maximum size of decrypted blob in bytes (16-bit field) */
+ __u32 __spare[10]; /* NOTE(review): unused here; presumably reserved for future extension -- confirm */
+};
+
+struct keyctl_pkey_params { /* NOTE(review): presumably the argument block for the KEYCTL_PKEY_* operations -- confirm */
+ __s32 key_id; /* Serial no. of public key to use */
+ __u32 in_len; /* Input data size */
+ union { /* one 32-bit slot whose meaning depends on the operation */
+ __u32 out_len; /* Output buffer size (encrypt/decrypt/sign) */
+ __u32 in2_len; /* 2nd input data size (verify) */
+ };
+ __u32 __spare[7]; /* NOTE(review): unused here; presumably reserved for future extension -- confirm */
+};
+
#endif /* _LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f35eb72..9de8780 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events
* PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event
+ * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal)
* PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
*/
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
* These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb5..a1966cd7 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,23 @@
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
+#define VIRTIO_BALLOON_CMD_ID_STOP 0
+#define VIRTIO_BALLOON_CMD_ID_DONE 1
struct virtio_balloon_config {
/* Number of pages host wants Guest to give up. */
__u32 num_pages;
/* Number of pages we've actually got in balloon. */
__u32 actual;
+ /* Free page report command id, readonly by guest */
+ __u32 free_page_report_cmd_id;
+ /* Stores PAGE_POISON if page poisoning is in use */
+ __u32 poison_val;
};
#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d56..7343b3a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,10 @@
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_RSEQ) += rseq.o
+obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
+KASAN_SANITIZE_stackleak.o := n
+KCOV_INSTRUMENT_stackleak.o := n
+
$(obj)/configs.o: $(obj)/config_data.h
targets += config_data.gz
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 171a2c8..1971ca32 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2852,10 +2852,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
fn->ret_type == RET_PTR_TO_MAP_VALUE) {
- if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
- else
- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
/* remember map_ptr, so that check_map_access()
@@ -2868,7 +2864,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
- regs[BPF_REG_0].id = ++env->id_gen;
+ if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+ } else {
+ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+ regs[BPF_REG_0].id = ++env->id_gen;
+ }
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
int id = acquire_reference_state(env, insn_idx);
if (id < 0)
@@ -3046,7 +3047,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->umax_value = umax_ptr;
dst_reg->var_off = ptr_reg->var_off;
dst_reg->off = ptr_reg->off + smin_val;
- dst_reg->range = ptr_reg->range;
+ dst_reg->raw = ptr_reg->raw;
break;
}
/* A new variable offset is created. Note that off_reg->off
@@ -3076,10 +3077,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
+ dst_reg->raw = ptr_reg->raw;
if (reg_is_pkt_pointer(ptr_reg)) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
- dst_reg->range = 0;
+ dst_reg->raw = 0;
}
break;
case BPF_SUB:
@@ -3108,7 +3110,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg->var_off = ptr_reg->var_off;
dst_reg->id = ptr_reg->id;
dst_reg->off = ptr_reg->off - smin_val;
- dst_reg->range = ptr_reg->range;
+ dst_reg->raw = ptr_reg->raw;
break;
}
/* A new variable offset is created. If the subtrahend is known
@@ -3134,11 +3136,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
+ dst_reg->raw = ptr_reg->raw;
if (reg_is_pkt_pointer(ptr_reg)) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
if (smin_val < 0)
- dst_reg->range = 0;
+ dst_reg->raw = 0;
}
break;
case BPF_AND:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8b79318..6aaf5dd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
}
/**
- * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
* function is guaranteed to return non-NULL css.
*/
-static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
{
lockdep_assert_held(&cgroup_mutex);
@@ -524,35 +524,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
}
/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss. The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- *
- * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
- */
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
-{
- struct cgroup_subsys_state *css;
-
- do {
- css = cgroup_css(cgrp, ss);
-
- if (css)
- return css;
- cgrp = cgroup_parent(cgrp);
- } while (cgrp);
-
- return init_css_set.subsys[ss->id];
-}
-
-/**
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest
@@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css);
*
* Should be called under cgroup_[tree_]mutex.
*/
-#define for_each_e_css(css, ssid, cgrp) \
- for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
- if (!((css) = cgroup_e_css_by_mask(cgrp, \
- cgroup_subsys[(ssid)]))) \
- ; \
+#define for_each_e_css(css, ssid, cgrp) \
+ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
+ if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+ ; \
else
/**
@@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
* @ss is in this hierarchy, so we want the
* effective css from @cgrp.
*/
- template[i] = cgroup_e_css_by_mask(cgrp, ss);
+ template[i] = cgroup_e_css(cgrp, ss);
} else {
/*
* @ss is not in this hierarchy, so we don't want
@@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
return ret;
/*
- * At this point, cgroup_e_css_by_mask() results reflect the new csses
+ * At this point, cgroup_e_css() results reflect the new csses
* making the following cgroup_update_dfl_csses() properly update
* css associations of all tasks in the subtree.
*/
diff --git a/kernel/configs/kvm_guest.config b/kernel/configs/kvm_guest.config
index 108fecc..208481d9 100644
--- a/kernel/configs/kvm_guest.config
+++ b/kernel/configs/kvm_guest.config
@@ -20,6 +20,7 @@
CONFIG_KVM_GUEST=y
CONFIG_S390_GUEST=y
CONFIG_VIRTIO=y
+CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_CONSOLE=y
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8c49013..84530ab 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
/*
* Do not update time when cgroup is not active
*/
- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
__update_cgrp_time(event->cgrp);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 8f82a3b..07cddff 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
#include <linux/kcov.h>
#include <linux/livepatch.h>
#include <linux/thread_info.h>
+#include <linux/stackleak.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1926,6 +1927,8 @@ static __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_io;
+ stackleak_task_init(p);
+
if (pid != &init_struct_pid) {
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
if (IS_ERR(pid)) {
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 6e6d467..1f0985a 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -8,7 +8,7 @@
#include <linux/cpu.h>
#include <linux/irq.h>
-#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS) * sizeof(unsigned long))
+#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS))
struct cpumap {
unsigned int available;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index c6a3b68..35cf0ad2 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -25,8 +25,6 @@
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/kernel.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include "kexec_internal.h"
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
new file mode 100644
index 0000000..e428929
--- /dev/null
+++ b/kernel/stackleak.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code fills the used part of the kernel stack with a poison value
+ * before returning to userspace. It's part of the STACKLEAK feature
+ * ported from grsecurity/PaX.
+ *
+ * Author: Alexander Popov <alex.popov@linux.com>
+ *
+ * STACKLEAK reduces the information which kernel stack leak bugs can
+ * reveal and blocks some uninitialized stack variable attacks.
+ */
+
+#include <linux/stackleak.h>
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/jump_label.h>
+#include <linux/sysctl.h>
+
+static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
+
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{ /* sysctl handler: report or toggle stack erasing through the stack_erasing_bypass static key; returns proc_dointvec_minmax()'s result */
+ int ret = 0;
+ int state = !static_branch_unlikely(&stack_erasing_bypass); /* 1 while the bypass key is off, i.e. erasing is active */
+ int prev_state = state;
+ /* NOTE(review): this repoints the caller's table at a stack local; confirm two callers can never share @table concurrently. */
+ table->data = &state;
+ table->maxlen = sizeof(int);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ state = !!state; /* normalize whatever was written to 0/1 */
+ if (ret || !write || state == prev_state)
+ return ret; /* parse error, plain read, or no change: leave the key alone */
+ /* The key is a bypass switch, so turning erasing on means turning the key off. */
+ if (state)
+ static_branch_disable(&stack_erasing_bypass);
+ else
+ static_branch_enable(&stack_erasing_bypass);
+ /* Only reached when the state actually changed. */
+ pr_warn("stackleak: kernel stack erasing is %s\n",
+ state ? "enabled" : "disabled");
+ return ret;
+}
+
+#define skip_erasing() static_branch_unlikely(&stack_erasing_bypass)
+#else
+#define skip_erasing() false
+#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
+asmlinkage void stackleak_erase(void)
+{ /* Overwrite the used part of the current task's kernel stack with STACKLEAK_POISON, then reset current->lowest_stack. */
+ /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
+ unsigned long kstack_ptr = current->lowest_stack;
+ unsigned long boundary = (unsigned long)end_of_stack(current);
+ unsigned int poison_count = 0;
+ const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long); /* search depth counted in words, not bytes */
+ /* skip_erasing() is constant false unless CONFIG_STACKLEAK_RUNTIME_DISABLE is set. */
+ if (skip_erasing())
+ return;
+
+ /* Check that 'lowest_stack' value is sane */
+ if (unlikely(kstack_ptr - boundary >= THREAD_SIZE)) /* unsigned wrap-around also catches kstack_ptr < boundary */
+ kstack_ptr = boundary;
+
+ /* Search for the poison value in the kernel stack */
+ while (kstack_ptr > boundary && poison_count <= depth) { /* ends at 'boundary' or after more than 'depth' poison words in a row */
+ if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
+ poison_count++;
+ else
+ poison_count = 0; /* run broken by a non-poison word: start counting again */
+
+ kstack_ptr -= sizeof(unsigned long);
+ }
+
+ /*
+ * One 'long int' at the bottom of the thread stack is reserved and
+ * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
+ */
+ if (kstack_ptr == boundary)
+ kstack_ptr += sizeof(unsigned long);
+
+#ifdef CONFIG_STACKLEAK_METRICS
+ current->prev_lowest_stack = kstack_ptr;
+#endif
+
+ /*
+ * Now write the poison value to the kernel stack. Start from
+ * 'kstack_ptr' and move up till the new 'boundary'. We assume that
+ * the stack pointer doesn't change when we write poison.
+ */
+ if (on_thread_stack())
+ boundary = current_stack_pointer; /* running on the task stack: stop below the live frames */
+ else
+ boundary = current_top_of_stack(); /* running elsewhere: poison up to the top of the task stack */
+
+ while (kstack_ptr < boundary) {
+ *(unsigned long *)kstack_ptr = STACKLEAK_POISON;
+ kstack_ptr += sizeof(unsigned long);
+ }
+
+ /* Reset the 'lowest_stack' value for the next syscall */
+ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+}
+
+void __used stackleak_track_stack(void)
+{ /* Lower current->lowest_stack to the present stack position when that position is deeper than the recorded one. */
+ /*
+ * N.B. stackleak_erase() fills the kernel stack with the poison value,
+ * which has the register width. That code assumes that the value
+ * of 'lowest_stack' is aligned on the register width boundary.
+ *
+ * That is true for x86 and x86_64 because of the kernel stack
+ * alignment on these platforms (for details, see 'cc_stack_align' in
+ * arch/x86/Makefile). Take care of that when you port STACKLEAK to
+ * new platforms.
+ */
+ unsigned long sp = (unsigned long)&sp; /* the address of this local stands in for the current stack pointer */
+
+ /*
+ * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
+ * STACKLEAK_SEARCH_DEPTH makes the poison search in
+ * stackleak_erase() unreliable. Let's prevent that.
+ */
+ BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
+ /* Accept sp only if it is at least one word above task_stack_page(current). */
+ if (sp < current->lowest_stack &&
+ sp >= (unsigned long)task_stack_page(current) +
+ sizeof(unsigned long)) {
+ current->lowest_stack = sp;
+ }
+}
+EXPORT_SYMBOL(stackleak_track_stack);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050..5fc724e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,7 +66,6 @@
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
-#include <linux/pipe_fs_i.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
@@ -91,7 +90,9 @@
#ifdef CONFIG_CHR_DEV_SG
#include <scsi/sg.h>
#endif
-
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/stackleak.h>
+#endif
#ifdef CONFIG_LOCKUP_DETECTOR
#include <linux/nmi.h>
#endif
@@ -1233,6 +1234,17 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+ {
+ .procname = "stack_erasing",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = stack_erasing_sysctl,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
{ }
};
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fac0ddf..2868d85 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return NULL;
- if (!bio->bi_blkg)
+ if (!bio->bi_css)
return NULL;
- return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+ return cgroup_get_kernfs_id(bio->bi_css->cgroup);
}
#else
static union kernfs_node_id *
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index b0875b3..c3fd849 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -115,7 +115,7 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
* section, then we need to read the link list pointers. The trick is
* we pass the address of the string to the seq function just like
* we do for the kernel core formats. To get back the structure that
- * holds the format, we simply use containerof() and then go to the
+ * holds the format, we simply use container_of() and then go to the
* next format in the list.
*/
static const char **
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8be175d..7ebccb5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -83,6 +83,7 @@
const struct kvec *kvec; \
struct kvec v; \
iterate_kvec(i, n, v, kvec, skip, (K)) \
+ } else if (unlikely(i->type & ITER_DISCARD)) { \
} else { \
const struct iovec *iov; \
struct iovec v; \
@@ -114,6 +115,8 @@
} \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
+ } else if (unlikely(i->type & ITER_DISCARD)) { \
+ skip += n; \
} else { \
const struct iovec *iov; \
struct iovec v; \
@@ -428,17 +431,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
-void iov_iter_init(struct iov_iter *i, int direction,
+void iov_iter_init(struct iov_iter *i, unsigned int direction,
const struct iovec *iov, unsigned long nr_segs,
size_t count)
{
+ WARN_ON(direction & ~(READ | WRITE));
+ direction &= READ | WRITE;
+
/* It will get better. Eventually... */
if (uaccess_kernel()) {
- direction |= ITER_KVEC;
- i->type = direction;
+ i->type = ITER_KVEC | direction;
i->kvec = (struct kvec *)iov;
} else {
- i->type = direction;
+ i->type = ITER_IOVEC | direction;
i->iov = iov;
}
i->nr_segs = nr_segs;
@@ -558,7 +563,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
@@ -658,7 +663,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter_mcsafe(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
@@ -692,7 +697,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
@@ -712,7 +717,7 @@ EXPORT_SYMBOL(_copy_from_iter);
bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return false;
}
@@ -739,7 +744,7 @@ EXPORT_SYMBOL(_copy_from_iter_full);
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
@@ -773,7 +778,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
@@ -794,7 +799,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return false;
}
@@ -836,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
return wanted;
- } else if (likely(!(i->type & ITER_PIPE)))
+ } else if (unlikely(iov_iter_is_discard(i)))
+ return bytes;
+ else if (likely(!iov_iter_is_pipe(i)))
return copy_page_to_iter_iovec(page, offset, bytes, i);
else
return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -848,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
{
if (unlikely(!page_copy_sane(page, offset, bytes)))
return 0;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1);
return 0;
}
@@ -888,7 +895,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return pipe_zero(bytes, i);
iterate_and_advance(i, bytes, v,
clear_user(v.iov_base, v.iov_len),
@@ -908,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
kunmap_atomic(kaddr);
return 0;
}
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
kunmap_atomic(kaddr);
WARN_ON(1);
return 0;
@@ -972,10 +979,14 @@ static void pipe_advance(struct iov_iter *i, size_t size)
void iov_iter_advance(struct iov_iter *i, size_t size)
{
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
pipe_advance(i, size);
return;
}
+ if (unlikely(iov_iter_is_discard(i))) {
+ i->count -= size;
+ return;
+ }
iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
@@ -987,7 +998,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
if (WARN_ON(unroll > MAX_RW_COUNT))
return;
i->count += unroll;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
struct pipe_inode_info *pipe = i->pipe;
int idx = i->idx;
size_t off = i->iov_offset;
@@ -1011,12 +1022,14 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
pipe_truncate(i);
return;
}
+ if (unlikely(iov_iter_is_discard(i)))
+ return;
if (unroll <= i->iov_offset) {
i->iov_offset -= unroll;
return;
}
unroll -= i->iov_offset;
- if (i->type & ITER_BVEC) {
+ if (iov_iter_is_bvec(i)) {
const struct bio_vec *bvec = i->bvec;
while (1) {
size_t n = (--bvec)->bv_len;
@@ -1049,23 +1062,25 @@ EXPORT_SYMBOL(iov_iter_revert);
*/
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return i->count; // it is a silly place, anyway
if (i->nr_segs == 1)
return i->count;
- else if (i->type & ITER_BVEC)
+ if (unlikely(iov_iter_is_discard(i)))
+ return i->count;
+ else if (iov_iter_is_bvec(i))
return min(i->count, i->bvec->bv_len - i->iov_offset);
else
return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);
-void iov_iter_kvec(struct iov_iter *i, int direction,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
const struct kvec *kvec, unsigned long nr_segs,
size_t count)
{
- BUG_ON(!(direction & ITER_KVEC));
- i->type = direction;
+ WARN_ON(direction & ~(READ | WRITE));
+ i->type = ITER_KVEC | (direction & (READ | WRITE));
i->kvec = kvec;
i->nr_segs = nr_segs;
i->iov_offset = 0;
@@ -1073,12 +1088,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction,
}
EXPORT_SYMBOL(iov_iter_kvec);
-void iov_iter_bvec(struct iov_iter *i, int direction,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
const struct bio_vec *bvec, unsigned long nr_segs,
size_t count)
{
- BUG_ON(!(direction & ITER_BVEC));
- i->type = direction;
+ WARN_ON(direction & ~(READ | WRITE));
+ i->type = ITER_BVEC | (direction & (READ | WRITE));
i->bvec = bvec;
i->nr_segs = nr_segs;
i->iov_offset = 0;
@@ -1086,13 +1101,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
}
EXPORT_SYMBOL(iov_iter_bvec);
-void iov_iter_pipe(struct iov_iter *i, int direction,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
struct pipe_inode_info *pipe,
size_t count)
{
- BUG_ON(direction != ITER_PIPE);
+ BUG_ON(direction != READ);
WARN_ON(pipe->nrbufs == pipe->buffers);
- i->type = direction;
+ i->type = ITER_PIPE | READ;
i->pipe = pipe;
i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
i->iov_offset = 0;
@@ -1101,12 +1116,30 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
}
EXPORT_SYMBOL(iov_iter_pipe);
+/**
+ * iov_iter_discard - Initialise an I/O iterator that discards data
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer; must be READ, anything else trips BUG_ON().
+ * @count: The initial byte count, stored in @i->count.
+ *
+ * Set up an I/O iterator that just discards everything that's written to it.
+ * It's only available as a READ iterator; only type, count and iov_offset are set here.
+ */
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
+{
+ BUG_ON(direction != READ);
+ i->type = ITER_DISCARD | READ;
+ i->count = count;
+ i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_discard);
+
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
unsigned long res = 0;
size_t size = i->count;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
return size | i->iov_offset;
return size;
@@ -1125,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
unsigned long res = 0;
size_t size = i->count;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1);
return ~0U;
}
@@ -1193,8 +1226,11 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (maxsize > i->count)
maxsize = i->count;
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return pipe_get_pages(i, pages, maxsize, maxpages, start);
+ if (unlikely(iov_iter_is_discard(i)))
+ return -EFAULT;
+
iterate_all_kinds(i, maxsize, v, ({
unsigned long addr = (unsigned long)v.iov_base;
size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1205,7 +1241,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
len = maxpages * PAGE_SIZE;
addr &= ~(PAGE_SIZE - 1);
n = DIV_ROUND_UP(len, PAGE_SIZE);
- res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+ res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
if (unlikely(res < 0))
return res;
return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1270,8 +1306,11 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
if (maxsize > i->count)
maxsize = i->count;
- if (unlikely(i->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(i)))
return pipe_get_pages_alloc(i, pages, maxsize, start);
+ if (unlikely(iov_iter_is_discard(i)))
+ return -EFAULT;
+
iterate_all_kinds(i, maxsize, v, ({
unsigned long addr = (unsigned long)v.iov_base;
size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1283,7 +1322,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
p = get_pages_array(n);
if (!p)
return -ENOMEM;
- res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+ res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
if (unlikely(res < 0)) {
kvfree(p);
return res;
@@ -1313,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
__wsum sum, next;
size_t off = 0;
sum = *csum;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1);
return 0;
}
@@ -1355,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
__wsum sum, next;
size_t off = 0;
sum = *csum;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1);
return false;
}
@@ -1400,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
__wsum sum, next;
size_t off = 0;
sum = *csum;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1); /* for now */
return 0;
}
@@ -1442,8 +1481,10 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
if (!size)
return 0;
+ if (unlikely(iov_iter_is_discard(i)))
+ return 0;
- if (unlikely(i->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(i))) {
struct pipe_inode_info *pipe = i->pipe;
size_t off;
int idx;
@@ -1481,11 +1522,13 @@ EXPORT_SYMBOL(iov_iter_npages);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
*new = *old;
- if (unlikely(new->type & ITER_PIPE)) {
+ if (unlikely(iov_iter_is_pipe(new))) {
WARN_ON(1);
return NULL;
}
- if (new->type & ITER_BVEC)
+ if (unlikely(iov_iter_is_discard(new)))
+ return NULL;
+ if (iov_iter_is_bvec(new))
return new->bvec = kmemdup(new->bvec,
new->nr_segs * sizeof(struct bio_vec),
flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index 218d0b2..81adec8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2049,7 +2049,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
!mapping->a_ops->is_partially_uptodate)
goto page_not_up_to_date;
/* pipes can't handle partially uptodate pages */
- if (unlikely(iter->type & ITER_PIPE))
+ if (unlikely(iov_iter_is_pipe(iter)))
goto page_not_up_to_date;
if (!trylock_page(page))
goto page_not_up_to_date;
@@ -2825,6 +2825,42 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
EXPORT_SYMBOL(read_cache_page_gfp);
/*
+ * Don't operate on ranges the page cache doesn't support, and don't exceed the
+ * LFS limits: cap *count at s_maxbytes (MAX_NON_LFS without O_LARGEFILE) minus
+ * pos, making it a short access. If pos is at or past the limit, return -EFBIG.
+ */
+static int generic_access_check_limits(struct file *file, loff_t pos,
+ loff_t *count)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t max_size = inode->i_sb->s_maxbytes;
+
+ if (!(file->f_flags & O_LARGEFILE))
+ max_size = MAX_NON_LFS;
+
+ if (unlikely(pos >= max_size))
+ return -EFBIG;
+ *count = min(*count, max_size - pos);
+ return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+ loff_t *count)
+{
+ loff_t limit = rlimit(RLIMIT_FSIZE); /* RLIMIT_FSIZE as returned by rlimit() */
+
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0); /* write would start at or past the rlimit */
+ return -EFBIG;
+ }
+ *count = min(*count, limit - pos); /* shorten so the write ends at the rlimit */
+ }
+
+ return generic_access_check_limits(file, pos, count); /* then apply s_maxbytes/LFS limits */
+}
+
+/*
* Performs necessary checks before doing a write
*
* Can adjust writing position or amount of bytes to write.
@@ -2835,8 +2871,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- unsigned long limit = rlimit(RLIMIT_FSIZE);
- loff_t pos;
+ loff_t count;
+ int ret;
if (!iov_iter_count(from))
return 0;
@@ -2845,44 +2881,100 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_APPEND)
iocb->ki_pos = i_size_read(inode);
- pos = iocb->ki_pos;
-
if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
return -EINVAL;
- if (limit != RLIM_INFINITY) {
- if (iocb->ki_pos >= limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- iov_iter_truncate(from, limit - (unsigned long)pos);
- }
+ count = iov_iter_count(from);
+ ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+ if (ret)
+ return ret;
- /*
- * LFS rule
- */
- if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
- !(file->f_flags & O_LARGEFILE))) {
- if (pos >= MAX_NON_LFS)
- return -EFBIG;
- iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
- }
-
- /*
- * Are we about to exceed the fs block limit ?
- *
- * If we have written data it becomes a short write. If we have
- * exceeded without writing data we send a signal and return EFBIG.
- * Linus frestrict idea will clean these up nicely..
- */
- if (unlikely(pos >= inode->i_sb->s_maxbytes))
- return -EFBIG;
-
- iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
+ iov_iter_truncate(from, count);
return iov_iter_count(from);
}
EXPORT_SYMBOL(generic_write_checks);
+/*
+ * Performs necessary checks before doing a clone or dedupe (REMAP_FILE_DEDUP).
+ *
+ * Can shorten *req_count, but only with REMAP_FILE_CAN_SHORTEN (else -EINVAL).
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *req_count, unsigned int remap_flags)
+{
+ struct inode *inode_in = file_in->f_mapping->host;
+ struct inode *inode_out = file_out->f_mapping->host;
+ uint64_t count = *req_count;
+ uint64_t bcount;
+ loff_t size_in, size_out;
+ loff_t bs = inode_out->i_sb->s_blocksize;
+ int ret;
+
+ /* The start of both ranges must be aligned to an fs block. */
+ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+ return -EINVAL;
+
+ /* Ensure offsets don't wrap. */
+ if (pos_in + count < pos_in || pos_out + count < pos_out)
+ return -EINVAL;
+
+ size_in = i_size_read(inode_in);
+ size_out = i_size_read(inode_out);
+
+ /* Dedupe requires both ranges to be within EOF. */
+ if ((remap_flags & REMAP_FILE_DEDUP) &&
+ (pos_in >= size_in || pos_in + count > size_in ||
+ pos_out >= size_out || pos_out + count > size_out))
+ return -EINVAL;
+
+ /* Ensure the infile range is within the infile. */
+ if (pos_in >= size_in)
+ return -EINVAL;
+ count = min(count, size_in - (uint64_t)pos_in);
+
+ ret = generic_access_check_limits(file_in, pos_in, &count); /* NOTE(review): &count is uint64_t *, parameter is loff_t * - confirm intended */
+ if (ret)
+ return ret;
+
+ ret = generic_write_check_limits(file_out, pos_out, &count);
+ if (ret)
+ return ret;
+
+ /*
+ * If the user wanted us to link to the infile's EOF, round up to the
+ * next block boundary for this check.
+ *
+ * Otherwise, make sure the count is also block-aligned, having
+ * already confirmed the starting offsets' block alignment.
+ */
+ if (pos_in + count == size_in) {
+ bcount = ALIGN(size_in, bs) - pos_in;
+ } else {
+ if (!IS_ALIGNED(count, bs))
+ count = ALIGN_DOWN(count, bs);
+ bcount = count;
+ }
+
+ /* Don't allow overlapped cloning within the same file. */
+ if (inode_in == inode_out &&
+ pos_out + bcount > pos_in &&
+ pos_out < pos_in + bcount)
+ return -EINVAL;
+
+ /*
+ * We shortened the request but the caller can't deal with that, so
+ * bounce the request back to userspace.
+ */
+ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
+ return -EINVAL;
+
+ *req_count = count;
+ return 0;
+}
+
int pagecache_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8f..55478ab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
* available
* never: never stall for any thp allocation
*/
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
{
const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+ gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+ struct mempolicy *pol;
+ /*
+ * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+ * specified, to express a general desire to stay on the current
+ * node for optimistic allocation attempts. If the defrag mode
+ * and/or madvise hint requires direct reclaim, then we prefer to
+ * fall back to another node rather than node reclaim because that
+ * can lead to excessive reclaim even though there is free memory
+ * on other nodes. We expect that NUMA preferences are specified
+ * by memory policies.
+ */
+ pol = get_vma_policy(vma, addr);
+ if (pol->mode != MPOL_BIND)
+ this_node = __GFP_THISNODE;
+ mpol_cond_put(pol);
+#endif
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
- return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+ return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- __GFP_KSWAPD_RECLAIM);
+ __GFP_KSWAPD_RECLAIM | this_node);
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
- 0);
- return GFP_TRANSHUGE_LIGHT;
+ this_node);
+ return GFP_TRANSHUGE_LIGHT | this_node;
}
/* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
pte_free(vma->vm_mm, pgtable);
return ret;
}
- gfp = alloc_hugepage_direct_gfpmask(vma);
- page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+ gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+ page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
alloc:
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) {
- huge_gfp = alloc_hugepage_direct_gfpmask(vma);
- new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+ huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+ new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+ haddr, numa_node_id());
} else
new_page = NULL;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 54920cb..6e1469b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
struct mem_cgroup *memcg;
int ret = 0;
- if (memcg_kmem_bypass())
+ if (mem_cgroup_disabled() || memcg_kmem_bypass())
return 0;
memcg = get_mem_cgroup_from_current();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 61972da..2b2b3cc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ cond_resched();
ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
altmap);
map_offset = 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cfd26d7..5837a06 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
} else if (PageTransHuge(page)) {
struct page *thp;
- thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
- HPAGE_PMD_ORDER);
+ thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+ address, numa_node_id());
if (!thp)
return NULL;
prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
* freeing by another task. It is the caller's responsibility to free the
* extra reference for shared policies.
*/
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
unsigned long addr)
{
struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual Address of the allocation. Must be inside the VMA.
* @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
*
* This function allocates a page from the kernel page pool and applies
* a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*/
struct page *
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, int node, bool hugepage)
+ unsigned long addr, int node)
{
struct mempolicy *pol;
struct page *page;
@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
goto out;
}
- if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
- int hpage_node = node;
-
- /*
- * For hugepage allocation and non-interleave policy which
- * allows the current node (or other explicitly preferred
- * node) we only try to allocate from the current/preferred
- * node and don't fall back to other nodes, as the cost of
- * remote accesses would likely offset THP benefits.
- *
- * If the policy is interleave, or does not allow the current
- * node in its nodemask, we allocate the standard way.
- */
- if (pol->mode == MPOL_PREFERRED &&
- !(pol->flags & MPOL_F_LOCAL))
- hpage_node = pol->v.preferred_node;
-
- nmask = policy_nodemask(gfp, pol);
- if (!nmask || node_isset(hpage_node, *nmask)) {
- mpol_cond_put(pol);
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
- goto out;
- }
- }
-
nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node);
page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/page_io.c b/mm/page_io.c
index a451ffa..d4d1c89 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
};
struct iov_iter from;
- iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+ iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
init_sync_kiocb(&kiocb, swap_file);
kiocb.ki_pos = page_file_offset(page);
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
goto out;
}
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
- bio_associate_blkg_from_page(bio, page);
+ bio_associate_blkcg_from_page(bio, page);
count_swpout_vm_event(page);
set_page_writeback(page);
unlock_page(page);
diff --git a/mm/page_poison.c b/mm/page_poison.c
index f7e2a67..f0c15e9 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
}
early_param("page_poison", early_page_poison_param);
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
bool page_poisoning_enabled(void)
{
/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
debug_pagealloc_enabled()));
}
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
static void poison_page(struct page *page)
{
diff --git a/mm/percpu.c b/mm/percpu.c
index a6b74c6..db86282 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2591,7 +2591,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
BUG_ON(ai->nr_groups != 1);
upa = ai->alloc_size/ai->unit_size;
nr_g0_units = roundup(num_possible_cpus(), upa);
- if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
+ if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) {
pcpu_free_alloc_info(ai);
return -EINVAL;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122..ea26d7a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
shmem_pseudo_vma_init(&pvma, info, hindex);
page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
- HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+ HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
diff --git a/net/9p/client.c b/net/9p/client.c
index 5f23e18..2c9a17b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -2066,7 +2066,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
struct kvec kv = {.iov_base = data, .iov_len = count};
struct iov_iter to;
- iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+ iov_iter_kvec(&to, READ, &kv, 1, count);
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, (unsigned long long) offset, count);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index eb596c2..b1d39ca 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -329,7 +329,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
if (!iov_iter_count(data))
return 0;
- if (!(data->type & ITER_KVEC)) {
+ if (!iov_iter_is_kvec(data)) {
int n;
/*
* We allow only p9_max_pages pinned. We wait for the
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4e2576f..828e87f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
iv.iov_len = skb->len;
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
err = l2cap_chan_send(chan, &msg, skb->len);
if (err > 0) {
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2..58fc633 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
l2cap_chan_send(chan, &msg, total_len);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a1c1b7e..c822e62 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
l2cap_chan_send(chan, &msg, 1 + len);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 88e3583..57fcc6b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -513,7 +513,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
if (!buf)
msg.msg_flags |= MSG_TRUNC;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
r = 0;
@@ -532,7 +532,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
int r;
BUG_ON(page_offset + length > PAGE_SIZE);
- iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+ iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
r = 0;
@@ -594,7 +594,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
else
msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+ iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
ret = sock_sendmsg(sock, &msg);
if (ret == -EAGAIN)
ret = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f679c7a..e01274b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da3944..765b2b3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ)
#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
+#define IGMP_QUERY_INTERVAL (125*HZ)
#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2
-
#define IGMP_INITIAL_REPORT_DELAY (1)
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
in_dev->mr_v1_seen = jiffies +
- IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
in_dev->mr_v2_seen = jiffies +
- IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
}
/* cancel the interface change timer */
in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!max_delay)
max_delay = 1; /* can't mod w/ 0 */
in_dev->mr_maxdelay = max_delay;
- if (ih3->qrv)
- in_dev->mr_qrv = ih3->qrv;
+
+ /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+ in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+ * The number of seconds represented by the [Query Response
+ * Interval] must be less than the [Query Interval].
+ */
+ if (in_dev->mr_qri >= in_dev->mr_qi)
+ in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
if (!group) { /* general query */
if (ih3->nsrcs)
return true; /* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
}
+#ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev) /* restore default IGMP query parameters */
+{
+ struct net *net = dev_net(in_dev->dev);
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL; /* compile-time default */
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; /* compile-time default */
+ in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; /* sysctl value of the device's netns */
+}
+#else /* !CONFIG_IP_MULTICAST */
+static void ip_mc_reset(struct in_device *in_dev) /* nothing to reset */
+{
+}
+#endif /* CONFIG_IP_MULTICAST */
+
void ip_mc_init_dev(struct in_device *in_dev)
{
-#ifdef CONFIG_IP_MULTICAST
- struct net *net = dev_net(in_dev->dev);
-#endif
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
#endif
+ ip_mc_reset(in_dev);
spin_lock_init(&in_dev->mc_tomb_lock);
}
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
- struct net *net = dev_net(in_dev->dev);
-#endif
ASSERT_RTNL();
-#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+ ip_mc_reset(in_dev);
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
for_each_pmc_rtnl(in_dev, pmc) {
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index b7918d4..3b45fe5 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -145,6 +145,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
ret = err;
goto out;
}
+ copied = -EAGAIN;
}
ret = copied;
out:
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4020c5..2526be6 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
EnterFunction(7);
/* Receive a packet */
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
if (len < 0)
return len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a70097e..865ecef 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
* is already present */
if (mac_proto != MAC_PROTO_NONE)
return -EINVAL;
- mac_proto = MAC_PROTO_NONE;
+ mac_proto = MAC_PROTO_ETHERNET;
break;
case OVS_ACTION_ATTR_POP_ETH:
@@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
if (vlan_tci & htons(VLAN_TAG_PRESENT))
return -EINVAL;
- mac_proto = MAC_PROTO_ETHERNET;
+ mac_proto = MAC_PROTO_NONE;
break;
case OVS_ACTION_ATTR_PUSH_NSH:
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a827a1f..6a28b96 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -499,8 +499,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer)
{
- struct list_head *pos;
- struct sctp_transport *transport;
+ struct sctp_transport *transport;
+ struct list_head *pos;
+ struct sctp_chunk *ch;
pr_debug("%s: association:%p addr:%pISpc\n",
__func__, asoc, &peer->ipaddr.sa);
@@ -564,7 +565,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
*/
if (!list_empty(&peer->transmitted)) {
struct sctp_transport *active = asoc->peer.active_path;
- struct sctp_chunk *ch;
/* Reset the transport of each chunk on this list */
list_for_each_entry(ch, &peer->transmitted,
@@ -586,6 +586,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
sctp_transport_hold(active);
}
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+ if (ch->transport == peer)
+ ch->transport = NULL;
+
asoc->peer.transport_count--;
sctp_transport_free(peer);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fc0386e..739f3e5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7083,14 +7083,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
if (!asoc)
goto out;
- if (policy & SCTP_PR_SCTP_ALL) {
+ if (policy == SCTP_PR_SCTP_ALL) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7142,7 +7143,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 52241d6..89c3a8c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
*/
krflags = MSG_PEEK | MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
sizeof(struct smc_clc_msg_hdr));
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (signal_pending(current)) {
@@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
krflags = MSG_WAITALL;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
diff --git a/net/socket.c b/net/socket.c
index 99c9685..593826e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg);
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);
@@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
if (!sock->ops->sendmsg_locked)
return sock_no_sendmsg_locked(sk, msg, size);
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
@@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
mm_segment_t oldfs = get_fs();
int result;
- iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
set_fs(KERNEL_DS);
result = sock_recvmsg(sock, msg, flags);
set_fs(oldfs);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3b525ac..986f3ed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -336,7 +336,7 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+ iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
if (base != 0) {
iov_iter_advance(&msg.msg_iter, base);
buflen -= base;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1b51e04..ae77c71 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -361,7 +361,7 @@ static ssize_t
xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
struct kvec *kvec, size_t count, size_t seek)
{
- iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+ iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
return xs_sock_recvmsg(sock, msg, flags, seek);
}
@@ -370,7 +370,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
struct bio_vec *bvec, unsigned long nr, size_t count,
size_t seek)
{
- iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+ iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
return xs_sock_recvmsg(sock, msg, flags, seek);
}
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 4bdea00..efb16f6 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
iov.iov_base = &s;
iov.iov_len = sizeof(s);
msg.msg_name = NULL;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
if (ret == -EWOULDBLOCK)
return -EWOULDBLOCK;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 276edbc..d753e36 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
iov.iov_base = kaddr + offset;
iov.iov_len = size;
- iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+ iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
rc = tls_push_data(sk, &msg_iter, size,
flags, TLS_RECORD_TYPE_DATA);
kunmap(page);
@@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
{
struct iov_iter msg_iter;
- iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+ iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5cd88ba..7b1af8b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
unsigned char record_type = TLS_RECORD_TYPE_DATA;
- bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool eor = !(msg->msg_flags & MSG_MORE);
size_t try_to_copy, copied = 0;
struct sk_msg *msg_pl, *msg_en;
@@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk,
bool cmsg = false;
int target, err = 0;
long timeo;
- bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
int num_async = 0;
flags |= nonblock;
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 4a9ee2d..140270a 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -8,7 +8,6 @@
config XFRM_OFFLOAD
bool
- depends on XFRM
config XFRM_ALGO
tristate
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index ca21a35..bb01555 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -140,17 +140,9 @@
cc-disable-warning = $(call try-run,\
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
-# cc-name
-# Expands to either gcc or clang
-cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
-
# cc-version
cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
-# cc-fullversion
-cc-fullversion = $(shell $(CONFIG_SHELL) \
- $(srctree)/scripts/gcc-version.sh -p $(CC))
-
# cc-ifversion
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 24b2fb1..768306a 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -29,6 +29,7 @@
warning-1 += $(call cc-option, -Wunused-but-set-variable)
warning-1 += $(call cc-option, -Wunused-const-variable)
warning-1 += $(call cc-option, -Wpacked-not-aligned)
+warning-1 += $(call cc-option, -Wstringop-truncation)
warning-1 += $(call cc-disable-warning, missing-field-initializers)
warning-1 += $(call cc-disable-warning, sign-compare)
@@ -64,7 +65,7 @@
KBUILD_CFLAGS += $(warning)
else
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
KBUILD_CFLAGS += $(call cc-disable-warning, format)
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 0a482f3..46c5c68 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -26,6 +26,16 @@
gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE) \
+= -fplugin-arg-randomize_layout_plugin-performance-mode
+gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak_plugin.so
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \
+ += -DSTACKLEAK_PLUGIN
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) \
+ += -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
+ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
+endif
+export DISABLE_STACKLEAK_PLUGIN
+
# All the plugin CFLAGS are collected here in case a build target needs to
# filter them out of the KBUILD_CFLAGS.
GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index cb0c889..0d5c799 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -139,4 +139,55 @@
in structures. This reduces the performance hit of RANDSTRUCT
at the cost of weakened randomization.
+config GCC_PLUGIN_STACKLEAK
+ bool "Erase the kernel stack before returning from syscalls"
+ depends on GCC_PLUGINS
+ depends on HAVE_ARCH_STACKLEAK
+ help
+ This option makes the kernel erase the kernel stack before
+ returning from system calls. That reduces the information which
+ kernel stack leak bugs can reveal and blocks some uninitialized
+ stack variable attacks.
+
+ The tradeoff is the performance impact: on a single CPU system kernel
+ compilation sees a 1% slowdown. Other systems and workloads may vary,
+ and you are advised to test this feature on your expected workload
+ before deploying it.
+
+ This plugin was ported from grsecurity/PaX. More information at:
+ * https://grsecurity.net/
+ * https://pax.grsecurity.net/
+
+config STACKLEAK_TRACK_MIN_SIZE
+ int "Minimum stack frame size of functions tracked by STACKLEAK"
+ default 100
+ range 0 4096
+ depends on GCC_PLUGIN_STACKLEAK
+ help
+ The STACKLEAK gcc plugin instruments the kernel code for tracking
+ the lowest border of the kernel stack (and for some other purposes).
+ It inserts the stackleak_track_stack() call for the functions with
+ a stack frame size greater than or equal to this parameter.
+ If unsure, leave the default value 100.
+
+config STACKLEAK_METRICS
+ bool "Show STACKLEAK metrics in the /proc file system"
+ depends on GCC_PLUGIN_STACKLEAK
+ depends on PROC_FS
+ help
+ If this is set, STACKLEAK metrics for every task are available in
+ the /proc file system. In particular, /proc/<pid>/stack_depth
+ shows the maximum kernel stack consumption for the current and
+ previous syscalls. Although this information is not precise, it
+ can be useful for estimating the STACKLEAK performance impact for
+ your workloads.
+
+config STACKLEAK_RUNTIME_DISABLE
+ bool "Allow runtime disabling of kernel stack erasing"
+ depends on GCC_PLUGIN_STACKLEAK
+ help
+ This option provides the 'stack_erasing' sysctl, which can be used at
+ runtime to control kernel stack erasing for kernels built with
+ CONFIG_GCC_PLUGIN_STACKLEAK.
+
endif
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
new file mode 100644
index 0000000..2f48da9
--- /dev/null
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2011-2017 by the PaX Team <pageexec@freemail.hu>
+ * Modified by Alexander Popov <alex.popov@linux.com>
+ * Licensed under the GPL v2
+ *
+ * Note: the choice of the license means that the compilation process is
+ * NOT 'eligible' as defined by gcc's library exception to the GPL v3,
+ * but for the kernel it doesn't matter since it doesn't link against
+ * any of the gcc libraries
+ *
+ * This gcc plugin is needed for tracking the lowest border of the kernel stack.
+ * It instruments the kernel code inserting stackleak_track_stack() calls:
+ * - after alloca();
+ * - for the functions with a stack frame size greater than or equal
+ * to the "track-min-size" plugin parameter.
+ *
+ * This plugin is ported from grsecurity/PaX. For more information see:
+ * https://grsecurity.net/
+ * https://pax.grsecurity.net/
+ *
+ * Debugging:
+ * - use fprintf() to stderr, debug_generic_expr(), debug_gimple_stmt(),
+ * print_rtl() and print_simple_rtl();
+ * - add "-fdump-tree-all -fdump-rtl-all" to the plugin CFLAGS in
+ * Makefile.gcc-plugins to see the verbose dumps of the gcc passes;
+ * - use gcc -E to understand the preprocessing shenanigans;
+ * - use gcc with enabled CFG/GIMPLE/SSA verification (--enable-checking).
+ */
+
+#include "gcc-common.h"
+
+__visible int plugin_is_GPL_compatible;
+
+static int track_frame_size = -1;
+static const char track_function[] = "stackleak_track_stack";
+
+/*
+ * Mark these global variables (roots) for gcc garbage collector since
+ * they point to the garbage-collected memory.
+ */
+static GTY(()) tree track_function_decl;
+
+static struct plugin_info stackleak_plugin_info = {
+ .version = "201707101337",
+ .help = "track-min-size=nn\ttrack stack for functions with a stack frame size >= nn bytes\n"
+ "disable\t\tdo not activate the plugin\n"
+};
+
+static void stackleak_add_track_stack(gimple_stmt_iterator *gsi, bool after)
+{
+ gimple stmt;
+ gcall *stackleak_track_stack;
+ cgraph_node_ptr node;
+ int frequency;
+ basic_block bb;
+
+ /* Insert call to void stackleak_track_stack(void) */
+ stmt = gimple_build_call(track_function_decl, 0);
+ stackleak_track_stack = as_a_gcall(stmt);
+ if (after) {
+ gsi_insert_after(gsi, stackleak_track_stack,
+ GSI_CONTINUE_LINKING);
+ } else {
+ gsi_insert_before(gsi, stackleak_track_stack, GSI_SAME_STMT);
+ }
+
+ /* Update the cgraph */
+ bb = gimple_bb(stackleak_track_stack);
+ node = cgraph_get_create_node(track_function_decl);
+ gcc_assert(node);
+ frequency = compute_call_stmt_bb_frequency(current_function_decl, bb);
+ cgraph_create_edge(cgraph_get_node(current_function_decl), node,
+ stackleak_track_stack, bb->count, frequency);
+}
+
+static bool is_alloca(gimple stmt)
+{
+ if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA))
+ return true;
+
+#if BUILDING_GCC_VERSION >= 4007
+ if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA_WITH_ALIGN))
+ return true;
+#endif
+
+ return false;
+}
+
+/*
+ * Work with the GIMPLE representation of the code. Insert the
+ * stackleak_track_stack() call after alloca() and into the beginning
+ * of the function if it is not instrumented.
+ */
+static unsigned int stackleak_instrument_execute(void)
+{
+ basic_block bb, entry_bb;
+ bool prologue_instrumented = false, is_leaf = true;
+ gimple_stmt_iterator gsi;
+
+ /*
+ * ENTRY_BLOCK_PTR is a basic block which represents possible entry
+ * point of a function. This block does not contain any code and
+ * has a CFG edge to its successor.
+ */
+ gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+ entry_bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+
+ /*
+ * Loop through the GIMPLE statements in each of cfun basic blocks.
+ * cfun is a global variable which represents the function that is
+ * currently processed.
+ */
+ FOR_EACH_BB_FN(bb, cfun) {
+ for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
+ gimple stmt;
+
+ stmt = gsi_stmt(gsi);
+
+ /* Leaf function is a function which makes no calls */
+ if (is_gimple_call(stmt))
+ is_leaf = false;
+
+ if (!is_alloca(stmt))
+ continue;
+
+ /* Insert stackleak_track_stack() call after alloca() */
+ stackleak_add_track_stack(&gsi, true);
+ if (bb == entry_bb)
+ prologue_instrumented = true;
+ }
+ }
+
+ if (prologue_instrumented)
+ return 0;
+
+ /*
+ * Special cases to skip the instrumentation.
+ *
+ * Taking the address of static inline functions materializes them,
+ * but we mustn't instrument some of them as the resulting stack
+ * alignment required by the function call ABI will break other
+ * assumptions regarding the expected (but not otherwise enforced)
+ * register clobbering ABI.
+ *
+ * Case in point: native_save_fl on amd64, when optimized for size,
+ * would clobber rdx if it were instrumented here.
+ *
+ * TODO: any more special cases?
+ */
+ if (is_leaf &&
+ !TREE_PUBLIC(current_function_decl) &&
+ DECL_DECLARED_INLINE_P(current_function_decl)) {
+ return 0;
+ }
+
+ if (is_leaf &&
+ !strncmp(IDENTIFIER_POINTER(DECL_NAME(current_function_decl)),
+ "_paravirt_", 10)) {
+ return 0;
+ }
+
+ /* Insert stackleak_track_stack() call at the function beginning */
+ bb = entry_bb;
+ if (!single_pred_p(bb)) {
+ /* gcc_assert(bb_loop_depth(bb) ||
+ (bb->flags & BB_IRREDUCIBLE_LOOP)); */
+ split_edge(single_succ_edge(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+ gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+ bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+ }
+ gsi = gsi_after_labels(bb);
+ stackleak_add_track_stack(&gsi, false);
+
+ return 0;
+}
+
+static bool large_stack_frame(void)
+{
+#if BUILDING_GCC_VERSION >= 8000
+ return maybe_ge(get_frame_size(), track_frame_size);
+#else
+ return (get_frame_size() >= track_frame_size);
+#endif
+}
+
+/*
+ * Work with the RTL representation of the code.
+ * Remove the unneeded stackleak_track_stack() calls from the functions
+ * which don't call alloca() and don't have a large enough stack frame size.
+ */
+static unsigned int stackleak_cleanup_execute(void)
+{
+ rtx_insn *insn, *next;
+
+ if (cfun->calls_alloca)
+ return 0;
+
+ if (large_stack_frame())
+ return 0;
+
+ /*
+ * Find stackleak_track_stack() calls. Loop through the chain of insns,
+ * which is an RTL representation of the code for a function.
+ *
+ * The example of a matching insn:
+ * (call_insn 8 4 10 2 (call (mem (symbol_ref ("stackleak_track_stack")
+ * [flags 0x41] <function_decl 0x7f7cd3302a80 stackleak_track_stack>)
+ * [0 stackleak_track_stack S1 A8]) (0)) 675 {*call} (expr_list
+ * (symbol_ref ("stackleak_track_stack") [flags 0x41] <function_decl
+ * 0x7f7cd3302a80 stackleak_track_stack>) (expr_list (0) (nil))) (nil))
+ */
+ for (insn = get_insns(); insn; insn = next) {
+ rtx body;
+
+ next = NEXT_INSN(insn);
+
+ /* Check the expression code of the insn */
+ if (!CALL_P(insn))
+ continue;
+
+ /*
+ * Check the expression code of the insn body, which is an RTL
+ * Expression (RTX) describing the side effect performed by
+ * that insn.
+ */
+ body = PATTERN(insn);
+
+ if (GET_CODE(body) == PARALLEL)
+ body = XVECEXP(body, 0, 0);
+
+ if (GET_CODE(body) != CALL)
+ continue;
+
+ /*
+ * Check the first operand of the call expression. It should
+ * be a mem RTX describing the needed subroutine with a
+ * symbol_ref RTX.
+ */
+ body = XEXP(body, 0);
+ if (GET_CODE(body) != MEM)
+ continue;
+
+ body = XEXP(body, 0);
+ if (GET_CODE(body) != SYMBOL_REF)
+ continue;
+
+ if (SYMBOL_REF_DECL(body) != track_function_decl)
+ continue;
+
+ /* Delete the stackleak_track_stack() call */
+ delete_insn_and_edges(insn);
+#if BUILDING_GCC_VERSION >= 4007 && BUILDING_GCC_VERSION < 8000
+ if (GET_CODE(next) == NOTE &&
+ NOTE_KIND(next) == NOTE_INSN_CALL_ARG_LOCATION) {
+ insn = next;
+ next = NEXT_INSN(insn);
+ delete_insn_and_edges(insn);
+ }
+#endif
+ }
+
+ return 0;
+}
+
+static bool stackleak_gate(void)
+{
+ tree section;
+
+ section = lookup_attribute("section",
+ DECL_ATTRIBUTES(current_function_decl));
+ if (section && TREE_VALUE(section)) {
+ section = TREE_VALUE(TREE_VALUE(section));
+
+ if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
+ return false;
+ if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
+ return false;
+ if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
+ return false;
+ if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
+ return false;
+ }
+
+ return track_frame_size >= 0;
+}
+
+/* Build the function declaration for stackleak_track_stack() */
+static void stackleak_start_unit(void *gcc_data __unused,
+ void *user_data __unused)
+{
+ tree fntype;
+
+ /* void stackleak_track_stack(void) */
+ fntype = build_function_type_list(void_type_node, NULL_TREE);
+ track_function_decl = build_fn_decl(track_function, fntype);
+ DECL_ASSEMBLER_NAME(track_function_decl); /* for LTO */
+ TREE_PUBLIC(track_function_decl) = 1;
+ TREE_USED(track_function_decl) = 1;
+ DECL_EXTERNAL(track_function_decl) = 1;
+ DECL_ARTIFICIAL(track_function_decl) = 1;
+ DECL_PRESERVE_P(track_function_decl) = 1;
+}
+
+/*
+ * Pass gate function is a predicate function that gets executed before the
+ * corresponding pass. If the return value is 'true' the pass gets executed,
+ * otherwise, it is skipped.
+ */
+static bool stackleak_instrument_gate(void)
+{
+ return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_instrument
+#define PROPERTIES_REQUIRED PROP_gimple_leh | PROP_cfg
+#define TODO_FLAGS_START TODO_verify_ssa | TODO_verify_flow | TODO_verify_stmts
+#define TODO_FLAGS_FINISH TODO_verify_ssa | TODO_verify_stmts | TODO_dump_func \
+ | TODO_update_ssa | TODO_rebuild_cgraph_edges
+#include "gcc-generate-gimple-pass.h"
+
+static bool stackleak_cleanup_gate(void)
+{
+ return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_cleanup
+#define TODO_FLAGS_FINISH TODO_dump_func
+#include "gcc-generate-rtl-pass.h"
+
+/*
+ * Every gcc plugin exports a plugin_init() function that is called right
+ * after the plugin is loaded. This function is responsible for registering
+ * the plugin callbacks and doing other required initialization.
+ */
+__visible int plugin_init(struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ const char * const plugin_name = plugin_info->base_name;
+ const int argc = plugin_info->argc;
+ const struct plugin_argument * const argv = plugin_info->argv;
+ int i = 0;
+
+ /* Extra GGC root tables describing our GTY-ed data */
+ static const struct ggc_root_tab gt_ggc_r_gt_stackleak[] = {
+ {
+ .base = &track_function_decl,
+ .nelt = 1,
+ .stride = sizeof(track_function_decl),
+ .cb = &gt_ggc_mx_tree_node,
+ .pchw = &gt_pch_nx_tree_node
+ },
+ LAST_GGC_ROOT_TAB
+ };
+
+ /*
+ * The stackleak_instrument pass should be executed before the
+ * "optimized" pass, which is the control flow graph cleanup that is
+ * performed just before expanding gcc trees to the RTL. In former
+ * versions of the plugin this new pass was inserted before the
+ * "tree_profile" pass, which is currently called "profile".
+ */
+ PASS_INFO(stackleak_instrument, "optimized", 1,
+ PASS_POS_INSERT_BEFORE);
+
+ /*
+ * The stackleak_cleanup pass should be executed after the
+ * "reload" pass, when the stack frame size is final.
+ */
+ PASS_INFO(stackleak_cleanup, "reload", 1, PASS_POS_INSERT_AFTER);
+
+ if (!plugin_default_version_check(version, &gcc_version)) {
+ error(G_("incompatible gcc/plugin versions"));
+ return 1;
+ }
+
+ /* Parse the plugin arguments */
+ for (i = 0; i < argc; i++) {
+ if (!strcmp(argv[i].key, "disable"))
+ return 0;
+
+ if (!strcmp(argv[i].key, "track-min-size")) {
+ if (!argv[i].value) {
+ error(G_("no value supplied for option '-fplugin-arg-%s-%s'"),
+ plugin_name, argv[i].key);
+ return 1;
+ }
+
+ track_frame_size = atoi(argv[i].value);
+ if (track_frame_size < 0) {
+ error(G_("invalid option argument '-fplugin-arg-%s-%s=%s'"),
+ plugin_name, argv[i].key, argv[i].value);
+ return 1;
+ }
+ } else {
+ error(G_("unknown option '-fplugin-arg-%s-%s'"),
+ plugin_name, argv[i].key);
+ return 1;
+ }
+ }
+
+ /* Give the information about the plugin */
+ register_callback(plugin_name, PLUGIN_INFO, NULL,
+ &stackleak_plugin_info);
+
+ /* Register to be called before processing a translation unit */
+ register_callback(plugin_name, PLUGIN_START_UNIT,
+ &stackleak_start_unit, NULL);
+
+ /* Register an extra GCC garbage collector (GGC) root table */
+ register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL,
+ (void *)&gt_ggc_r_gt_stackleak);
+
+ /*
+ * Hook into the Pass Manager to register new gcc passes.
+ *
+ * The stack frame size info is available only at the last RTL pass,
+ * when it's too late to insert complex code like a function call.
+ * So we register two gcc passes to instrument every function at first
+ * and remove the unneeded instrumentation later.
+ */
+ register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+ &stackleak_instrument_pass_info);
+ register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+ &stackleak_cleanup_pass_info);
+
+ return 0;
+}
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 67ed9f6..63b6092 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -68,21 +68,7 @@
$(simple-targets): $(obj)/conf
$< $(silent) --$@ $(Kconfig)
-PHONY += oldnoconfig silentoldconfig savedefconfig defconfig
-
-# oldnoconfig is an alias of olddefconfig, because people already are dependent
-# on its behavior (sets new symbols to their default value but not 'n') with the
-# counter-intuitive name.
-oldnoconfig: olddefconfig
- @echo " WARNING: \"oldnoconfig\" target will be removed after Linux 4.19"
- @echo " Please use \"olddefconfig\" instead, which is an alias."
-
-# We do not expect manual invokcation of "silentoldcofig" (or "syncconfig").
-silentoldconfig: syncconfig
- @echo " WARNING: \"silentoldconfig\" has been renamed to \"syncconfig\""
- @echo " and is now an internal implementation detail."
- @echo " What you want is probably \"oldconfig\"."
- @echo " \"silentoldconfig\" will be removed after Linux 4.19"
+PHONY += savedefconfig defconfig
savedefconfig: $(obj)/conf
$< $(silent) --$@=defconfig $(Kconfig)
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 7b2b372..98e0c7a 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -460,12 +460,6 @@ static struct option long_opts[] = {
{"randconfig", no_argument, NULL, randconfig},
{"listnewconfig", no_argument, NULL, listnewconfig},
{"olddefconfig", no_argument, NULL, olddefconfig},
- /*
- * oldnoconfig is an alias of olddefconfig, because people already
- * are dependent on its behavior(sets new symbols to their default
- * value but not 'n') with the counter-intuitive name.
- */
- {"oldnoconfig", no_argument, NULL, olddefconfig},
{NULL, 0, NULL, 0}
};
@@ -480,7 +474,6 @@ static void conf_usage(const char *progname)
printf(" --syncconfig Similar to oldconfig but generates configuration in\n"
" include/{generated/,config/}\n");
printf(" --olddefconfig Same as oldconfig but sets new symbols to their default value\n");
- printf(" --oldnoconfig An alias of olddefconfig\n");
printf(" --defconfig <file> New config with default defined in <file>\n");
printf(" --savedefconfig <file> Save the minimal current configuration to <file>\n");
printf(" --allnoconfig New config where all options are answered with no\n");
diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 67d1314..da66e77 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -33,12 +33,15 @@
echo " -n use allnoconfig instead of alldefconfig"
echo " -r list redundant entries when merging fragments"
echo " -O dir to put generated output files. Consider setting \$KCONFIG_CONFIG instead."
+ echo
+ echo "Used prefix: '$CONFIG_PREFIX'. You can redefine it with \$CONFIG_ environment variable."
}
RUNMAKE=true
ALLTARGET=alldefconfig
WARNREDUN=false
OUTPUT=.
+CONFIG_PREFIX=${CONFIG_-CONFIG_}
while true; do
case $1 in
@@ -99,7 +102,8 @@
fi
MERGE_LIST=$*
-SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(CONFIG_[a-zA-Z0-9_]*\)[= ].*/\2/p"
+SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p"
+
TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX)
echo "Using $INITFILE as base"
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index e09fe4d..8963203 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1742,7 +1742,7 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry)
if (error)
return error;
- parent = aa_get_ns(dir->i_private);
+ parent = aa_get_ns(dir->i_private);
/* rmdir calls the generic securityfs functions to remove files
* from the apparmor dir. It is up to the apparmor ns locking
* to avoid races.
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 4285943f..d0afed9 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -496,7 +496,7 @@ static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label,
/* update caching of label on file_ctx */
spin_lock(&fctx->lock);
old = rcu_dereference_protected(fctx->label,
- spin_is_locked(&fctx->lock));
+ lockdep_is_held(&fctx->lock));
l = aa_label_merge(old, label, GFP_ATOMIC);
if (l) {
if (l != old) {
diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h
index e287b7d..265ae66 100644
--- a/security/apparmor/include/cred.h
+++ b/security/apparmor/include/cred.h
@@ -151,6 +151,8 @@ static inline struct aa_label *begin_current_label_crit_section(void)
{
struct aa_label *label = aa_current_raw_label();
+ might_sleep();
+
if (label_is_stale(label)) {
label = aa_get_newest_label(label);
if (aa_replace_current_label(label) == 0)
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
index ec7228e..7334ac9 100644
--- a/security/apparmor/include/net.h
+++ b/security/apparmor/include/net.h
@@ -83,6 +83,13 @@ struct aa_sk_ctx {
__e; \
})
+struct aa_secmark {
+ u8 audit;
+ u8 deny;
+ u32 secid;
+ char *label;
+};
+
extern struct aa_sfs_entry aa_sfs_entry_network[];
void audit_net_cb(struct audit_buffer *ab, void *va);
@@ -103,4 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk);
int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
struct socket *sock);
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+ u32 secid, struct sock *sk);
+
#endif /* __AA_NET_H */
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index ab64c6b..8e6707c 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -155,6 +155,9 @@ struct aa_profile {
struct aa_rlimit rlimits;
+ int secmark_count;
+ struct aa_secmark *secmark;
+
struct aa_loaddata *rawdata;
unsigned char *hash;
char *dirname;
diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h
index dee6fa3..fa20627 100644
--- a/security/apparmor/include/secid.h
+++ b/security/apparmor/include/secid.h
@@ -22,6 +22,9 @@ struct aa_label;
/* secid value that will not be allocated */
#define AA_SECID_INVALID 0
+/* secid value that matches any other secid */
+#define AA_SECID_WILDCARD 1
+
struct aa_label *aa_secid_to_label(u32 secid);
int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 974affe..76491e7 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
const char *end = fqname + n;
const char *name = skipn_spaces(fqname, n);
- if (!name)
- return NULL;
*ns_name = NULL;
*ns_len = 0;
+
+ if (!name)
+ return NULL;
+
if (name[0] == ':') {
char *split = strnchr(&name[1], end - &name[1], ':');
*ns_name = skipn_spaces(&name[1], end - &name[1]);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index aa35939..42446a2 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -23,6 +23,8 @@
#include <linux/sysctl.h>
#include <linux/audit.h>
#include <linux/user_namespace.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
#include <net/sock.h>
#include "include/apparmor.h"
@@ -114,13 +116,13 @@ static int apparmor_ptrace_access_check(struct task_struct *child,
struct aa_label *tracer, *tracee;
int error;
- tracer = begin_current_label_crit_section();
+ tracer = __begin_current_label_crit_section();
tracee = aa_get_task_label(child);
error = aa_may_ptrace(tracer, tracee,
(mode & PTRACE_MODE_READ) ? AA_PTRACE_READ
: AA_PTRACE_TRACE);
aa_put_label(tracee);
- end_current_label_crit_section(tracer);
+ __end_current_label_crit_section(tracer);
return error;
}
@@ -130,11 +132,11 @@ static int apparmor_ptrace_traceme(struct task_struct *parent)
struct aa_label *tracer, *tracee;
int error;
- tracee = begin_current_label_crit_section();
+ tracee = __begin_current_label_crit_section();
tracer = aa_get_task_label(parent);
error = aa_may_ptrace(tracer, tracee, AA_PTRACE_TRACE);
aa_put_label(tracer);
- end_current_label_crit_section(tracee);
+ __end_current_label_crit_section(tracee);
return error;
}
@@ -1020,6 +1022,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
}
+#ifdef CONFIG_NETWORK_SECMARK
/**
* apparmor_socket_sock_recv_skb - check perms before associating skb to sk
*
@@ -1030,8 +1033,15 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
*/
static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
- return 0;
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+ if (!skb->secmark)
+ return 0;
+
+ return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
+ skb->secmark, sk);
}
+#endif
static struct aa_label *sk_peer_label(struct sock *sk)
@@ -1126,6 +1136,20 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
ctx->label = aa_get_current_label();
}
+#ifdef CONFIG_NETWORK_SECMARK
+static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
+{
+ struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+ if (!skb->secmark)
+ return 0;
+
+ return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
+ skb->secmark, sk);
+}
+#endif
+
static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -1177,12 +1201,17 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+#ifdef CONFIG_NETWORK_SECMARK
LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+#endif
LSM_HOOK_INIT(socket_getpeersec_stream,
apparmor_socket_getpeersec_stream),
LSM_HOOK_INIT(socket_getpeersec_dgram,
apparmor_socket_getpeersec_dgram),
LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+#ifdef CONFIG_NETWORK_SECMARK
+ LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request),
+#endif
LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
LSM_HOOK_INIT(cred_free, apparmor_cred_free),
@@ -1538,6 +1567,97 @@ static inline int apparmor_init_sysctl(void)
}
#endif /* CONFIG_SYSCTL */
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK)
+static unsigned int apparmor_ip_postroute(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct aa_sk_ctx *ctx;
+ struct sock *sk;
+
+ if (!skb->secmark)
+ return NF_ACCEPT;
+
+ sk = skb_to_full_sk(skb);
+ if (sk == NULL)
+ return NF_ACCEPT;
+
+ ctx = SK_CTX(sk);
+ if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND,
+ skb->secmark, sk))
+ return NF_ACCEPT;
+
+ return NF_DROP_ERR(-ECONNREFUSED);
+
+}
+
+static unsigned int apparmor_ipv4_postroute(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return apparmor_ip_postroute(priv, skb, state);
+}
+
+static unsigned int apparmor_ipv6_postroute(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return apparmor_ip_postroute(priv, skb, state);
+}
+
+static const struct nf_hook_ops apparmor_nf_ops[] = {
+ {
+ .hook = apparmor_ipv4_postroute,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_SELINUX_FIRST,
+ },
+#if IS_ENABLED(CONFIG_IPV6)
+ {
+ .hook = apparmor_ipv6_postroute,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_SELINUX_FIRST,
+ },
+#endif
+};
+
+static int __net_init apparmor_nf_register(struct net *net)
+{
+ int ret;
+
+ ret = nf_register_net_hooks(net, apparmor_nf_ops,
+ ARRAY_SIZE(apparmor_nf_ops));
+ return ret;
+}
+
+static void __net_exit apparmor_nf_unregister(struct net *net)
+{
+ nf_unregister_net_hooks(net, apparmor_nf_ops,
+ ARRAY_SIZE(apparmor_nf_ops));
+}
+
+static struct pernet_operations apparmor_net_ops = {
+ .init = apparmor_nf_register,
+ .exit = apparmor_nf_unregister,
+};
+
+static int __init apparmor_nf_ip_init(void)
+{
+ int err;
+
+ if (!apparmor_enabled)
+ return 0;
+
+ err = register_pernet_subsys(&apparmor_net_ops);
+ if (err)
+ panic("Apparmor: register_pernet_subsys: error %d\n", err);
+
+ return 0;
+}
+__initcall(apparmor_nf_ip_init);
+#endif
+
static int __init apparmor_init(void)
{
int error;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index bb24cfa..c07fde4 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -18,6 +18,7 @@
#include "include/label.h"
#include "include/net.h"
#include "include/policy.h"
+#include "include/secid.h"
#include "net_names.h"
@@ -146,17 +147,20 @@ int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
struct sock *sk)
{
- struct aa_profile *profile;
- DEFINE_AUDIT_SK(sa, op, sk);
+ int error = 0;
AA_BUG(!label);
AA_BUG(!sk);
- if (unconfined(label))
- return 0;
+ if (!unconfined(label)) {
+ struct aa_profile *profile;
+ DEFINE_AUDIT_SK(sa, op, sk);
- return fn_for_each_confined(label, profile,
- aa_profile_af_sk_perm(profile, &sa, request, sk));
+ error = fn_for_each_confined(label, profile,
+ aa_profile_af_sk_perm(profile, &sa, request, sk));
+ }
+
+ return error;
}
int aa_sk_perm(const char *op, u32 request, struct sock *sk)
@@ -185,3 +189,70 @@ int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
return aa_label_sk_perm(label, op, request, sock->sk);
}
+
+#ifdef CONFIG_NETWORK_SECMARK
+/*
+ * Resolve the label text of one secmark rule into a secid.
+ *
+ * A label whose first character is '*' becomes AA_SECID_WILDCARD. Any other
+ * label is parsed against the root namespace's unconfined label and the
+ * secid of the parsed label is stored in @secmark->secid.
+ *
+ * Returns 0 on success, or the error carried by the pointer that
+ * aa_label_strn_parse() returned.
+ */
+static int apparmor_secmark_init(struct aa_secmark *secmark)
+{
+	char *name = secmark->label;
+	struct aa_label *parsed;
+
+	if (name[0] == '*') {
+		secmark->secid = AA_SECID_WILDCARD;
+		return 0;
+	}
+
+	parsed = aa_label_strn_parse(&root_ns->unconfined->label, name,
+				     strlen(name), GFP_ATOMIC, false, false);
+	if (IS_ERR(parsed))
+		return PTR_ERR(parsed);
+
+	secmark->secid = parsed->secid;
+	return 0;
+}
+
+/*
+ * Evaluate the secmark rules of one profile against a packet's secid.
+ *
+ * A profile without secmark rules allows everything (returns 0). Otherwise
+ * every rule is visited; a rule whose secid is still 0 is resolved on the
+ * spot through apparmor_secmark_init(), and a failure there is returned
+ * immediately. Each rule that matches @secid (or is the wildcard) sets the
+ * deny or allow mask, plus the audit mask when the rule asks for it. The
+ * accumulated permissions are then passed through
+ * aa_apply_modes_to_perms() and checked against @request with
+ * aa_check_perms(), whose result is returned.
+ *
+ * @sk is accepted but not used by this function.
+ */
+static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
+			   struct common_audit_data *sa, struct sock *sk)
+{
+	struct aa_perms perms = { };
+	int idx;
+
+	if (!profile->secmark_count)
+		return 0;
+
+	for (idx = 0; idx < profile->secmark_count; idx++) {
+		struct aa_secmark *rule = &profile->secmark[idx];
+
+		if (!rule->secid) {
+			int err = apparmor_secmark_init(rule);
+
+			if (err)
+				return err;
+		}
+
+		if (rule->secid != secid && rule->secid != AA_SECID_WILDCARD)
+			continue;
+
+		if (rule->deny)
+			perms.deny = ALL_PERMS_MASK;
+		else
+			perms.allow = ALL_PERMS_MASK;
+
+		if (rule->audit)
+			perms.audit = ALL_PERMS_MASK;
+	}
+
+	aa_apply_modes_to_perms(profile, &perms);
+
+	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+/*
+ * Check @request for a packet carrying @secid against every confined
+ * profile of @label.
+ *
+ * Sets up socket audit data for @op/@sk and runs aa_secmark_perm() on each
+ * confined profile via fn_for_each_confined(); that macro's result is
+ * returned.
+ */
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+			   u32 secid, struct sock *sk)
+{
+	struct aa_profile *profile;
+	int error;
+	DEFINE_AUDIT_SK(sa, op, sk);
+
+	error = fn_for_each_confined(label, profile,
+				     aa_secmark_perm(profile, request, secid,
+						     &sa, sk));
+	return error;
+}
+#endif
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 1590e2d..df9c589 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -231,6 +231,9 @@ void aa_free_profile(struct aa_profile *profile)
for (i = 0; i < profile->xattr_count; i++)
kzfree(profile->xattrs[i]);
kzfree(profile->xattrs);
+ for (i = 0; i < profile->secmark_count; i++)
+ kzfree(profile->secmark[i].label);
+ kzfree(profile->secmark);
kzfree(profile->dirname);
aa_put_dfa(profile->xmatch);
aa_put_dfa(profile->policy.dfa);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 21cb384..379682e 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -292,6 +292,19 @@ static bool unpack_nameX(struct aa_ext *e, enum aa_code code, const char *name)
return 0;
}
+/*
+ * Unpack a single byte tagged AA_U8 (optionally named @name) from @e.
+ *
+ * On success the byte is stored through @data when @data is non-NULL, the
+ * read position moves past it, and 1 is returned. Returns 0 when the tag
+ * does not match or the byte would lie outside the buffer.
+ */
+static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name)
+{
+	if (!unpack_nameX(e, AA_U8, name))
+		return 0;
+	if (!inbounds(e, sizeof(u8)))
+		return 0;
+
+	if (data)
+		*data = get_unaligned((u8 *)e->pos);
+	e->pos += sizeof(u8);
+	return 1;
+}
+
static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
{
if (unpack_nameX(e, AA_U32, name)) {
@@ -529,6 +542,49 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
return 0;
}
+/*
+ * Unpack the optional "secmark" section of a profile.
+ *
+ * When a struct named "secmark" is present, its array is read into a newly
+ * allocated profile->secmark table: for each element an audit byte, a deny
+ * byte and a duplicated label string. The array and struct terminators must
+ * follow. A stream without the section is not an error.
+ *
+ * Returns 1 on success (including "section absent"). On failure returns 0
+ * with e->pos restored and with profile->secmark / profile->secmark_count
+ * reset, so that nothing allocated here remains reachable from @profile.
+ */
+static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
+{
+	void *pos = e->pos;
+	int i, size = 0;
+
+	if (unpack_nameX(e, AA_STRUCT, "secmark")) {
+		size = unpack_array(e, NULL);
+
+		profile->secmark = kcalloc(size, sizeof(struct aa_secmark),
+					   GFP_KERNEL);
+		if (!profile->secmark)
+			goto fail;
+
+		profile->secmark_count = size;
+
+		for (i = 0; i < size; i++) {
+			if (!unpack_u8(e, &profile->secmark[i].audit, NULL))
+				goto fail;
+			if (!unpack_u8(e, &profile->secmark[i].deny, NULL))
+				goto fail;
+			if (!unpack_strdup(e, &profile->secmark[i].label, NULL))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+			goto fail;
+	}
+
+	return 1;
+
+fail:
+	if (profile->secmark) {
+		/* kcalloc() zeroed the table, so unfilled labels are NULL. */
+		for (i = 0; i < size; i++)
+			kfree(profile->secmark[i].label);
+		kfree(profile->secmark);
+		/*
+		 * aa_free_profile() also frees profile->secmark; clear the
+		 * pointer so tearing the profile down after this failure
+		 * does not free the table a second time.
+		 */
+		profile->secmark = NULL;
+		profile->secmark_count = 0;
+	}
+
+	e->pos = pos;
+	return 0;
+}
+
static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
{
void *pos = e->pos;
@@ -727,6 +783,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
goto fail;
}
+ if (!unpack_secmark(e, profile)) {
+ info = "failed to unpack profile secmark rules";
+ goto fail;
+ }
+
if (unpack_nameX(e, AA_STRUCT, "policydb")) {
/* generic policy dfa - optional and may be NULL */
info = "failed to unpack policydb";
diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c
index 4ccec1b..05373d9 100644
--- a/security/apparmor/secid.c
+++ b/security/apparmor/secid.c
@@ -32,8 +32,7 @@
* secids - do not pin labels with a refcount. They rely on the label
* properly updating/freeing them
*/
-
-#define AA_FIRST_SECID 1
+#define AA_FIRST_SECID 2
static DEFINE_IDR(aa_secids);
static DEFINE_SPINLOCK(secid_lock);
diff --git a/security/keys/Makefile b/security/keys/Makefile
index ef1581b..9cef540 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -22,6 +22,7 @@
obj-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
obj-$(CONFIG_KEY_DH_OPERATIONS) += dh.o
+obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += keyctl_pkey.o
#
# Key types
diff --git a/security/keys/compat.c b/security/keys/compat.c
index e87c89c..9482df60 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -141,6 +141,24 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
return keyctl_restrict_keyring(arg2, compat_ptr(arg3),
compat_ptr(arg4));
+ case KEYCTL_PKEY_QUERY:
+ if (arg3 != 0)
+ return -EINVAL;
+ return keyctl_pkey_query(arg2,
+ compat_ptr(arg4),
+ compat_ptr(arg5));
+
+ case KEYCTL_PKEY_ENCRYPT:
+ case KEYCTL_PKEY_DECRYPT:
+ case KEYCTL_PKEY_SIGN:
+ return keyctl_pkey_e_d_s(option,
+ compat_ptr(arg2), compat_ptr(arg3),
+ compat_ptr(arg4), compat_ptr(arg5));
+
+ case KEYCTL_PKEY_VERIFY:
+ return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3),
+ compat_ptr(arg4), compat_ptr(arg5));
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 9f8208d..74cb0ff 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -298,6 +298,45 @@ static inline long compat_keyctl_dh_compute(
#endif
#endif
+/*
+ * Public-key operation keyctls.
+ *
+ * With CONFIG_ASYMMETRIC_KEY_TYPE the three entry points are declared here
+ * and implemented elsewhere; without it they collapse to inline stubs that
+ * ignore their arguments and return -EOPNOTSUPP.
+ */
+#ifdef CONFIG_ASYMMETRIC_KEY_TYPE
+extern long keyctl_pkey_query(key_serial_t,
+ const char __user *,
+ struct keyctl_pkey_query __user *);
+
+extern long keyctl_pkey_verify(const struct keyctl_pkey_params __user *,
+ const char __user *,
+ const void __user *, const void __user *);
+
+extern long keyctl_pkey_e_d_s(int,
+ const struct keyctl_pkey_params __user *,
+ const char __user *,
+ const void __user *, void __user *);
+#else
+/* Stub: querying a key is unsupported in this configuration. */
+static inline long keyctl_pkey_query(key_serial_t id,
+ const char __user *_info,
+ struct keyctl_pkey_query __user *_res)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub: signature verification is unsupported in this configuration. */
+static inline long keyctl_pkey_verify(const struct keyctl_pkey_params __user *params,
+ const char __user *_info,
+ const void __user *_in,
+ const void __user *_in2)
+{
+ return -EOPNOTSUPP;
+}
+
+/* Stub: encrypt/decrypt/sign are unsupported in this configuration. */
+static inline long keyctl_pkey_e_d_s(int op,
+ const struct keyctl_pkey_params __user *params,
+ const char __user *_info,
+ const void __user *_in,
+ void __user *_out)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/*
* Debugging key validation
*/
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 1ffe60b..1861969 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1747,6 +1747,30 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
(const char __user *) arg3,
(const char __user *) arg4);
+	case KEYCTL_PKEY_QUERY:
+		/* arg3 is reserved for this operation and must be zero. */
+		if (arg3 != 0)
+			return -EINVAL;
+		/*
+		 * arg5 is a user-space pointer; keep the __user annotation in
+		 * the cast so it matches the keyctl_pkey_query() prototype.
+		 */
+		return keyctl_pkey_query((key_serial_t)arg2,
+					 (const char __user *)arg4,
+					 (struct keyctl_pkey_query __user *)arg5);
+
+ case KEYCTL_PKEY_ENCRYPT:
+ case KEYCTL_PKEY_DECRYPT:
+ case KEYCTL_PKEY_SIGN:
+ return keyctl_pkey_e_d_s(
+ option,
+ (const struct keyctl_pkey_params __user *)arg2,
+ (const char __user *)arg3,
+ (const void __user *)arg4,
+ (void __user *)arg5);
+
+ case KEYCTL_PKEY_VERIFY:
+ return keyctl_pkey_verify(
+ (const struct keyctl_pkey_params __user *)arg2,
+ (const char __user *)arg3,
+ (const void __user *)arg4,
+ (const void __user *)arg5);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
new file mode 100644
index 0000000..7839788
--- /dev/null
+++ b/security/keys/keyctl_pkey.c
@@ -0,0 +1,323 @@
+/* Public-key operation keyctls
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/parser.h>
+#include <linux/uaccess.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+/*
+ * Release what keyctl_pkey_params_get() attached to @params: the copied
+ * info string and the key.
+ *
+ * NOTE(review): this is also called after a failed get, when either field
+ * may still be NULL from the initial memset; presumably key_put() accepts
+ * NULL as kfree() does - confirm.
+ */
+static void keyctl_pkey_params_free(struct kernel_pkey_params *params)
+{
+ kfree(params->info);
+ key_put(params->key);
+}
+
+/* Tokens recognised in the info string; Opt_err (-1) means "no match". */
+enum {
+ Opt_err = -1,
+ Opt_enc, /* "enc=<encoding>" eg. "enc=oaep" */
+ Opt_hash, /* "hash=<digest-name>" eg. "hash=sha1" */
+};
+
+/* match_token() patterns for the tokens above, terminated by Opt_err. */
+static const match_table_t param_keys = {
+ { Opt_enc, "enc=%s" },
+ { Opt_hash, "hash=%s" },
+ { Opt_err, NULL }
+};
+
+/*
+ * Parse the information string which consists of key=val pairs.
+ *
+ * params->info is split in place on spaces and tabs. Each non-empty word
+ * must match an entry of param_keys; "enc=" sets params->encoding and
+ * "hash=" sets params->hash_algo, both pointing into params->info.
+ *
+ * Returns 0 on success, or -EINVAL for an unrecognised word, a repeated
+ * option or an empty value.
+ */
+static int keyctl_pkey_params_parse(struct kernel_pkey_params *params)
+{
+	unsigned long token_mask = 0;
+	substring_t args[MAX_OPT_ARGS];
+	char *c = params->info, *p, *q;
+	int token;
+
+	while ((p = strsep(&c, " \t"))) {
+		/* Consecutive separators produce empty words; skip them. */
+		if (*p == '\0' || *p == ' ' || *p == '\t')
+			continue;
+		token = match_token(p, param_keys, args);
+		/*
+		 * Reject unknown words before touching token_mask or args:
+		 * Opt_err is -1, which is not a valid bit number, and args[]
+		 * is not filled in when nothing matched.
+		 */
+		if (token == Opt_err)
+			return -EINVAL;
+		/* Each option may appear at most once. */
+		if (__test_and_set_bit(token, &token_mask))
+			return -EINVAL;
+		q = args[0].from;
+		if (!q[0])
+			return -EINVAL;
+
+		switch (token) {
+		case Opt_enc:
+			params->encoding = q;
+			break;
+
+		case Opt_hash:
+			params->hash_algo = q;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in @params from a key serial number and a user-space info string.
+ *
+ * @params is zeroed and its encoding defaults to "raw". The info string is
+ * copied from user space (at most PAGE_SIZE) and parsed, then key @id is
+ * looked up with search permission. Key types without an asym_query
+ * operation are refused with -EOPNOTSUPP.
+ *
+ * Nothing is released here on error: callers must always call the free
+ * function on params, even if an error is returned.
+ */
+static int keyctl_pkey_params_get(key_serial_t id,
+				  const char __user *_info,
+				  struct kernel_pkey_params *params)
+{
+	key_ref_t kref;
+	char *info;
+	int err;
+
+	memset(params, 0, sizeof(*params));
+	params->encoding = "raw";
+
+	info = strndup_user(_info, PAGE_SIZE);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+	params->info = info;
+
+	err = keyctl_pkey_params_parse(params);
+	if (err < 0)
+		return err;
+
+	kref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
+	if (IS_ERR(kref))
+		return PTR_ERR(kref);
+	params->key = key_ref_to_ptr(kref);
+
+	return params->key->type->asym_query ? 0 : -EOPNOTSUPP;
+}
+
+/*
+ * Get parameters from userspace. Callers must always call the free function
+ * on params, even if an error is returned.
+ *
+ * Copies the keyctl_pkey_params block from @_params, resolves the key and
+ * info string through keyctl_pkey_params_get(), asks the key type for its
+ * size limits and validates the user-supplied buffer lengths against the
+ * limits that apply to @op:
+ *
+ *   encrypt: input is plaintext  (max_dec_size),  output max_enc_size
+ *   decrypt: input is ciphertext (max_enc_size),  output max_dec_size
+ *   sign:    input is data       (max_data_size), output max_sig_size
+ *   verify:  input is data       (max_data_size), signature max_sig_size
+ *
+ * Returns 0 on success, -EFAULT if @_params cannot be read, -EINVAL if a
+ * length exceeds its limit, or the error from parameter setup / asym_query.
+ */
+static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_params,
+				    const char __user *_info,
+				    int op,
+				    struct kernel_pkey_params *params)
+{
+	struct keyctl_pkey_params uparams;
+	struct kernel_pkey_query info;
+	int ret;
+
+	/* Leave params safe to free even if the copy below fails. */
+	memset(params, 0, sizeof(*params));
+	params->encoding = "raw";
+
+	if (copy_from_user(&uparams, _params, sizeof(uparams)) != 0)
+		return -EFAULT;
+
+	ret = keyctl_pkey_params_get(uparams.key_id, _info, params);
+	if (ret < 0)
+		return ret;
+
+	ret = params->key->type->asym_query(params, &info);
+	if (ret < 0)
+		return ret;
+
+	switch (op) {
+	case KEYCTL_PKEY_ENCRYPT:
+		if (uparams.in_len  > info.max_dec_size ||
+		    uparams.out_len > info.max_enc_size)
+			return -EINVAL;
+		break;
+	case KEYCTL_PKEY_DECRYPT:
+		if (uparams.in_len  > info.max_enc_size ||
+		    uparams.out_len > info.max_dec_size)
+			return -EINVAL;
+		break;
+	case KEYCTL_PKEY_SIGN:
+	case KEYCTL_PKEY_VERIFY:
+		/*
+		 * For verify the second length is the signature supplied as
+		 * the second input rather than an output buffer.
+		 * NOTE(review): keyctl_pkey_verify() reads params->in2_len
+		 * although only out_len is assigned below, so the two
+		 * presumably share storage - confirm against the struct.
+		 */
+		if (uparams.in_len  > info.max_data_size ||
+		    uparams.out_len > info.max_sig_size)
+			return -EINVAL;
+		break;
+	default:
+		BUG();
+	}
+
+	params->in_len  = uparams.in_len;
+	params->out_len = uparams.out_len;
+	return 0;
+}
+
+/*
+ * Query information about an asymmetric key.
+ *
+ * Looks up key @id, parses the @_info string, calls the key type's
+ * asym_query operation and copies the result to @_res, then zeroes
+ * @_res->__spare.
+ *
+ * Returns 0 on success, -EFAULT if the result cannot be written to user
+ * space, or the error from parameter setup / asym_query.
+ */
+long keyctl_pkey_query(key_serial_t id,
+		       const char __user *_info,
+		       struct keyctl_pkey_query __user *_res)
+{
+	struct kernel_pkey_params params;
+	struct kernel_pkey_query res;
+	long ret;
+
+	memset(&params, 0, sizeof(params));
+
+	ret = keyctl_pkey_params_get(id, _info, &params);
+	if (ret < 0)
+		goto error;
+
+	ret = params.key->type->asym_query(&params, &res);
+	if (ret < 0)
+		goto error;
+
+	ret = -EFAULT;
+	if (copy_to_user(_res, &res, sizeof(res)) == 0 &&
+	    clear_user(_res->__spare, sizeof(_res->__spare)) == 0)
+		ret = 0;
+
+error:
+	/* params must be freed on every path, including failed setup. */
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
+
+/*
+ * Encrypt/decrypt/sign
+ *
+ * Encrypt data, decrypt data or sign data using a public key.
+ *
+ * _info is a string of supplementary information in key=val format. For
+ * instance, it might contain:
+ *
+ *	"enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the encoding and hash= selects the OID to go in that
+ * particular encoding if required. If enc= isn't supplied, it's assumed that
+ * the caller is supplying raw values.
+ *
+ * @op must be KEYCTL_PKEY_ENCRYPT, KEYCTL_PKEY_DECRYPT or KEYCTL_PKEY_SIGN;
+ * any other value hits BUG(). The input is copied in from @_in, the key
+ * type's asym_eds_op runs on kernel buffers, and the produced bytes are
+ * copied out to @_out.
+ *
+ * If successful, the amount of data written into the output buffer is
+ * returned. Otherwise a negative error is returned: -EOPNOTSUPP when the
+ * key type has no asym_eds_op, -ENOMEM, -EFAULT, or the error from
+ * parameter setup or from the operation itself.
+ */
+long keyctl_pkey_e_d_s(int op,
+		       const struct keyctl_pkey_params __user *_params,
+		       const char __user *_info,
+		       const void __user *_in,
+		       void __user *_out)
+{
+	struct kernel_pkey_params params;
+	void *in, *out;
+	long ret;
+
+	ret = keyctl_pkey_params_get_2(_params, _info, op, &params);
+	if (ret < 0)
+		goto error_params;
+
+	ret = -EOPNOTSUPP;
+	if (!params.key->type->asym_eds_op)
+		goto error_params;
+
+	switch (op) {
+	case KEYCTL_PKEY_ENCRYPT:
+		params.op = kernel_pkey_encrypt;
+		break;
+	case KEYCTL_PKEY_DECRYPT:
+		params.op = kernel_pkey_decrypt;
+		break;
+	case KEYCTL_PKEY_SIGN:
+		params.op = kernel_pkey_sign;
+		break;
+	default:
+		BUG();
+	}
+
+	in = memdup_user(_in, params.in_len);
+	if (IS_ERR(in)) {
+		ret = PTR_ERR(in);
+		goto error_params;
+	}
+
+	ret = -ENOMEM;
+	out = kmalloc(params.out_len, GFP_KERNEL);
+	if (!out)
+		goto error_in;
+
+	ret = params.key->type->asym_eds_op(&params, in, out);
+	if (ret < 0)
+		goto error_out;
+
+	/* Only the bytes the operation reports as produced are copied out. */
+	if (copy_to_user(_out, out, ret) != 0)
+		ret = -EFAULT;
+
+error_out:
+	kfree(out);
+error_in:
+	kfree(in);
+error_params:
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
+
+/*
+ * Verify a signature.
+ *
+ * Verify a public key signature using the key named in @_params.
+ *
+ * _info is a string of supplementary information in key=val format. For
+ * instance, it might contain:
+ *
+ *	"enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the signature blob encoding and hash= selects the OID
+ * to go in that particular encoding. If enc= isn't supplied, it's assumed
+ * that the caller is supplying raw values.
+ *
+ * @_in and @_in2 are the two user-space inputs (in_len and in2_len bytes);
+ * both are copied into the kernel and handed to the key type's
+ * asym_verify_signature operation.
+ *
+ * If successful, 0 is returned. Otherwise a negative error is returned:
+ * -EOPNOTSUPP when the key type has no asym_verify_signature, or the error
+ * from parameter setup, the user copies or the verification itself.
+ */
+long keyctl_pkey_verify(const struct keyctl_pkey_params __user *_params,
+			const char __user *_info,
+			const void __user *_in,
+			const void __user *_in2)
+{
+	struct kernel_pkey_params params;
+	void *in, *in2;
+	long ret;
+
+	ret = keyctl_pkey_params_get_2(_params, _info, KEYCTL_PKEY_VERIFY,
+				       &params);
+	if (ret < 0)
+		goto error_params;
+
+	ret = -EOPNOTSUPP;
+	if (!params.key->type->asym_verify_signature)
+		goto error_params;
+
+	in = memdup_user(_in, params.in_len);
+	if (IS_ERR(in)) {
+		ret = PTR_ERR(in);
+		goto error_params;
+	}
+
+	in2 = memdup_user(_in2, params.in2_len);
+	if (IS_ERR(in2)) {
+		ret = PTR_ERR(in2);
+		goto error_in;
+	}
+
+	params.op = kernel_pkey_verify;
+	ret = params.key->type->asym_verify_signature(&params, in, in2);
+
+	kfree(in2);
+error_in:
+	kfree(in);
+error_params:
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index b69d3b1..ff67893 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -30,7 +30,7 @@
#include <linux/tpm.h>
#include <linux/tpm_command.h>
-#include "trusted.h"
+#include <keys/trusted.h>
static const char hmac_alg[] = "hmac(sha1)";
static const char hash_alg[] = "sha1";
@@ -121,7 +121,7 @@ static int TSS_rawhmac(unsigned char *digest, const unsigned char *key,
/*
* calculate authorization info fields to send to TPM
*/
-static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
unsigned int keylen, unsigned char *h1,
unsigned char *h2, unsigned char h3, ...)
{
@@ -168,11 +168,12 @@ static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
kzfree(sdesc);
return ret;
}
+EXPORT_SYMBOL_GPL(TSS_authhmac);
/*
* verify the AUTH1_COMMAND (Seal) result from TPM
*/
-static int TSS_checkhmac1(unsigned char *buffer,
+int TSS_checkhmac1(unsigned char *buffer,
const uint32_t command,
const unsigned char *ononce,
const unsigned char *key,
@@ -249,6 +250,7 @@ static int TSS_checkhmac1(unsigned char *buffer,
kzfree(sdesc);
return ret;
}
+EXPORT_SYMBOL_GPL(TSS_checkhmac1);
/*
* verify the AUTH2_COMMAND (unseal) result from TPM
@@ -355,7 +357,7 @@ static int TSS_checkhmac2(unsigned char *buffer,
* For key specific tpm requests, we will generate and send our
* own TPM command packets using the drivers send function.
*/
-static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
+int trusted_tpm_send(unsigned char *cmd, size_t buflen)
{
int rc;
@@ -367,6 +369,7 @@ static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
rc = -EPERM;
return rc;
}
+EXPORT_SYMBOL_GPL(trusted_tpm_send);
/*
* Lock a trusted key, by extending a selected PCR.
@@ -425,7 +428,7 @@ static int osap(struct tpm_buf *tb, struct osapsess *s,
/*
* Create an object independent authorisation protocol (oiap) session
*/
-static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
{
int ret;
@@ -442,6 +445,7 @@ static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
TPM_NONCE_SIZE);
return 0;
}
+EXPORT_SYMBOL_GPL(oiap);
struct tpm_digests {
unsigned char encauth[SHA1_DIGEST_SIZE];
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index fcd965f..9be76c8 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -146,53 +146,22 @@ static int apply_constraint_to_size(struct snd_pcm_hw_params *params,
struct snd_interval *s = hw_param_interval(params, rule->var);
const struct snd_interval *r =
hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
- struct snd_interval t = {
- .min = s->min, .max = s->max, .integer = 1,
- };
+ struct snd_interval t = {0};
+ unsigned int step = 0;
int i;
for (i = 0; i < CIP_SFC_COUNT; ++i) {
- unsigned int rate = amdtp_rate_table[i];
- unsigned int step = amdtp_syt_intervals[i];
-
- if (!snd_interval_test(r, rate))
- continue;
-
- t.min = roundup(t.min, step);
- t.max = rounddown(t.max, step);
+ if (snd_interval_test(r, amdtp_rate_table[i]))
+ step = max(step, amdtp_syt_intervals[i]);
}
- if (snd_interval_checkempty(&t))
- return -EINVAL;
+ t.min = roundup(s->min, step);
+ t.max = rounddown(s->max, step);
+ t.integer = 1;
return snd_interval_refine(s, &t);
}
-static int apply_constraint_to_rate(struct snd_pcm_hw_params *params,
- struct snd_pcm_hw_rule *rule)
-{
- struct snd_interval *r =
- hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
- const struct snd_interval *s = hw_param_interval_c(params, rule->deps[0]);
- struct snd_interval t = {
- .min = UINT_MAX, .max = 0, .integer = 1,
- };
- int i;
-
- for (i = 0; i < CIP_SFC_COUNT; ++i) {
- unsigned int step = amdtp_syt_intervals[i];
- unsigned int rate = amdtp_rate_table[i];
-
- if (s->min % step || s->max % step)
- continue;
-
- t.min = min(t.min, rate);
- t.max = max(t.max, rate);
- }
-
- return snd_interval_refine(r, &t);
-}
-
/**
* amdtp_stream_add_pcm_hw_constraints - add hw constraints for PCM substream
* @s: the AMDTP stream, which must be initialized.
@@ -250,24 +219,16 @@ int amdtp_stream_add_pcm_hw_constraints(struct amdtp_stream *s,
*/
err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
apply_constraint_to_size, NULL,
+ SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
SNDRV_PCM_HW_PARAM_RATE, -1);
if (err < 0)
goto end;
- err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
- apply_constraint_to_rate, NULL,
- SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
- if (err < 0)
- goto end;
err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
apply_constraint_to_size, NULL,
+ SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
SNDRV_PCM_HW_PARAM_RATE, -1);
if (err < 0)
goto end;
- err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
- apply_constraint_to_rate, NULL,
- SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1);
- if (err < 0)
- goto end;
end:
return err;
}
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 0f6dbcf..ed50b22 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -240,8 +240,8 @@ static void dice_remove(struct fw_unit *unit)
cancel_delayed_work_sync(&dice->dwork);
if (dice->registered) {
- /* No need to wait for releasing card object in this context. */
- snd_card_free_when_closed(dice->card);
+ // Block till all of ALSA character devices are released.
+ snd_card_free(dice->card);
}
mutex_destroy(&dice->mutex);
diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h
index 04402c1..9847b66 100644
--- a/sound/pci/ca0106/ca0106.h
+++ b/sound/pci/ca0106/ca0106.h
@@ -582,7 +582,7 @@
#define SPI_PL_BIT_R_R (2<<7) /* right channel = right */
#define SPI_PL_BIT_R_C (3<<7) /* right channel = (L+R)/2 */
#define SPI_IZD_REG 2
-#define SPI_IZD_BIT (1<<4) /* infinite zero detect */
+#define SPI_IZD_BIT (0<<4) /* infinite zero detect */
#define SPI_FMT_REG 3
#define SPI_FMT_BIT_RJ (0<<0) /* right justified mode */
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 5072cbd..dae1584 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -16,5 +16,6 @@
*/
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
#include <asm-generic/unistd.h>
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56..8c876c1 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 9a50f02..16511d9 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5
/* kvm attributes for migration mode */
#define KVM_S390_VM_MIGRATION_STOP 0
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8a6eff9..dabfcf7 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -300,10 +300,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 has_error_code;
- union {
- __u8 pad;
- __u8 pending;
- };
+ __u8 pending;
__u32 error_code;
} exception;
struct {
@@ -387,6 +384,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index df4bedb..538546e 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee)
/* fs/stat.c */
#define __NR_readlinkat 78
__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
#define __NR3264_fstatat 79
__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
#define __NR3264_fstat 80
__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
/* fs/sync.c */
#define __NR_sync 81
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644
index 0000000..a441ea1
--- /dev/null
+++ b/tools/include/uapi/linux/fs.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's. Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be
+ * recompiled to take full advantage of the new limits..
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+#define SEEK_SET 0 /* seek relative to beginning of file */
+#define SEEK_CUR 1 /* seek relative to current file position */
+#define SEEK_END 2 /* seek relative to end of file */
+#define SEEK_DATA 3 /* seek to the next data */
+#define SEEK_HOLE 4 /* seek to the next hole */
+#define SEEK_MAX SEEK_HOLE
+
+#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
+#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
+#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
+
+struct file_clone_range {
+ __s64 src_fd;
+ __u64 src_offset;
+ __u64 src_length;
+ __u64 dest_offset;
+};
+
+struct fstrim_range {
+ __u64 start;
+ __u64 len;
+ __u64 minlen;
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME 0
+#define FILE_DEDUPE_RANGE_DIFFERS 1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+ __s64 dest_fd; /* in - destination file */
+ __u64 dest_offset; /* in - start of extent in destination */
+ __u64 bytes_deduped; /* out - total # of bytes we were able
+ * to dedupe from this file. */
+ /* status of this dedupe operation:
+ * < 0 for error
+ * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+ * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+ */
+ __s32 status; /* out - see above description */
+ __u32 reserved; /* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+ __u64 src_offset; /* in - start of extent in source */
+ __u64 src_length; /* in - length of extent */
+ __u16 dest_count; /* in - total elements in info array */
+ __u16 reserved1; /* must be zero */
+ __u32 reserved2; /* must be zero */
+ struct file_dedupe_range_info info[0];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+ unsigned long nr_files; /* read only */
+ unsigned long nr_free_files; /* read only */
+ unsigned long max_files; /* tunable */
+};
+
+struct inodes_stat_t {
+ long nr_inodes;
+ long nr_unused;
+ long dummy[5]; /* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE 8192 /* this can well be larger on a larger system */
+
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ */
+#define MS_RDONLY 1 /* Mount read-only */
+#define MS_NOSUID 2 /* Ignore suid and sgid bits */
+#define MS_NODEV 4 /* Disallow access to device special files */
+#define MS_NOEXEC 8 /* Disallow program execution */
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOATIME 1024 /* Do not update access times. */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_BIND 4096
+#define MS_MOVE 8192
+#define MS_REC 16384
+#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
+ MS_VERBOSE is deprecated. */
+#define MS_SILENT 32768
+#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#define MS_PRIVATE (1<<18) /* change to private */
+#define MS_SLAVE (1<<19) /* change to slave */
+#define MS_SHARED (1<<20) /* change to shared */
+#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
+#define MS_NOREMOTELOCK (1<<27)
+#define MS_NOSEC (1<<28)
+#define MS_BORN (1<<29)
+#define MS_ACTIVE (1<<30)
+#define MS_NOUSER (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/
+ unsigned char fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+
+/* the read-only stuff doesn't really belong here, but any other place is
+ probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
+#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */
+#define BLKRRPART _IO(0x12,95) /* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
+#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
+#define BLKRASET _IO(0x12,98) /* set read ahead for block device */
+#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */
+#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons. Do not use sizeof! */
+
+#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+ probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET _IOR(0x12,112,size_t)
+#define BLKBSZSET _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
+#define FIBMAP _IO(0x00,1) /* bmap access */
+#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
+#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
+#define FITHAW _IOWR('X', 120, int) /* Thaw */
+#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
+#define FICLONE _IOW(0x94, 9, int)
+#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */
+
+#define FS_IOC_GETFLAGS _IOR('f', 1, long)
+#define FS_IOC_SETFLAGS _IOW('f', 2, long)
+#define FS_IOC_GETVERSION _IOR('v', 1, long)
+#define FS_IOC_SETVERSION _IOW('v', 2, long)
+#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
+#define FS_IOC32_GETVERSION _IOR('v', 1, int)
+#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE 8
+
+#define FS_POLICY_FLAGS_PAD_4 0x00
+#define FS_POLICY_FLAGS_PAD_8 0x01
+#define FS_POLICY_FLAGS_PAD_16 0x02
+#define FS_POLICY_FLAGS_PAD_32 0x03
+#define FS_POLICY_FLAGS_PAD_MASK 0x03
+#define FS_POLICY_FLAGS_VALID 0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID 0
+#define FS_ENCRYPTION_MODE_AES_256_XTS 1
+#define FS_ENCRYPTION_MODE_AES_256_GCM 2
+#define FS_ENCRYPTION_MODE_AES_256_CBC 3
+#define FS_ENCRYPTION_MODE_AES_256_CTS 4
+#define FS_ENCRYPTION_MODE_AES_128_CBC 5
+#define FS_ENCRYPTION_MODE_AES_128_CTS 6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */
+
+struct fscrypt_policy {
+ __u8 version;
+ __u8 contents_encryption_mode;
+ __u8 filenames_encryption_mode;
+ __u8 flags;
+ __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX "fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE 8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE 64
+
+struct fscrypt_key {
+ __u32 mode;
+ __u8 raw[FS_MAX_KEY_SIZE];
+ __u32 size;
+};
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs). You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
+ * almost out of 32-bit flags. :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface. This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define FS_SECRM_FL 0x00000001 /* Secure deletion */
+#define FS_UNRM_FL 0x00000002 /* Undelete */
+#define FS_COMPR_FL 0x00000004 /* Compress file */
+#define FS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define FS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL 0x00000040 /* do not dump file */
+#define FS_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL 0x00000100
+#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
+#define FS_BTREE_FL 0x00001000 /* btree format dir */
+#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL 0x00080000 /* Extents */
+#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
+#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE 1
+#define SYNC_FILE_RANGE_WRITE 2
+#define SYNC_FILE_RANGE_WAIT_AFTER 4
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+ RWF_APPEND)
+
+#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 58faab8..1debfa4 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -287,6 +287,7 @@ enum {
IFLA_BR_MCAST_STATS_ENABLED,
IFLA_BR_MCAST_IGMP_VERSION,
IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
__IFLA_BR_MAX,
};
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2875ce8..2b7a652 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -420,13 +420,19 @@ struct kvm_run {
struct kvm_coalesced_mmio_zone {
__u64 addr;
__u32 size;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 pio;
+ };
};
struct kvm_coalesced_mmio {
__u64 phys_addr;
__u32 len;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 pio;
+ };
__u8 data[8];
};
@@ -752,6 +758,15 @@ struct kvm_ppc_resize_hpt {
#define KVM_S390_SIE_PAGE_OFFSET 1
/*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
+ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+/*
* ioctls for /dev/kvm fds:
*/
#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
@@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_SEND_IPI 161
#define KVM_CAP_COALESCED_PIO 162
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index bfd5938..d0f515d 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -28,7 +28,9 @@
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
index 776bc92..486ed1f 100644
--- a/tools/include/uapi/linux/netlink.h
+++ b/tools/include/uapi/linux/netlink.h
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
#define NETLINK_LIST_MEMBERSHIPS 9
#define NETLINK_CAP_ACK 10
#define NETLINK_EXT_ACK 11
+#define NETLINK_DUMP_STRICT_CHK 12
struct nl_pktinfo {
__u32 group;
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index f35eb72..9de8780 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events
* PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event
+ * PERF_RECORD_MISC_FORK_EXEC - PERF_RECORD_FORK event (perf internal)
* PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
*/
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
* These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index ed0a120..404d4b9 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -752,7 +752,7 @@ struct snd_timer_info {
#define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */
struct snd_timer_params {
- unsigned int flags; /* flags - SNDRV_MIXER_PSFLG_* */
+ unsigned int flags; /* flags - SNDRV_TIMER_PSFLG_* */
unsigned int ticks; /* requested resolution in ticks */
unsigned int queue_size; /* total size of queue (32-1024) */
unsigned int reserved0; /* reserved, was: failure locations */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b607be7..d6e62e9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2084,19 +2084,19 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
prog->expected_attach_type = type;
}
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \
- { string, sizeof(string) - 1, ptype, eatype, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
+ { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
/* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL)
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
/* Programs that can be attached. */
#define BPF_APROG_SEC(string, ptype, atype) \
- BPF_PROG_SEC_IMPL(string, ptype, 0, atype)
+ BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
/* Programs that must specify expected attach type at load time. */
#define BPF_EAPROG_SEC(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype)
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
/* Programs that can be attached but attach type can't be identified by section
* name. Kept for backward compatibility.
@@ -2108,6 +2108,7 @@ static const struct {
size_t len;
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
+ int is_attachable;
enum bpf_attach_type attach_type;
} section_names[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
@@ -2198,7 +2199,7 @@ int libbpf_attach_type_by_name(const char *name,
for (i = 0; i < ARRAY_SIZE(section_names); i++) {
if (strncmp(name, section_names[i].sec, section_names[i].len))
continue;
- if (section_names[i].attach_type == -EINVAL)
+ if (!section_names[i].is_attachable)
return -EINVAL;
*attach_type = section_names[i].attach_type;
return 0;
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index cb7154e..dbb9efb 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
default:
break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
default:
break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
return opterror(opt, "expects a numerical value", flags);
return 0;
+ case OPTION_ULONG:
+ if (unset) {
+ *(unsigned long *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(unsigned long *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
case OPTION_U64:
if (unset) {
*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
case OPTION_ARGUMENT:
break;
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
case OPTION_INTEGER:
case OPTION_UINTEGER:
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 92fdbe1..6ca2a8b 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -25,6 +25,7 @@ enum parse_opt_type {
OPTION_STRING,
OPTION_INTEGER,
OPTION_LONG,
+ OPTION_ULONG,
OPTION_CALLBACK,
OPTION_U64,
OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644
index 0000000..6222c1e
--- /dev/null
+++ b/tools/perf/Documentation/build-xed.txt
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+ $ git clone https://github.com/intelxed/mbuild.git mbuild
+ $ git clone https://github.com/intelxed/xed
+ $ cd xed
+ $ ./mfile.py --share
+ $ ./mfile.py examples
+ $ sudo ./mfile.py --prefix=/usr/local install
+ $ sudo ldconfig
+ $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+ $ xed | head -3
+ ERROR: required argument(s) were missing
+ Copyright (C) 2017, Intel Corporation. All rights reserved.
+ XED version: [v10.0-328-g7d62c8c49b7b]
+ $
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 76971d2..115eaac 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -106,7 +106,7 @@
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE.
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a3abe04..c2182cb 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,10 +11,11 @@
l synthesize last branch entries (use with i or x)
s skip initial number of events
- The default is all events i.e. the same as --itrace=ibxwpe
+ The default is all events i.e. the same as --itrace=ibxwpe,
+ except for perf script where it is --itrace=ce
- In addition, the period (default 100000) for instructions events
- can be specified in units of:
+ In addition, the period (default 100000, except for perf script where it is 1)
+ for instructions events can be specified in units of:
i instructions
t ticks
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index afdafe2..a2b37ce 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -383,6 +383,24 @@
will be printed. Each entry has function name and file/line. Enabled by
default, disable with --no-inline.
+--insn-trace::
+ Show instruction stream for intel_pt traces. Combine with --xed to
+ show disassembly.
+
+--xed::
+ Run xed disassembler on output. Requires installing the xed disassembler.
+
+--call-trace::
+ Show call stream for intel_pt traces. The CPUs are interleaved, but
+ can be filtered with -C.
+
+--call-ret-trace::
+ Show call and return stream for intel_pt traces.
+
+--graph-function::
+ For itrace, only show the specified functions and their callees.
+ Multiple functions can be separated by commas.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 114fda1..808b664 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -242,6 +242,16 @@
--hierarchy::
Enable hierarchy output.
+--overwrite::
+ Enable this to use just the most recent records, which helps in high core count
+ machines such as Knights Landing/Mill, but right now is disabled by default as
+ the pausing used in this technique is leading to loss of metadata events such
+ as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
+ to lots of unknown samples appearing on the UI. Enable this if you are in such
+ machines and profiling a workload that doesn't create short-lived threads and/or
+ doesn't use many executable mmap operations. Work is being planned to solve
+ this situation, till then, this will remain disabled by default.
+
--force::
Don't do ownership validation.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 115db9e..e113450 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -171,6 +171,11 @@
--kernel-syscall-graph::
Show the kernel callchains on the syscall exit path.
+--max-events=N::
+ Stop after processing N events. Note that strace-like events are considered
+ only at exit time or when a syscall is interrupted, i.e. in those cases this
+ option is equivalent to the number of lines printed.
+
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@
As you can see, there was major pagefault in python process, from
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+ $ perf trace -e open* --max-events 4
+ [root@jouet perf]# trace -e open* --max-events 4
+ 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+ 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+ 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+ 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+ $
+
+Trace the first minor page fault when running a workload:
+
+ # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+ 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+ __clear_user ([kernel.kallsyms])
+ load_elf_binary ([kernel.kallsyms])
+ search_binary_handler ([kernel.kallsyms])
+ __do_execve_file.isra.33 ([kernel.kallsyms])
+ __x64_sys_execve ([kernel.kallsyms])
+ do_syscall_64 ([kernel.kallsyms])
+ entry_SYSCALL_64 ([kernel.kallsyms])
+ #
+
+Trace the next minor page fault to take place on the first CPU:
+
+ # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+ 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+ js::gc::FreeSpan::initAsEmpty (inlined)
+ js::gc::Arena::setAsNotAllocated (inlined)
+ js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+ js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+ js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+ js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+ js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+ js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+ js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+ JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+ AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+ js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+ [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+ #
+
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+ # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+ 0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+ 0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+ 254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+ __dev_queue_xmit ([kernel.kallsyms])
+ 273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+ __dev_queue_xmit ([kernel.kallsyms])
+ 274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+ __dev_queue_xmit ([kernel.kallsyms])
+ 2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+ 2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+ 4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+ 8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+ 8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+ #
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2f3bf02..3ccb4f0 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1,4 +1,5 @@
include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
# The default target of this Makefile is...
all:
@@ -385,6 +386,8 @@
SHELL = $(SHELL_PATH)
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@@ -460,6 +463,18 @@
$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
+mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
+mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
+
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
+ $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
+mount_flags_array := $(beauty_outdir)/mount_flags_array.c
+mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+
+$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
+ $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -577,6 +592,8 @@
$(socket_ipproto_array) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
+ $(mmap_flags_array) \
+ $(mount_flags_array) \
$(perf_ioctl_array) \
$(prctl_option_array) \
$(arch_errno_name_array)
@@ -863,6 +880,8 @@
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
$(OUTPUT)pmu-events/pmu-events.c \
$(OUTPUT)$(madvise_behavior_array) \
+ $(OUTPUT)$(mmap_flags_array) \
+ $(OUTPUT)$(mount_flags_array) \
$(OUTPUT)$(drm_ioctl_array) \
$(OUTPUT)$(pkey_alloc_access_rights_array) \
$(OUTPUT)$(sndrv_ctl_ioctl_array) \
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 2dbb8cad..c88fd32 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -23,7 +23,7 @@
{
local sc nr last_sc
- create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+ create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
{
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 7fbca17..275dea7f 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,3 +1,5 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644
index 0000000..2614c01
--- /dev/null
+++ b/tools/perf/arch/sparc/annotate/instructions.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+ if (cond[0] == '\0')
+ return 1;
+
+ if (cond[0] == 'a' && cond[1] == '\0')
+ return 1;
+
+ if (cond[0] == 'c' &&
+ (cond[1] == 'c' || cond[1] == 's') &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'e' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 'q' && cond[2] == '\0')))
+ return 1;
+
+ if (cond[0] == 'g' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 't' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'l' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 't' && cond[2] == '\0') ||
+ (cond[1] == 'u' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'n' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'z' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'b' &&
+ cond[1] == 'p' &&
+ cond[2] == 'o' &&
+ cond[3] == 's' &&
+ cond[4] == '\0')
+ return 1;
+
+ if (cond[0] == 'v' &&
+ (cond[1] == 'c' || cond[1] == 's') &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'b' &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+ if ((cond[0] == 'n' || cond[0] == 'l') &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'z' &&
+ cond[1] == '\0')
+ return 1;
+
+ if ((cond[0] == 'g' || cond[0] == 'l') &&
+ cond[1] == 'e' &&
+ cond[2] == 'z' &&
+ cond[3] == '\0')
+ return 1;
+
+ if (cond[0] == 'g' &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+ if (cond[0] == '\0')
+ return 1;
+
+ if ((cond[0] == 'a' || cond[0] == 'e' ||
+ cond[0] == 'z' || cond[0] == 'g' ||
+ cond[0] == 'l' || cond[0] == 'n' ||
+ cond[0] == 'o' || cond[0] == 'u') &&
+ cond[1] == '\0')
+ return 1;
+
+ if (((cond[0] == 'g' && cond[1] == 'e') ||
+ (cond[0] == 'l' && (cond[1] == 'e' ||
+ cond[1] == 'g')) ||
+ (cond[0] == 'n' && (cond[1] == 'e' ||
+ cond[1] == 'z')) ||
+ (cond[0] == 'u' && (cond[1] == 'e' ||
+ cond[1] == 'g' ||
+ cond[1] == 'l'))) &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'u' &&
+ (cond[1] == 'g' || cond[1] == 'l') &&
+ cond[2] == 'e' &&
+ cond[3] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = NULL;
+
+ if (!strcmp(name, "call") ||
+ !strcmp(name, "jmp") ||
+ !strcmp(name, "jmpl")) {
+ ops = &call_ops;
+ } else if (!strcmp(name, "ret") ||
+ !strcmp(name, "retl") ||
+ !strcmp(name, "return")) {
+ ops = &ret_ops;
+ } else if (!strcmp(name, "mov")) {
+ ops = &mov_ops;
+ } else {
+ if (name[0] == 'c' &&
+ (name[1] == 'w' || name[1] == 'x'))
+ name += 2;
+
+ if (name[0] == 'b') {
+ const char *cond = name + 1;
+
+ if (cond[0] == 'r') {
+ if (is_branch_reg_cond(cond + 1))
+ ops = &jump_ops;
+ } else if (is_branch_cond(cond)) {
+ ops = &jump_ops;
+ }
+ } else if (name[0] == 'f' && name[1] == 'b') {
+ if (is_branch_float_cond(name + 2))
+ ops = &jump_ops;
+ }
+ }
+
+ if (ops)
+ arch__associate_ins_ops(arch, name, ops);
+
+ return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ if (!arch->initialized) {
+ arch->initialized = true;
+ arch->associate_instruction_ops = sparc__associate_instruction_ops;
+ arch->objdump.comment_char = '#';
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0980dfe..10cf889 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -592,6 +592,9 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+ if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+ perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__init_features(rec);
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
argv, data->is_pipe,
@@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = {
CLOCKID_END,
};
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
/* if its a number, we're done */
if (sscanf(str, "%d", &opts->clockid) == 1)
- return 0;
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
/* allow a "CLOCK_" prefix to the name */
if (!strncasecmp(str, "CLOCK_", 6))
@@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
for (cm = clockids; cm->name; cm++) {
if (!strcasecmp(str, cm->name)) {
opts->clockid = cm->clockid;
- return 0;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
}
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4da5e32..b5bc85b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -44,6 +44,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
+#include <subcmd/pager.h>
#include "sane_ctype.h"
@@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
- int insn, FILE *fp)
+ int insn, FILE *fp, int *total_cycles)
{
int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
dump_insn(x, ip, inbuf, len, NULL),
@@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
en->flags.in_tx ? " INTX" : "",
en->flags.abort ? " ABORT" : "");
if (en->flags.cycles) {
- printed += fprintf(fp, " %d cycles", en->flags.cycles);
+ *total_cycles += en->flags.cycles;
+ printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
if (insn)
printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
}
@@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
u8 buffer[MAXBB];
unsigned off;
struct symbol *lastsym = NULL;
+ int total_cycles = 0;
if (!(br && br->nr))
return 0;
@@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
- &x, buffer, len, 0, fp);
+ &x, buffer, len, 0, fp, &total_cycles);
}
/* Print all blocks */
@@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+ printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+ &total_cycles);
break;
} else {
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@@ -1104,6 +1108,35 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
return printed;
}
+static const char *resolve_branch_sym(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al,
+ u64 *ip)
+{
+ struct addr_location addr_al;
+ struct perf_event_attr *attr = &evsel->attr;
+ const char *name = NULL;
+
+ if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+ if (sample_addr_correlates_sym(attr)) {
+ thread__resolve(thread, &addr_al, sample);
+ if (addr_al.sym)
+ name = addr_al.sym->name;
+ else
+ *ip = sample->addr;
+ } else {
+ *ip = sample->addr;
+ }
+ } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+ if (al->sym)
+ name = al->sym->name;
+ else
+ *ip = sample->ip;
+ }
+ return name;
+}
+
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
@@ -1111,7 +1144,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
{
struct perf_event_attr *attr = &evsel->attr;
size_t depth = thread_stack__depth(thread);
- struct addr_location addr_al;
const char *name = NULL;
static int spacing;
int len = 0;
@@ -1125,22 +1157,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1;
- if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
- if (sample_addr_correlates_sym(attr)) {
- thread__resolve(thread, &addr_al, sample);
- if (addr_al.sym)
- name = addr_al.sym->name;
- else
- ip = sample->addr;
- } else {
- ip = sample->addr;
- }
- } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
- if (al->sym)
- name = al->sym->name;
- else
- ip = sample->ip;
- }
+ name = resolve_branch_sym(sample, evsel, thread, al, &ip);
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
dlen += fprintf(fp, "(");
@@ -1646,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
}
}
+static bool show_event(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al)
+{
+ int depth = thread_stack__depth(thread);
+
+ if (!symbol_conf.graph_function)
+ return true;
+
+ if (thread->filter) {
+ if (depth <= thread->filter_entry_depth) {
+ thread->filter = false;
+ return false;
+ }
+ return true;
+ } else {
+ const char *s = symbol_conf.graph_function;
+ u64 ip;
+ const char *name = resolve_branch_sym(sample, evsel, thread, al,
+ &ip);
+ unsigned nlen;
+
+ if (!name)
+ return false;
+ nlen = strlen(name);
+ while (*s) {
+ unsigned len = strcspn(s, ",");
+ if (nlen == len && !strncmp(name, s, len)) {
+ thread->filter = true;
+ thread->filter_entry_depth = depth;
+ return true;
+ }
+ s += len;
+ if (*s == ',')
+ s++;
+ }
+ return false;
+ }
+}
+
static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al,
@@ -1660,6 +1718,9 @@ static void process_event(struct perf_script *script,
if (output[type].fields == 0)
return;
+ if (!show_event(sample, evsel, thread, al))
+ return;
+
++es->samples;
perf_sample__fprintf_start(sample, thread, evsel,
@@ -1737,6 +1798,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(METRIC))
perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+
+ if (verbose)
+ fflush(fp);
}
static struct scripting_ops *scripting_ops;
@@ -3100,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
#define perf_script__process_auxtrace_info 0
#endif
+static int parse_insn_trace(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ parse_output_fields(NULL, "+insn,-event,-period", 0);
+ itrace_parse_synth_opts(opt, "i0ns", 0);
+ nanosecs = true;
+ return 0;
+}
+
+static int parse_xed(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ force_pager("xed -F insn: -A -64 | less");
+ return 0;
+}
+
+static int parse_call_trace(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
+ itrace_parse_synth_opts(opt, "cewp", 0);
+ nanosecs = true;
+ return 0;
+}
+
+static int parse_callret_trace(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
+ itrace_parse_synth_opts(opt, "crewp", 0);
+ nanosecs = true;
+ return 0;
+}
+
int cmd_script(int argc, const char **argv)
{
bool show_full_info = false;
@@ -3109,7 +3211,10 @@ int cmd_script(int argc, const char **argv)
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
- struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+ struct itrace_synth_opts itrace_synth_opts = {
+ .set = false,
+ .default_no_sample = true,
+ };
char *script_path = NULL;
const char **__argv;
int i, j, err = 0;
@@ -3184,6 +3289,16 @@ int cmd_script(int argc, const char **argv)
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
+ OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+ "Decode instructions from itrace", parse_insn_trace),
+ OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
+ "Run xed disassembler on output", parse_xed),
+ OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
+ "Decode calls from from itrace", parse_call_trace),
+ OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
+ "Decode calls and returns from itrace", parse_callret_trace),
+ OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
+ "Only print symbols and callees with --call-trace/--call-ret-trace"),
OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
"Stop display of callgraph at these symbols"),
OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3417,8 +3532,10 @@ int cmd_script(int argc, const char **argv)
exit(-1);
}
- if (!script_name)
+ if (!script_name) {
setup_pager();
+ use_browser = 0;
+ }
session = perf_session__new(&data, false, &script.tool);
if (session == NULL)
@@ -3439,7 +3556,8 @@ int cmd_script(int argc, const char **argv)
script.session = session;
script__setup_sample_type(&script);
- if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+ if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
+ symbol_conf.graph_function)
itrace_synth_opts.thread_stack = true;
session->itrace_synth_opts = &itrace_synth_opts;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b86aba1..d1028d7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
return leader;
}
+static bool is_target_alive(struct target *_target,
+ struct thread_map *threads)
+{
+ struct stat st;
+ int i;
+
+ if (!target__has_task(_target))
+ return true;
+
+ for (i = 0; i < threads->nr; i++) {
+ char path[PATH_MAX];
+
+ scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
+ threads->map[i].pid);
+
+ if (!stat(path, &st))
+ return true;
+ }
+
+ return false;
+}
+
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
@@ -579,6 +601,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
enable_counters();
while (!done) {
nanosleep(&ts, NULL);
+ if (!is_target_alive(&target, evsel_list->threads))
+ break;
if (timeout)
break;
if (interval) {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d21d875..b2838de 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top)
if (!target__none(&opts->target))
perf_evlist__enable(top->evlist);
- /* Wait for a minimal set of events before starting the snapshot */
- perf_evlist__poll(top->evlist, 100);
-
- perf_top__mmap_read(top);
-
ret = -1;
if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
display_thread), top)) {
@@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top)
}
}
+ /* Wait for a minimal set of events before starting the snapshot */
+ perf_evlist__poll(top->evlist, 100);
+
+ perf_top__mmap_read(top);
+
while (!done) {
u64 hits = top->samples;
@@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv)
.uses_mmap = true,
},
.proc_map_timeout = 500,
- .overwrite = 1,
+ /*
+ * FIXME: Overwrite mode will lose PERF_RECORD_MMAP and other
+ * metadata when we pause; fix that and re-enable it by default.
+ * Probably by using a separate evlist with a dummy event, i.e. a
+ * non-overwrite ring buffer just for metadata events, while
+ * PERF_RECORD_SAMPLE stays in overwrite mode. -acme
+ */
+ .overwrite = 0,
},
.max_stack = sysctl__max_stack(),
.annotation_opts = annotation__default_options,
@@ -1372,6 +1379,8 @@ int cmd_top(int argc, const char **argv)
"Show raw trace event output (do not use print fmt or plugins)"),
OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
"Show entries in a hierarchy"),
+ OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
+ "Use a backward ring buffer, default: no"),
OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
"number of thread to run event synthesize"),
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 90289f31..dc8a6c4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -89,6 +89,8 @@ struct trace {
u64 base_time;
FILE *output;
unsigned long nr_events;
+ unsigned long nr_events_printed;
+ unsigned long max_events;
struct strlist *ev_qualifier;
struct {
size_t nr;
@@ -612,6 +614,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
struct syscall_arg_fmt {
size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+ unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
void *parm;
const char *name;
bool show_zero;
@@ -723,6 +726,10 @@ static struct syscall_fmt {
.arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
[3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
+ { .name = "mount",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+ [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+ .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
{ .name = "mprotect",
.arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
[2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
@@ -832,7 +839,8 @@ static struct syscall_fmt {
.arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
{ .name = "tkill",
.arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
- { .name = "umount2", .alias = "umount", },
+ { .name = "umount2", .alias = "umount",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
@@ -856,6 +864,18 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
}
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+ int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+ for (i = 0; i < nmemb; ++i) {
+ if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+ return &syscall_fmts[i];
+ }
+
+ return NULL;
+}
+
/*
* is_exit: is this "exit" or "exit_group"?
* is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
@@ -1485,6 +1505,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
return scnprintf(bf, size, "arg%d: ", arg->idx);
}
+/*
+ * Mask whatever needs masking before checking if the value is in fact zero,
+ * e.g. the mount 'flags' argument, where a magic flag has to be ignored; see
+ * the comment in tools/perf/trace/beauty/mount_flags.c
+ */
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+ if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+ return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+ return val;
+}
+
static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
struct syscall_arg *arg, unsigned long val)
{
@@ -1533,6 +1566,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
continue;
val = syscall_arg__val(&arg, arg.idx);
+ /*
+ * Some syscall args need masking; for most of them
+ * val is returned untouched.
+ */
+ val = syscall__mask_val(sc, &arg, val);
/*
* Suppress this argument if its value is zero and
@@ -1664,6 +1702,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
ttrace->entry_pending = false;
+ ++trace->nr_events_printed;
+
return printed;
}
@@ -1810,12 +1850,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
int max_stack = evsel->attr.sample_max_stack ?
evsel->attr.sample_max_stack :
trace->max_stack;
+ int err;
- if (machine__resolve(trace->host, &al, sample) < 0 ||
- thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+ if (machine__resolve(trace->host, &al, sample) < 0)
return -1;
- return 0;
+ err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
+ addr_location__put(&al);
+ return err;
}
static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
@@ -1940,6 +1982,13 @@ errno_print: {
fputc('\n', trace->output);
+ /*
+ * For the sake of --max-events, only a non-filtered sys_enter + sys_exit
+ * pair and other tracepoint events are counted as an 'event'.
+ */
+ if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+ interrupted = true;
+
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
@@ -2072,14 +2121,25 @@ static void bpf_output__fprintf(struct trace *trace,
{
binary__fprintf(sample->raw_data, sample->raw_size, 8,
bpf_output__printer, NULL, trace->output);
+ ++trace->nr_events_printed;
}
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ struct thread *thread;
int callchain_ret = 0;
+ /*
+ * Check if we called perf_evsel__disable(evsel) due to, for instance,
+ * this event's max_events having been hit and this is an entry coming
+ * from the ring buffer that we should discard, since the max events
+ * have already been considered/printed.
+ */
+ if (evsel->disabled)
+ return 0;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
if (sample->callchain) {
callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@@ -2127,6 +2187,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
event_format__fprintf(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size,
trace->output);
+ ++trace->nr_events_printed;
+
+ if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+ perf_evsel__disable(evsel);
+ perf_evsel__close(evsel);
+ }
}
}
@@ -2137,8 +2203,8 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
- thread__put(thread);
out:
+ thread__put(thread);
return 0;
}
@@ -2225,6 +2291,8 @@ static int trace__pgfault(struct trace *trace,
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+ ++trace->nr_events_printed;
out:
err = 0;
out_put:
@@ -2402,6 +2470,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
tracepoint_handler handler = evsel->handler;
handler(trace, evsel, event, sample);
}
+
+ if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+ interrupted = true;
}
static int trace__add_syscall_newtp(struct trace *trace)
@@ -2706,7 +2777,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
int timeout = done ? 100 : -1;
if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
- if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+ if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
draining = true;
goto again;
@@ -3138,6 +3209,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX];
+ struct syscall_fmt *fmt;
if (strace_groups_dir == NULL)
return -1;
@@ -3155,12 +3227,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
if (syscalltbl__id(trace->sctbl, s) >= 0 ||
syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
list = 1;
+ goto do_concat;
+ }
+
+ fmt = syscall_fmt__find_by_alias(s);
+ if (fmt != NULL) {
+ list = 1;
+ s = fmt->name;
} else {
path__join(group_name, sizeof(group_name), strace_groups_dir, s);
if (access(group_name, R_OK) == 0)
list = 1;
}
-
+do_concat:
if (lists[list]) {
sprintf(lists[list] + strlen(lists[list]), ",%s", s);
} else {
@@ -3249,6 +3328,7 @@ int cmd_trace(int argc, const char **argv)
.trace_syscalls = false,
.kernel_syscallchains = false,
.max_stack = UINT_MAX,
+ .max_events = ULONG_MAX,
};
const char *output_name = NULL;
const struct option trace_options[] = {
@@ -3301,6 +3381,8 @@ int cmd_trace(int argc, const char **argv)
&record_parse_callchain_opt),
OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
"Show the kernel callchains on the syscall exit path"),
+ OPT_ULONG(0, "max-events", &trace.max_events,
+ "Set the maximum number of events to print, exit after that is reached. "),
OPT_UINTEGER(0, "min-stack", &trace.min_stack,
"Set the minimum stack depth when parsing the callchain, "
"anything below the specified depth will be ignored."),
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index c72cc73..9531f7b 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -5,6 +5,7 @@
include/uapi/drm/drm.h
include/uapi/drm/i915_drm.h
include/uapi/linux/fcntl.h
+include/uapi/linux/fs.h
include/uapi/linux/kcmp.h
include/uapi/linux/kvm.h
include/uapi/linux/in.h
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 21bf7f5..0ed4a34 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -81,6 +81,7 @@ struct record_opts {
unsigned initial_delay;
bool use_clockid;
clockid_t clockid;
+ u64 clockid_res_ns;
unsigned int proc_map_timeout;
};
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
deleted file mode 100644
index b494a67a..0000000
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ /dev/null
@@ -1,339 +0,0 @@
-#!/usr/bin/python2
-# call-graph-from-sql.py: create call-graph from sql database
-# Copyright (c) 2014-2017, Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
-
-# To use this script you will need to have exported data using either the
-# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those
-# scripts for details.
-#
-# Following on from the example in the export scripts, a
-# call-graph can be displayed for the pt_example database like this:
-#
-# python tools/perf/scripts/python/call-graph-from-sql.py pt_example
-#
-# Note that for PostgreSQL, this script supports connecting to remote databases
-# by setting hostname, port, username, password, and dbname e.g.
-#
-# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
-#
-# The result is a GUI window with a tree representing a context-sensitive
-# call-graph. Expanding a couple of levels of the tree and adjusting column
-# widths to suit will display something like:
-#
-# Call Graph: pt_example
-# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
-# v- ls
-# v- 2638:2638
-# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
-# |- unknown unknown 1 13198 0.1 1 0.0
-# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
-# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3
-# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
-# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
-# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
-# >- __libc_csu_init ls 1 10354 0.1 10 0.0
-# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
-# v- main ls 1 8182043 99.6 180254 99.9
-#
-# Points to note:
-# The top level is a command name (comm)
-# The next level is a thread (pid:tid)
-# Subsequent levels are functions
-# 'Count' is the number of calls
-# 'Time' is the elapsed time until the function returns
-# Percentages are relative to the level above
-# 'Branch Count' is the total number of branches for that function and all
-# functions that it calls
-
-import sys
-from PySide.QtCore import *
-from PySide.QtGui import *
-from PySide.QtSql import *
-from decimal import *
-
-class TreeItem():
-
- def __init__(self, db, row, parent_item):
- self.db = db
- self.row = row
- self.parent_item = parent_item
- self.query_done = False;
- self.child_count = 0
- self.child_items = []
- self.data = ["", "", "", "", "", "", ""]
- self.comm_id = 0
- self.thread_id = 0
- self.call_path_id = 1
- self.branch_count = 0
- self.time = 0
- if not parent_item:
- self.setUpRoot()
-
- def setUpRoot(self):
- self.query_done = True
- query = QSqlQuery(self.db)
- ret = query.exec_('SELECT id, comm FROM comms')
- if not ret:
- raise Exception("Query failed: " + query.lastError().text())
- while query.next():
- if not query.value(0):
- continue
- child_item = TreeItem(self.db, self.child_count, self)
- self.child_items.append(child_item)
- self.child_count += 1
- child_item.setUpLevel1(query.value(0), query.value(1))
-
- def setUpLevel1(self, comm_id, comm):
- self.query_done = True;
- self.comm_id = comm_id
- self.data[0] = comm
- self.child_items = []
- self.child_count = 0
- query = QSqlQuery(self.db)
- ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
- if not ret:
- raise Exception("Query failed: " + query.lastError().text())
- while query.next():
- child_item = TreeItem(self.db, self.child_count, self)
- self.child_items.append(child_item)
- self.child_count += 1
- child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
-
- def setUpLevel2(self, comm_id, thread_id, pid, tid):
- self.comm_id = comm_id
- self.thread_id = thread_id
- self.data[0] = str(pid) + ":" + str(tid)
-
- def getChildItem(self, row):
- return self.child_items[row]
-
- def getParentItem(self):
- return self.parent_item
-
- def getRow(self):
- return self.row
-
- def timePercent(self, b):
- if not self.time:
- return "0.0"
- x = (b * Decimal(100)) / self.time
- return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
- def branchPercent(self, b):
- if not self.branch_count:
- return "0.0"
- x = (b * Decimal(100)) / self.branch_count
- return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
- def addChild(self, call_path_id, name, dso, count, time, branch_count):
- child_item = TreeItem(self.db, self.child_count, self)
- child_item.comm_id = self.comm_id
- child_item.thread_id = self.thread_id
- child_item.call_path_id = call_path_id
- child_item.branch_count = branch_count
- child_item.time = time
- child_item.data[0] = name
- if dso == "[kernel.kallsyms]":
- dso = "[kernel]"
- child_item.data[1] = dso
- child_item.data[2] = str(count)
- child_item.data[3] = str(time)
- child_item.data[4] = self.timePercent(time)
- child_item.data[5] = str(branch_count)
- child_item.data[6] = self.branchPercent(branch_count)
- self.child_items.append(child_item)
- self.child_count += 1
-
- def selectCalls(self):
- self.query_done = True;
- query = QSqlQuery(self.db)
- ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
- '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
- '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
- '( SELECT ip FROM call_paths where id = call_path_id ) '
- 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
- ' ORDER BY call_path_id')
- if not ret:
- raise Exception("Query failed: " + query.lastError().text())
- last_call_path_id = 0
- name = ""
- dso = ""
- count = 0
- branch_count = 0
- total_branch_count = 0
- time = 0
- total_time = 0
- while query.next():
- if query.value(1) == last_call_path_id:
- count += 1
- branch_count += query.value(2)
- time += query.value(4) - query.value(3)
- else:
- if count:
- self.addChild(last_call_path_id, name, dso, count, time, branch_count)
- last_call_path_id = query.value(1)
- name = query.value(5)
- dso = query.value(6)
- count = 1
- total_branch_count += branch_count
- total_time += time
- branch_count = query.value(2)
- time = query.value(4) - query.value(3)
- if count:
- self.addChild(last_call_path_id, name, dso, count, time, branch_count)
- total_branch_count += branch_count
- total_time += time
- # Top level does not have time or branch count, so fix that here
- if total_branch_count > self.branch_count:
- self.branch_count = total_branch_count
- if self.branch_count:
- for child_item in self.child_items:
- child_item.data[6] = self.branchPercent(child_item.branch_count)
- if total_time > self.time:
- self.time = total_time
- if self.time:
- for child_item in self.child_items:
- child_item.data[4] = self.timePercent(child_item.time)
-
- def childCount(self):
- if not self.query_done:
- self.selectCalls()
- return self.child_count
-
- def columnCount(self):
- return 7
-
- def columnHeader(self, column):
- headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
- return headers[column]
-
- def getData(self, column):
- return self.data[column]
-
-class TreeModel(QAbstractItemModel):
-
- def __init__(self, db, parent=None):
- super(TreeModel, self).__init__(parent)
- self.db = db
- self.root = TreeItem(db, 0, None)
-
- def columnCount(self, parent):
- return self.root.columnCount()
-
- def rowCount(self, parent):
- if parent.isValid():
- parent_item = parent.internalPointer()
- else:
- parent_item = self.root
- return parent_item.childCount()
-
- def headerData(self, section, orientation, role):
- if role == Qt.TextAlignmentRole:
- if section > 1:
- return Qt.AlignRight
- if role != Qt.DisplayRole:
- return None
- if orientation != Qt.Horizontal:
- return None
- return self.root.columnHeader(section)
-
- def parent(self, child):
- child_item = child.internalPointer()
- if child_item is self.root:
- return QModelIndex()
- parent_item = child_item.getParentItem()
- return self.createIndex(parent_item.getRow(), 0, parent_item)
-
- def index(self, row, column, parent):
- if parent.isValid():
- parent_item = parent.internalPointer()
- else:
- parent_item = self.root
- child_item = parent_item.getChildItem(row)
- return self.createIndex(row, column, child_item)
-
- def data(self, index, role):
- if role == Qt.TextAlignmentRole:
- if index.column() > 1:
- return Qt.AlignRight
- if role != Qt.DisplayRole:
- return None
- index_item = index.internalPointer()
- return index_item.getData(index.column())
-
-class MainWindow(QMainWindow):
-
- def __init__(self, db, dbname, parent=None):
- super(MainWindow, self).__init__(parent)
-
- self.setObjectName("MainWindow")
- self.setWindowTitle("Call Graph: " + dbname)
- self.move(100, 100)
- self.resize(800, 600)
- style = self.style()
- icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
- self.setWindowIcon(icon);
-
- self.model = TreeModel(db)
-
- self.view = QTreeView()
- self.view.setModel(self.model)
-
- self.setCentralWidget(self.view)
-
-if __name__ == '__main__':
- if (len(sys.argv) < 2):
- print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
- raise Exception("Too few arguments")
-
- dbname = sys.argv[1]
-
- is_sqlite3 = False
- try:
- f = open(dbname)
- if f.read(15) == "SQLite format 3":
- is_sqlite3 = True
- f.close()
- except:
- pass
-
- if is_sqlite3:
- db = QSqlDatabase.addDatabase('QSQLITE')
- else:
- db = QSqlDatabase.addDatabase('QPSQL')
- opts = dbname.split()
- for opt in opts:
- if '=' in opt:
- opt = opt.split('=')
- if opt[0] == 'hostname':
- db.setHostName(opt[1])
- elif opt[0] == 'port':
- db.setPort(int(opt[1]))
- elif opt[0] == 'username':
- db.setUserName(opt[1])
- elif opt[0] == 'password':
- db.setPassword(opt[1])
- elif opt[0] == 'dbname':
- dbname = opt[1]
- else:
- dbname = opt
-
- db.setDatabaseName(dbname)
- if not db.open():
- raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
-
- app = QApplication(sys.argv)
- window = MainWindow(db, dbname)
- window.show()
- err = app.exec_()
- db.close()
- sys.exit(err)
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index e46f51b..0564dd7 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -59,7 +59,7 @@
# pt_example=# \q
#
# An example of using the database is provided by the script
-# call-graph-from-sql.py. Refer to that script for details.
+# exported-sql-viewer.py. Refer to that script for details.
#
# Tables:
#
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index e4bb82c..245caf2 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -40,7 +40,7 @@
# sqlite> .quit
#
# An example of using the database is provided by the script
-# call-graph-from-sql.py. Refer to that script for details.
+# exported-sql-viewer.py. Refer to that script for details.
#
# The database structure is practically the same as created by the script
# export-to-postgresql.py. Refer to that script for details. A notable
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
new file mode 100755
index 0000000..24cb0bd
--- /dev/null
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -0,0 +1,2128 @@
+#!/usr/bin/python2
+# SPDX-License-Identifier: GPL-2.0
+# exported-sql-viewer.py: view data from sql database
+# Copyright (c) 2014-2018, Intel Corporation.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+# python tools/perf/scripts/python/exported-sql-viewer.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+# python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph. Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+# Call Graph: pt_example
+# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%)
+# v- ls
+# v- 2638:2638
+# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0
+# |- unknown unknown 1 13198 0.1 1 0.0
+# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3
+# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3
+# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4
+# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1
+# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0
+# >- __libc_csu_init ls 1 10354 0.1 10 0.0
+# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0
+# v- main ls 1 8182043 99.6 180254 99.9
+#
+# Points to note:
+# The top level is a command name (comm)
+# The next level is a thread (pid:tid)
+# Subsequent levels are functions
+# 'Count' is the number of calls
+# 'Time' is the elapsed time until the function returns
+# Percentages are relative to the level above
+# 'Branch Count' is the total number of branches for that function and all
+# functions that it calls
+
+# There is also an "All branches" report, which displays branches and
+# possibly disassembly. However, presently, the only supported disassembler is
+# Intel XED, and additionally the object code must be present in perf build ID
+# cache. To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+# git clone https://github.com/intelxed/mbuild.git mbuild
+# git clone https://github.com/intelxed/xed
+# cd xed
+# ./mfile.py --share
+# sudo ./mfile.py --prefix=/usr/local install
+# sudo ldconfig
+#
+# Example report:
+#
+# Time CPU Command PID TID Branch Type In Tx Branch
+# 8107675239590 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+# 7fab593ea260 48 89 e7 mov %rsp, %rdi
+# 8107675239899 2 ls 22011 22011 hardware interrupt No 7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675241900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+# 7fab593ea260 48 89 e7 mov %rsp, %rdi
+# 7fab593ea263 e8 c8 06 00 00 callq 0x7fab593ea930
+# 8107675241900 2 ls 22011 22011 call No 7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so)
+# 7fab593ea930 55 pushq %rbp
+# 7fab593ea931 48 89 e5 mov %rsp, %rbp
+# 7fab593ea934 41 57 pushq %r15
+# 7fab593ea936 41 56 pushq %r14
+# 7fab593ea938 41 55 pushq %r13
+# 7fab593ea93a 41 54 pushq %r12
+# 7fab593ea93c 53 pushq %rbx
+# 7fab593ea93d 48 89 fb mov %rdi, %rbx
+# 7fab593ea940 48 83 ec 68 sub $0x68, %rsp
+# 7fab593ea944 0f 31 rdtsc
+# 7fab593ea946 48 c1 e2 20 shl $0x20, %rdx
+# 7fab593ea94a 89 c0 mov %eax, %eax
+# 7fab593ea94c 48 09 c2 or %rax, %rdx
+# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax
+# 8107675242232 2 ls 22011 22011 hardware interrupt No 7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675242900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so)
+# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax
+# 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip)
+# 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+
+import sys
+import weakref
+import threading
+import string
+import cPickle
+import re
+import os
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+from ctypes import *
+from multiprocessing import Process, Array, Value, Event
+
+# Data formatting helpers
+
+def tohex(ip):
+    # A negative ip is wrapped by 2**64 so that it prints as an unsigned 64-bit hex value
+    unsigned_ip = ip + (1 << 64) if ip < 0 else ip
+    return "%x" % unsigned_ip
+
+def offstr(offset):
+    # "+0x<hex>" suffix for a non-zero offset; an empty string when offset is zero or None
+    suffix = "+0x%x" % offset if offset else ""
+    return suffix
+
+def dsoname(name):
+    # The "[kernel.kallsyms]" object is displayed as "[kernel]"; every other name is returned unchanged
+    is_kernel = (name == "[kernel.kallsyms]")
+    return "[kernel]" if is_kernel else name
+
+# Percent to one decimal place
+
+def PercentToOneDP(n, d):
+    # n as a percentage of d, rounded half-up to one decimal place; "0.0" when d is zero or empty
+    if d:
+        return str(((n * Decimal(100)) / d).quantize(Decimal(".1"), rounding=ROUND_HALF_UP))
+    return "0.0"
+
+# Helper for queries that must not fail
+
+def QueryExec(query, stmt):
+    # Execute stmt on query; on failure raise an Exception carrying the query's last error text
+    if not query.exec_(stmt):
+        failure = "Query failed: " + query.lastError().text()
+        raise Exception(failure)
+
+# Background thread
+
+class Thread(QThread):
+    """Background thread: calls task (with param, if one was given) until it reports done, emitting every result."""
+    done = Signal(object)
+
+    def __init__(self, task, param=None, parent=None):
+        super(Thread, self).__init__(parent)
+        self.task = task
+        self.param = param
+
+    def run(self):
+        finished = False
+        while not finished:
+            # task returns a (done, result) pair; each result is emitted, including the final one
+            if self.param is None:
+                finished, result = self.task()
+            else:
+                finished, result = self.task(self.param)
+            self.done.emit(result)
+
+# Tree data model
+
+class TreeModel(QAbstractItemModel):
+ # Tree model base. columnHeader() is called but not defined here, nor are self.fetcher / glb_chunk_sz used by FetchIfNeeded()
+ def __init__(self, root, parent=None):
+ super(TreeModel, self).__init__(parent)
+ self.root = root
+ self.last_row_read = 0
+ # The item behind a model index: the index's internal pointer when it is valid, otherwise the root item
+ def Item(self, parent):
+ if parent.isValid():
+ return parent.internalPointer()
+ else:
+ return self.root
+ # Child count of the item at parent; a negative childCount() result is reported to the view as 0
+ def rowCount(self, parent):
+ result = self.Item(parent).childCount()
+ if result < 0:
+ result = 0
+ self.dataChanged.emit(parent, parent) # signals that the item at parent changed
+ return result
+ # Delegates to the item's own hasChildren()
+ def hasChildren(self, parent):
+ return self.Item(parent).hasChildren()
+ # Header alignment and text; only horizontal headers have display text
+ def headerData(self, section, orientation, role):
+ if role == Qt.TextAlignmentRole:
+ return self.columnAlignment(section)
+ if role != Qt.DisplayRole:
+ return None
+ if orientation != Qt.Horizontal:
+ return None
+ return self.columnHeader(section)
+ # Index of the child's parent item (column 0), or an invalid index when the child's item is the root
+ def parent(self, child):
+ child_item = child.internalPointer()
+ if child_item is self.root:
+ return QModelIndex()
+ parent_item = child_item.getParentItem()
+ return self.createIndex(parent_item.getRow(), 0, parent_item)
+ # Index for (row, column) under parent, carrying the child item as its internal pointer
+ def index(self, row, column, parent):
+ child_item = self.Item(parent).getChildItem(row)
+ return self.createIndex(row, column, child_item)
+ # Display text for an index: the item's data for that column
+ def DisplayData(self, item, index):
+ return item.getData(index.column())
+ # Records the highest row read so far and asks self.fetcher for glb_chunk_sz more when within 10 rows of the end
+ def FetchIfNeeded(self, row):
+ if row > self.last_row_read:
+ self.last_row_read = row
+ if row + 10 >= self.root.child_count:
+ self.fetcher.Fetch(glb_chunk_sz)
+ # Default alignment for every column: left
+ def columnAlignment(self, column):
+ return Qt.AlignLeft
+ # Default font for every column: None
+ def columnFont(self, column):
+ return None
+ # Answers the alignment, font and display roles; any other role gets None
+ def data(self, index, role):
+ if role == Qt.TextAlignmentRole:
+ return self.columnAlignment(index.column())
+ if role == Qt.FontRole:
+ return self.columnFont(index.column())
+ if role != Qt.DisplayRole:
+ return None
+ item = index.internalPointer()
+ return self.DisplayData(item, index)
+
+# Table data model
+
+class TableModel(QAbstractTableModel):
+ # Table model base holding its rows in child_items. columnHeader(), self.fetcher and glb_chunk_sz are not defined in this class
+ def __init__(self, parent=None):
+ super(TableModel, self).__init__(parent)
+ self.child_count = 0
+ self.child_items = []
+ self.last_row_read = 0
+ # The item behind a model index: the index's internal pointer when it is valid, otherwise the model itself
+ def Item(self, parent):
+ if parent.isValid():
+ return parent.internalPointer()
+ else:
+ return self
+ # Number of rows held so far; parent is not used
+ def rowCount(self, parent):
+ return self.child_count
+ # Header alignment and text; only horizontal headers have display text
+ def headerData(self, section, orientation, role):
+ if role == Qt.TextAlignmentRole:
+ return self.columnAlignment(section)
+ if role != Qt.DisplayRole:
+ return None
+ if orientation != Qt.Horizontal:
+ return None
+ return self.columnHeader(section)
+ # Index for (row, column), carrying child_items[row] as its internal pointer; parent is not used
+ def index(self, row, column, parent):
+ return self.createIndex(row, column, self.child_items[row])
+ # Display text for an index: the item's data for that column
+ def DisplayData(self, item, index):
+ return item.getData(index.column())
+ # Records the highest row read so far and asks self.fetcher for glb_chunk_sz more when within 10 rows of the end
+ def FetchIfNeeded(self, row):
+ if row > self.last_row_read:
+ self.last_row_read = row
+ if row + 10 >= self.child_count:
+ self.fetcher.Fetch(glb_chunk_sz)
+ # Default alignment for every column: left
+ def columnAlignment(self, column):
+ return Qt.AlignLeft
+ # Default font for every column: None
+ def columnFont(self, column):
+ return None
+ # Answers the alignment, font and display roles; any other role gets None
+ def data(self, index, role):
+ if role == Qt.TextAlignmentRole:
+ return self.columnAlignment(index.column())
+ if role == Qt.FontRole:
+ return self.columnFont(index.column())
+ if role != Qt.DisplayRole:
+ return None
+ item = index.internalPointer()
+ return self.DisplayData(item, index)
+
+# Model cache
+
+model_cache = weakref.WeakValueDictionary()
+model_cache_lock = threading.Lock()
+
+def LookupCreateModel(model_name, create_fn):
+    """Return the model cached under model_name, creating and caching it with create_fn() on a miss."""
+    # model_cache holds weak references only, so an entry disappears once nothing else uses the model.
+    # The 'with' block releases the lock even when create_fn() raises; a bare acquire()/release()
+    # pair would leave the lock held and block every later lookup.
+    with model_cache_lock:
+        model = model_cache.get(model_name)
+        if model is None:
+            model = create_fn()
+            model_cache[model_name] = model
+    return model
+
+# Find bar
+
+class FindBar():
+ # Find bar: label, editable combo box, progress bar, pattern check box and next/previous/close buttons; searching is delegated to finder.Find()
+ def __init__(self, parent, finder, is_reg_expr=False):
+ self.finder = finder
+ self.context = []
+ self.last_value = None
+ self.last_pattern = None
+
+ label = QLabel("Find:")
+ label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+ self.textbox = QComboBox()
+ self.textbox.setEditable(True)
+ self.textbox.currentIndexChanged.connect(self.ValueChanged)
+
+ self.progress = QProgressBar()
+ self.progress.setRange(0, 0)
+ self.progress.hide()
+
+ if is_reg_expr:
+ self.pattern = QCheckBox("Regular Expression")
+ else:
+ self.pattern = QCheckBox("Pattern")
+ self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+ self.next_button = QToolButton()
+ self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown))
+ self.next_button.released.connect(lambda: self.NextPrev(1))
+
+ self.prev_button = QToolButton()
+ self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp))
+ self.prev_button.released.connect(lambda: self.NextPrev(-1))
+
+ self.close_button = QToolButton()
+ self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+ self.close_button.released.connect(self.Deactivate)
+
+ self.hbox = QHBoxLayout()
+ self.hbox.setContentsMargins(0, 0, 0, 0)
+
+ self.hbox.addWidget(label)
+ self.hbox.addWidget(self.textbox)
+ self.hbox.addWidget(self.progress)
+ self.hbox.addWidget(self.pattern)
+ self.hbox.addWidget(self.next_button)
+ self.hbox.addWidget(self.prev_button)
+ self.hbox.addWidget(self.close_button)
+
+ self.bar = QWidget()
+ self.bar.setLayout(self.hbox);
+ self.bar.hide()
+ # The container widget holding the whole bar (created hidden)
+ def Widget(self):
+ return self.bar
+ # Show the bar and give keyboard focus to the text box
+ def Activate(self):
+ self.bar.show()
+ self.textbox.setFocus()
+ # Hide the bar; also the close button's handler
+ def Deactivate(self):
+ self.bar.hide()
+ # Search in progress: disable the text box and show the progress bar in place of the pattern box and buttons
+ def Busy(self):
+ self.textbox.setEnabled(False)
+ self.pattern.hide()
+ self.next_button.hide()
+ self.prev_button.hide()
+ self.progress.show()
+ # Search finished: undo what Busy() did
+ def Idle(self):
+ self.textbox.setEnabled(True)
+ self.progress.hide()
+ self.pattern.show()
+ self.next_button.show()
+ self.prev_button.show()
+ # Remember the text and pattern flag being searched for, then hand the search to the finder
+ def Find(self, direction):
+ value = self.textbox.currentText()
+ pattern = self.pattern.isChecked()
+ self.last_value = value
+ self.last_pattern = pattern
+ self.finder.Find(value, direction, pattern, self.context)
+ # Handler for the combo box's currentIndexChanged signal
+ def ValueChanged(self):
+ value = self.textbox.currentText()
+ pattern = self.pattern.isChecked()
+ index = self.textbox.currentIndex()
+ data = self.textbox.itemData(index)
+ # Store the pattern in the combo box to keep it with the text value
+ if data == None:
+ self.textbox.setItemData(index, pattern)
+ else:
+ self.pattern.setChecked(data)
+ self.Find(0)
+ # Handler for the next (direction 1) and previous (direction -1) buttons
+ def NextPrev(self, direction):
+ value = self.textbox.currentText()
+ pattern = self.pattern.isChecked()
+ if value != self.last_value:
+ index = self.textbox.findText(value)
+ # Allow for a button press before the value has been added to the combo box
+ if index < 0:
+ index = self.textbox.count()
+ self.textbox.addItem(value, pattern)
+ self.textbox.setCurrentIndex(index)
+ return
+ else:
+ self.textbox.setItemData(index, pattern)
+ elif pattern != self.last_pattern:
+ # Keep the pattern recorded in the combo box up to date
+ index = self.textbox.currentIndex()
+ self.textbox.setItemData(index, pattern)
+ self.Find(direction)
+ # Tell the user that the current text was not found
+ def NotFound(self):
+ QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found")
+
+# Context-sensitive call graph data model item base
+
+class CallGraphLevelItemBase(object):
+ # Common state and accessors for call graph tree items; subclasses supply Select() and self.data
+ def __init__(self, glb, row, parent_item):
+ self.glb = glb
+ self.row = row
+ self.parent_item = parent_item
+ self.query_done = False;
+ self.child_count = 0
+ self.child_items = []
+ # Child item at the given row
+ def getChildItem(self, row):
+ return self.child_items[row]
+ # The item this one was created under (None was passed for the root item)
+ def getParentItem(self):
+ return self.parent_item
+ # This item's row number as given at construction
+ def getRow(self):
+ return self.row
+ # Number of children, calling Select() first if children have not been queried yet; can return -1 when there are none
+ def childCount(self):
+ if not self.query_done:
+ self.Select()
+ if not self.child_count:
+ return -1
+ return self.child_count
+ # True while children have not been queried yet; afterwards, whether any were found
+ def hasChildren(self):
+ if not self.query_done:
+ return True
+ return self.child_count > 0
+ # Text for a column, taken from self.data (set by subclasses)
+ def getData(self, column):
+ return self.data[column]
+
+# Context-sensitive call graph data model level 2+ item base
+
+class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
+ # Items identified by comm, thread and call path; Select() adds one CallGraphLevelThreeItem per child call path
+ def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
+ super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+ self.comm_id = comm_id
+ self.thread_id = thread_id
+ self.call_path_id = call_path_id
+ self.branch_count = branch_count
+ self.time = time
+ # Query calls whose parent call path is this item's, grouped per call path with call count, total time and total branch count
+ def Select(self):
+ self.query_done = True;
+ query = QSqlQuery(self.glb.db)
+ QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+ " FROM calls"
+ " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+ " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+ " INNER JOIN dsos ON symbols.dso_id = dsos.id"
+ " WHERE parent_call_path_id = " + str(self.call_path_id) +
+ " AND comm_id = " + str(self.comm_id) +
+ " AND thread_id = " + str(self.thread_id) +
+ " GROUP BY call_path_id, name, short_name"
+ " ORDER BY call_path_id")
+ while query.next():
+ child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+
+# Context-sensitive call graph data model level three item
+
+class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
+    """Level three item: name, object, count, time and branch count, with percentages taken against the parent item's totals."""
+    def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
+        super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+        percents = (PercentToOneDP(time, parent_item.time), PercentToOneDP(branch_count, parent_item.branch_count))
+        self.data = [name, dsoname(dso), str(count), str(time), percents[0], str(branch_count), percents[1]]
+        self.dbid = call_path_id
+
+# Context-sensitive call graph data model level two item
+
+class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
+ # Thread-level item labelled "pid:tid"; constructed with call path id 1 and zero time / branch count
+ def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
+ super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
+ self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+ self.dbid = thread_id
+ # After the base Select(), total the children's time and branch counts, then rewrite their percentage columns against those totals
+ def Select(self):
+ super(CallGraphLevelTwoItem, self).Select()
+ for child_item in self.child_items:
+ self.time += child_item.time
+ self.branch_count += child_item.branch_count
+ for child_item in self.child_items:
+ child_item.data[4] = PercentToOneDP(child_item.time, self.time)
+ child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+
+# Context-sensitive call graph data model level one item
+
+class CallGraphLevelOneItem(CallGraphLevelItemBase):
+ # Command-level item: shows the comm string in the first column; dbid is the comm id
+ def __init__(self, glb, row, comm_id, comm, parent_item):
+ super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
+ self.data = [comm, "", "", "", "", "", ""]
+ self.dbid = comm_id
+ # Query the threads recorded for this comm and add a CallGraphLevelTwoItem for each row returned
+ def Select(self):
+ self.query_done = True;
+ query = QSqlQuery(self.glb.db)
+ QueryExec(query, "SELECT thread_id, pid, tid"
+ " FROM comm_threads"
+ " INNER JOIN threads ON thread_id = threads.id"
+ " WHERE comm_id = " + str(self.dbid))
+ while query.next():
+ child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+
+# Context-sensitive call graph data model root item
+
+class CallGraphRootItem(CallGraphLevelItemBase):
+ # Root item: queries the comms table in the constructor and adds a CallGraphLevelOneItem for each row with a non-zero id
+ def __init__(self, glb):
+ super(CallGraphRootItem, self).__init__(glb, 0, None)
+ self.dbid = 0
+ self.query_done = True;
+ query = QSqlQuery(glb.db)
+ QueryExec(query, "SELECT id, comm FROM comms")
+ while query.next():
+ if not query.value(0):
+ continue
+ child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+
+# Context-sensitive call graph data model
+
+class CallGraphModel(TreeModel):
+
+ def __init__(self, glb, parent=None):
+ super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent)
+ self.glb = glb
+
+ def columnCount(self, parent=None):
+ return 7
+
+ def columnHeader(self, column):
+ headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+ return headers[column]
+
+ def columnAlignment(self, column):
+ alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+ return alignment[column]
+
+ def FindSelect(self, value, pattern, query):
+ if pattern:
+ # postgresql and sqlite pattern patching differences:
+ # postgresql LIKE is case sensitive but sqlite LIKE is not
+ # postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not
+ # postgresql supports ILIKE which is case insensitive
+ # sqlite supports GLOB (text only) which uses * and ? and is case sensitive
+ if not self.glb.dbref.is_sqlite3:
+ # Escape % and _
+ s = value.replace("%", "\%")
+ s = s.replace("_", "\_")
+ # Translate * and ? into SQL LIKE pattern characters % and _
+ trans = string.maketrans("*?", "%_")
+ match = " LIKE '" + str(s).translate(trans) + "'"
+ else:
+ match = " GLOB '" + str(value) + "'"
+ else:
+ match = " = '" + str(value) + "'"
+ QueryExec(query, "SELECT call_path_id, comm_id, thread_id"
+ " FROM calls"
+ " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+ " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+ " WHERE symbols.name" + match +
+ " GROUP BY comm_id, thread_id, call_path_id"
+ " ORDER BY comm_id, thread_id, call_path_id")
+
+ def FindPath(self, query):
+ # Turn the query result into a list of ids that the tree view can walk
+ # to open the tree at the right place.
+ ids = []
+ parent_id = query.value(0)
+ while parent_id:
+ ids.insert(0, parent_id)
+ q2 = QSqlQuery(self.glb.db)
+ QueryExec(q2, "SELECT parent_id"
+ " FROM call_paths"
+ " WHERE id = " + str(parent_id))
+ if not q2.next():
+ break
+ parent_id = q2.value(0)
+ # The call path root is not used
+ if ids[0] == 1:
+ del ids[0]
+ ids.insert(0, query.value(2))
+ ids.insert(0, query.value(1))
+ return ids
+
+ def Found(self, query, found):
+ if found:
+ return self.FindPath(query)
+ return []
+
+ def FindValue(self, value, pattern, query, last_value, last_pattern):
+ if last_value == value and pattern == last_pattern:
+ found = query.first()
+ else:
+ self.FindSelect(value, pattern, query)
+ found = query.next()
+ return self.Found(query, found)
+
+ def FindNext(self, query):
+ # Move to the next match, wrapping round to the first one when the end
+ # of the result set is reached.  Returns the id path ([] if none).
+ found = query.next()
+ if not found:
+ found = query.first()
+ return self.Found(query, found)
+
+ def FindPrev(self, query):
+ # Move to the previous match, wrapping round to the last one when the
+ # start of the result set is reached.  Returns the id path ([] if none).
+ found = query.previous()
+ if not found:
+ found = query.last()
+ return self.Found(query, found)
+
+ def FindThread(self, c):
+ # Worker function for Find(); 'c' is the Context object created there.
+ # A direction of 0, or a changed value / pattern, starts a search from
+ # the first match; otherwise step forwards (direction > 0) or backwards.
+ # NOTE(review): the True in the returned tuple presumably tells the Thread
+ # helper that the work is finished - confirm against the Thread class.
+ if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern:
+ ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern)
+ elif c.direction > 0:
+ ids = self.FindNext(c.query)
+ else:
+ ids = self.FindPrev(c.query)
+ return (True, ids)
+
+ def Find(self, value, direction, pattern, context, callback):
+ # Start a search on a worker thread; 'callback' is later called with the
+ # list of ids found (empty when there is no match).
+ # 'context' is a list owned by the caller: its first element, created
+ # here on first use, holds the query and the previous value / pattern
+ # between calls.
+ class Context():
+ def __init__(self, *x):
+ self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x
+ def Update(self, *x):
+ # Take the new value / direction / pattern and shift the current
+ # value / pattern into last_value / last_pattern
+ self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern)
+ if len(context):
+ context[0].Update(value, direction, pattern)
+ else:
+ context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None))
+ # Use a thread so the UI is not blocked during the SELECT
+ thread = Thread(self.FindThread, context[0])
+ thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection)
+ thread.start()
+
+ def FindDone(self, thread, callback, ids):
+ # Called when the search thread signals done: hand the ids to the caller.
+ # 'thread' is not used here.
+ # NOTE(review): presumably it is passed only to keep the Thread object
+ # referenced until it has finished - confirm.
+ callback(ids)
+
+# Vertical widget layout
+
+class VBox():
+
+ # Container widget that stacks two, or optionally three, widgets
+ # vertically with no margins around them.
+
+ def __init__(self, w1, w2, w3=None):
+ self.vbox = QWidget()
+ self.vbox.setLayout(QVBoxLayout());
+
+ self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+
+ self.vbox.layout().addWidget(w1)
+ self.vbox.layout().addWidget(w2)
+ if w3:
+ self.vbox.layout().addWidget(w3)
+
+ def Widget(self):
+ # The container widget holding the layout
+ return self.vbox
+
+# Context-sensitive call graph window
+
+class CallGraphWindow(QMdiSubWindow):
+
+ # MDI sub-window showing the call graph model in a tree view, with a
+ # find bar underneath.
+
+ def __init__(self, glb, parent=None):
+ super(CallGraphWindow, self).__init__(parent)
+
+ # The lambda is the factory that LookupCreateModel is given for this model name
+ self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
+
+ self.view = QTreeView()
+ self.view.setModel(self.model)
+
+ # Initial (column, width) settings
+ for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+ self.view.setColumnWidth(c, w)
+
+ self.find_bar = FindBar(self, self)
+
+ self.vbox = VBox(self.view, self.find_bar.Widget())
+
+ self.setWidget(self.vbox.Widget())
+
+ AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
+
+ def DisplayFound(self, ids):
+ # Walk down the tree one level per id, at each level looking among the
+ # children of the item matched so far for the one whose dbid equals the
+ # id, and making it the current index.  Returns False if 'ids' is empty
+ # or some id has no matching child, True otherwise.
+ if not len(ids):
+ return False
+ parent = QModelIndex()
+ for dbid in ids:
+ found = False
+ n = self.model.rowCount(parent)
+ for row in xrange(n):
+ child = self.model.index(row, 0, parent)
+ if child.internalPointer().dbid == dbid:
+ found = True
+ self.view.setCurrentIndex(child)
+ parent = child
+ break
+ if not found:
+ break
+ return found
+
+ def Find(self, value, direction, pattern, context):
+ # Start a search in the model; FindDone is called with the result
+ self.view.setFocus()
+ self.find_bar.Busy()
+ self.model.Find(value, direction, pattern, context, self.FindDone)
+
+ def FindDone(self, ids):
+ # Show the item found, put the find bar back to idle and flag "not found"
+ # if the ids could not be displayed
+ found = True
+ if not self.DisplayFound(ids):
+ found = False
+ self.find_bar.Idle()
+ if not found:
+ self.find_bar.NotFound()
+
+# Child data item finder
+
+class ChildDataItemFinder():
+
+ # Searches the column data of root.child_items for a value and steps
+ # through the matching rows.  'root' only needs a child_items list whose
+ # items have 'row' and 'data' attributes.
+
+ def __init__(self, root):
+ self.root = root
+ self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5
+ # Row numbers of the matches of the last search, and the position in
+ # that list of the match last reported
+ self.rows = []
+ self.pos = 0
+
+ def FindSelect(self):
+ # Rebuild self.rows: a child matches if any of its columns, converted
+ # with str(), matches.  With 'pattern' set the value is used as a
+ # regular expression, otherwise as a plain substring.
+ self.rows = []
+ if self.pattern:
+ pattern = re.compile(self.value)
+ for child in self.root.child_items:
+ for column_data in child.data:
+ if re.search(pattern, str(column_data)) is not None:
+ self.rows.append(child.row)
+ break
+ else:
+ for child in self.root.child_items:
+ for column_data in child.data:
+ if self.value in str(column_data):
+ self.rows.append(child.row)
+ break
+
+ def FindValue(self):
+ # Return the row of the first match, or -1 if there is none.  The list
+ # of matches is only rebuilt when the value or pattern flag has changed.
+ self.pos = 0
+ if self.last_value != self.value or self.pattern != self.last_pattern:
+ self.FindSelect()
+ if not len(self.rows):
+ return -1
+ return self.rows[self.pos]
+
+ def FindThread(self):
+ # Worker function for Find().  A direction of 0, or a changed value /
+ # pattern, starts from the first match; otherwise step to the next
+ # (direction > 0) or previous match, wrapping round at either end.
+ # The row is -1 when nothing matches.
+ if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern:
+ row = self.FindValue()
+ elif len(self.rows):
+ if self.direction > 0:
+ self.pos += 1
+ if self.pos >= len(self.rows):
+ self.pos = 0
+ else:
+ self.pos -= 1
+ if self.pos < 0:
+ self.pos = len(self.rows) - 1
+ row = self.rows[self.pos]
+ else:
+ row = -1
+ return (True, row)
+
+ def Find(self, value, direction, pattern, context, callback):
+ # Record the new search, keeping the previous value / pattern as
+ # last_value / last_pattern, and run it on a worker thread.  'callback'
+ # is later called with the row found.  'context' is not used here.
+ self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern)
+ # Use a thread so the UI is not blocked
+ thread = Thread(self.FindThread)
+ thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection)
+ thread.start()
+
+ def FindDone(self, thread, callback, row):
+ # Called when the search thread signals done: hand the row to the caller
+ callback(row)
+
+# Number of database records to fetch in one go
+# (used below as the LIMIT of the chunked SELECT statements, as the cap on
+# one fetch batch and as the multiplier of the "records to fetch" spin box)
+
+glb_chunk_sz = 10000
+
+# size of pickled integer big enough for record size
+# (every record placed in the shared fetch buffer is preceded by a slot of
+# this many bytes that holds the pickled length of the record)
+
+glb_nsz = 8
+
+# Background process for SQL data fetcher
+
+class SQLFetcherProcess():
+
+ # Producer side of the SQL data fetcher.  It runs the SELECT, pickles each
+ # prepared row into the shared buffer and publishes its progress through
+ # the shared head / fetch_count values and the fetched_event.  The other
+ # side (see SQLFetcher) sets process_target and tail and wakes this side
+ # with wait_event.
+
+ def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep):
+ # Need a unique connection name
+ conn_name = "SQLFetcher" + str(os.getpid())
+ self.db, dbname = dbref.Open(conn_name)
+ self.sql = sql
+ self.buffer = buffer
+ self.head = head
+ self.tail = tail
+ self.fetch_count = fetch_count
+ self.fetching_done = fetching_done
+ self.process_target = process_target
+ self.wait_event = wait_event
+ self.fetched_event = fetched_event
+ self.prep = prep
+ self.query = QSqlQuery(self.db)
+ # 0 means the statement contains $$last_id$$ and may be executed any
+ # number of times; otherwise it counts down so that Select() executes
+ # the statement only once
+ self.query_limit = 0 if "$$last_id$$" in sql else 2
+ self.last_id = -1
+ self.fetched = 0
+ self.more = True
+ # Private copies of the shared buffer positions
+ self.local_head = self.head.value
+ self.local_tail = self.tail.value
+
+ def Select(self):
+ # Execute the statement with $$last_id$$ replaced by the id of the last
+ # row read.  A statement without that placeholder is executed the first
+ # time only.
+ if self.query_limit:
+ if self.query_limit == 1:
+ return
+ self.query_limit -= 1
+ stmt = self.sql.replace("$$last_id$$", str(self.last_id))
+ QueryExec(self.query, stmt)
+
+ def Next(self):
+ # Return the prepared data of the next row, or None when there are no
+ # more.  When the current result set is used up the statement is
+ # executed again to get the next chunk.
+ if not self.query.next():
+ self.Select()
+ if not self.query.next():
+ return None
+ # Column 0 is remembered as the value to substitute for $$last_id$$
+ self.last_id = self.query.value(0)
+ return self.prep(self.query)
+
+ def WaitForTarget(self):
+ # Block until the target is more than the number of records fetched so
+ # far, or is negative, and return it.  The event is cleared before the
+ # target is read so that a set() after the read still wakes the wait().
+ while True:
+ self.wait_event.clear()
+ target = self.process_target.value
+ if target > self.fetched or target < 0:
+ break
+ self.wait_event.wait()
+ return target
+
+ def HasSpace(self, sz):
+ # Return True if 'sz' bytes can be written at local_head.  If the space
+ # up to the end of the buffer is too small, local_head is moved to the
+ # start of the buffer (even when False is then returned).
+ if self.local_tail <= self.local_head:
+ space = len(self.buffer) - self.local_head
+ if space > sz:
+ return True
+ if space >= glb_nsz:
+ # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
+ nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
+ self.buffer[self.local_head : self.local_head + len(nd)] = nd
+ self.local_head = 0
+ if self.local_tail - self.local_head > sz:
+ return True
+ return False
+
+ def WaitForSpace(self, sz):
+ # Block until there is room for 'sz' bytes, re-reading the shared tail
+ # each time round the loop
+ if self.HasSpace(sz):
+ return
+ while True:
+ self.wait_event.clear()
+ self.local_tail = self.tail.value
+ if self.HasSpace(sz):
+ return
+ self.wait_event.wait()
+
+ def AddToBuffer(self, obj):
+ # Append one record: the pickled length of the pickled object, in a
+ # slot of glb_nsz bytes, followed by the pickled object itself
+ d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
+ n = len(d)
+ nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
+ sz = n + glb_nsz
+ self.WaitForSpace(sz)
+ pos = self.local_head
+ self.buffer[pos : pos + len(nd)] = nd
+ self.buffer[pos + glb_nsz : pos + sz] = d
+ self.local_head += sz
+
+ def FetchBatch(self, batch_size):
+ # Fetch up to batch_size records into the buffer, then publish the new
+ # count and head position and signal fetched_event
+ fetched = 0
+ while batch_size > fetched:
+ obj = self.Next()
+ if obj is None:
+ self.more = False
+ break
+ self.AddToBuffer(obj)
+ fetched += 1
+ if fetched:
+ self.fetched += fetched
+ with self.fetch_count.get_lock():
+ self.fetch_count.value += fetched
+ self.head.value = self.local_head
+ self.fetched_event.set()
+
+ def Run(self):
+ # Main loop: fetch in batches of at most glb_chunk_sz until there are
+ # no more records or a negative target is seen, then flag completion
+ while self.more:
+ target = self.WaitForTarget()
+ if target < 0:
+ break
+ batch_size = min(glb_chunk_sz, target - self.fetched)
+ self.FetchBatch(batch_size)
+ self.fetching_done.value = True
+ self.fetched_event.set()
+
+def SQLFetcherFn(*x):
+ # Entry point of the fetcher process (see the Process() call in
+ # SQLFetcher): create the SQLFetcherProcess from the arguments and run it
+ process = SQLFetcherProcess(*x)
+ process.Run()
+
+# SQL data fetcher
+
+class SQLFetcher(QObject):
+
+ # Consumer side of the SQL data fetcher.  It starts the process that runs
+ # SQLFetcherFn and a thread that waits for records to arrive; each record
+ # is unpickled from the shared buffer and passed to process_data, after
+ # which the 'done' signal is emitted with the number of records.
+
+ done = Signal(object)
+
+ def __init__(self, glb, sql, prep, process_data, parent=None):
+ super(SQLFetcher, self).__init__(parent)
+ self.process_data = process_data
+ self.more = True
+ self.target = 0
+ self.last_target = 0
+ self.fetched = 0
+ # State shared with the fetcher process
+ self.buffer_size = 16 * 1024 * 1024
+ self.buffer = Array(c_char, self.buffer_size, lock=False)
+ self.head = Value(c_longlong)
+ self.tail = Value(c_longlong)
+ self.local_tail = 0
+ self.fetch_count = Value(c_longlong)
+ self.fetching_done = Value(c_bool)
+ self.last_count = 0
+ self.process_target = Value(c_longlong)
+ self.wait_event = Event()
+ self.fetched_event = Event()
+ glb.AddInstanceToShutdownOnExit(self)
+ self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep))
+ self.process.start()
+ self.thread = Thread(self.Thread)
+ self.thread.done.connect(self.ProcessData, Qt.QueuedConnection)
+ self.thread.start()
+
+ def Shutdown(self):
+ # Tell the thread and process to exit
+ self.process_target.value = -1
+ self.wait_event.set()
+ self.more = False
+ self.fetching_done.value = True
+ self.fetched_event.set()
+
+ def Thread(self):
+ # Thread function: wait until the fetch count has changed and return
+ # (False, number of new records), or (True, 0) once there are no more.
+ # NOTE(review): the first element presumably tells the Thread helper
+ # whether to stop calling this function - confirm against the Thread class.
+ if not self.more:
+ return True, 0
+ while True:
+ self.fetched_event.clear()
+ fetch_count = self.fetch_count.value
+ if fetch_count != self.last_count:
+ break
+ if self.fetching_done.value:
+ self.more = False
+ return True, 0
+ self.fetched_event.wait()
+ count = fetch_count - self.last_count
+ self.last_count = fetch_count
+ self.fetched += count
+ return False, count
+
+ def Fetch(self, nr):
+ # Ask for 'nr' more records than have been fetched so far.  Returns the
+ # number fetched so far, or -1 when there are no more.
+ if not self.more:
+ # -1 indicates there are no more
+ return -1
+ result = self.fetched
+ extra = result + nr - self.target
+ if extra > 0:
+ self.target += extra
+ # process_target < 0 indicates shutting down
+ if self.process_target.value >= 0:
+ self.process_target.value = self.target
+ self.wait_event.set()
+ return result
+
+ def RemoveFromBuffer(self):
+ # Unpickle and return the record at local_tail.  Reading restarts at the
+ # start of the buffer when fewer than glb_nsz bytes remain or when the
+ # length read is 0, matching what SQLFetcherProcess.HasSpace() writes.
+ pos = self.local_tail
+ if len(self.buffer) - pos < glb_nsz:
+ pos = 0
+ n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
+ if n == 0:
+ pos = 0
+ n = cPickle.loads(self.buffer[0 : glb_nsz])
+ pos += glb_nsz
+ obj = cPickle.loads(self.buffer[pos : pos + n])
+ self.local_tail = pos + n
+ return obj
+
+ def ProcessData(self, count):
+ # Connected to the thread's done signal: consume 'count' records, then
+ # publish the new tail and wake the fetcher process, which may be
+ # waiting for buffer space
+ for i in xrange(count):
+ obj = self.RemoveFromBuffer()
+ self.process_data(obj)
+ self.tail.value = self.local_tail
+ self.wait_event.set()
+ self.done.emit(count)
+
+# Fetch more records bar
+
+class FetchMoreRecordsBar():
+
+ # Bar with a spin box and "Go!" button for fetching more records into
+ # 'model', a progress bar shown while a fetch is in progress and a label
+ # shown once everything has been fetched.  'model' must provide a
+ # 'progress' signal, HasMoreRecords() and FetchMoreRecords().
+
+ def __init__(self, model, parent):
+ self.model = model
+
+ self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:")
+ self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+ self.fetch_count = QSpinBox()
+ self.fetch_count.setRange(1, 1000000)
+ self.fetch_count.setValue(10)
+ self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+ self.fetch = QPushButton("Go!")
+ self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+ self.fetch.released.connect(self.FetchMoreRecords)
+
+ self.progress = QProgressBar()
+ self.progress.setRange(0, 100)
+ self.progress.hide()
+
+ self.done_label = QLabel("All records fetched")
+ self.done_label.hide()
+
+ self.spacer = QLabel("")
+
+ self.close_button = QToolButton()
+ self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+ self.close_button.released.connect(self.Deactivate)
+
+ self.hbox = QHBoxLayout()
+ self.hbox.setContentsMargins(0, 0, 0, 0)
+
+ self.hbox.addWidget(self.label)
+ self.hbox.addWidget(self.fetch_count)
+ self.hbox.addWidget(self.fetch)
+ self.hbox.addWidget(self.spacer)
+ self.hbox.addWidget(self.progress)
+ self.hbox.addWidget(self.done_label)
+ self.hbox.addWidget(self.close_button)
+
+ self.bar = QWidget()
+ self.bar.setLayout(self.hbox);
+ self.bar.show()
+
+ self.in_progress = False
+ self.model.progress.connect(self.Progress)
+
+ self.done = False
+
+ if not model.HasMoreRecords():
+ self.Done()
+
+ def Widget(self):
+ # The widget containing the whole bar
+ return self.bar
+
+ def Activate(self):
+ # Show the bar and give the "Go!" button the focus
+ self.bar.show()
+ self.fetch.setFocus()
+
+ def Deactivate(self):
+ # Hide the bar (connected to the close button)
+ self.bar.hide()
+
+ def Enable(self, enable):
+ self.fetch.setEnabled(enable)
+ self.fetch_count.setEnabled(enable)
+
+ def Busy(self):
+ # Swap the button and spacer for the progress bar
+ self.Enable(False)
+ self.fetch.hide()
+ self.spacer.hide()
+ self.progress.show()
+
+ def Idle(self):
+ # Swap the progress bar back for the button and spacer
+ self.in_progress = False
+ self.Enable(True)
+ self.progress.hide()
+ self.fetch.show()
+ self.spacer.show()
+
+ def Target(self):
+ # Number of records requested: the spin box value times glb_chunk_sz
+ return self.fetch_count.value() * glb_chunk_sz
+
+ def Done(self):
+ # Everything has been fetched: hide the controls and show the label
+ self.done = True
+ self.Idle()
+ self.label.hide()
+ self.fetch_count.hide()
+ self.fetch.hide()
+ self.spacer.hide()
+ self.done_label.show()
+
+ def Progress(self, count):
+ # Connected to the model's progress signal.  A non-zero 'count' is
+ # compared with the count returned when the fetch was started (self.start)
+ # to work out the percentage of the target reached.
+ if self.in_progress:
+ if count:
+ percent = ((count - self.start) * 100) / self.Target()
+ if percent >= 100:
+ self.Idle()
+ else:
+ self.progress.setValue(percent)
+ if not count:
+ # Count value of zero means no more records
+ self.Done()
+
+ def FetchMoreRecords(self):
+ # Connected to the "Go!" button: start fetching Target() more records
+ if self.done:
+ return
+ self.progress.setValue(0)
+ self.Busy()
+ self.in_progress = True
+ self.start = self.model.FetchMoreRecords(self.Target())
+
+# Branch data model level two item
+
+class BranchLevelTwoItem():
+
+ # Leaf item of the branch tree.  It has 8 columns of data of which only
+ # the last one (index 7) is filled in, with 'text'.
+
+ def __init__(self, row, text, parent_item):
+ self.row = row
+ self.parent_item = parent_item
+ self.data = [""] * 8
+ self.data[7] = text
+ self.level = 2
+
+ def getParentItem(self):
+ return self.parent_item
+
+ def getRow(self):
+ return self.row
+
+ def childCount(self):
+ # Level two items never have children
+ return 0
+
+ def hasChildren(self):
+ return False
+
+ def getData(self, column):
+ return self.data[column]
+
+# Branch data model level one item
+
+class BranchLevelOneItem():
+
+ # Tree item for one branch sample.  data[0] is the sample's database id
+ # and the rest is the column data.  Its children, one per disassembled
+ # instruction, are created on demand by Select().
+
+ def __init__(self, glb, row, data, parent_item):
+ self.glb = glb
+ self.row = row
+ self.parent_item = parent_item
+ self.child_count = 0
+ self.child_items = []
+ self.data = data[1:]
+ self.dbid = data[0]
+ self.level = 1
+ # Set once Select() has been called, whether or not it added children
+ self.query_done = False
+
+ def getChildItem(self, row):
+ return self.child_items[row]
+
+ def getParentItem(self):
+ return self.parent_item
+
+ def getRow(self):
+ return self.row
+
+ def Select(self):
+ # Create the child items by disassembling the code from this sample's
+ # branch target (to_ip) up to the address of the next sample on the
+ # same cpu.  Returns early, leaving no or only some children, whenever
+ # that is not possible.
+ self.query_done = True
+
+ if not self.glb.have_disassembler:
+ return
+
+ query = QSqlQuery(self.glb.db)
+
+ # Where this branch goes to
+ QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip"
+ " FROM samples"
+ " INNER JOIN dsos ON samples.to_dso_id = dsos.id"
+ " INNER JOIN symbols ON samples.to_symbol_id = symbols.id"
+ " WHERE samples.id = " + str(self.dbid))
+ if not query.next():
+ return
+ cpu = query.value(0)
+ dso = query.value(1)
+ sym = query.value(2)
+ if dso == 0 or sym == 0:
+ return
+ off = query.value(3)
+ short_name = query.value(4)
+ long_name = query.value(5)
+ build_id = query.value(6)
+ sym_start = query.value(7)
+ ip = query.value(8)
+
+ # Where the next sample on the same cpu is
+ QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start"
+ " FROM samples"
+ " INNER JOIN symbols ON samples.symbol_id = symbols.id"
+ " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) +
+ " ORDER BY samples.id"
+ " LIMIT 1")
+ if not query.next():
+ return
+ if query.value(0) != dso:
+ # Cannot disassemble from one dso to another
+ return
+ bsym = query.value(1)
+ boff = query.value(2)
+ bsym_start = query.value(3)
+ if bsym == 0:
+ return
+ # Number of bytes to disassemble; give up if it is not positive or is
+ # more than 16384
+ tot = bsym_start + boff + 1 - sym_start - off
+ if tot <= 0 or tot > 16384:
+ return
+
+ inst = self.glb.disassembler.Instruction()
+ f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id)
+ if not f:
+ return
+ mode = 0 if Is64Bit(f) else 1
+ self.glb.disassembler.SetMode(inst, mode)
+
+ # Read 16 bytes more than the total to be disassembled
+ buf_sz = tot + 16
+ buf = create_string_buffer(tot + 16)
+ f.seek(sym_start + off)
+ buf.value = f.read(buf_sz)
+ buf_ptr = addressof(buf)
+ i = 0
+ while tot > 0:
+ cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip)
+ if cnt:
+ # Address, then the instruction bytes in hex, padded so that the
+ # instruction text lines up
+ byte_str = tohex(ip).rjust(16)
+ for k in xrange(cnt):
+ byte_str += " %02x" % ord(buf[i])
+ i += 1
+ while k < 15:
+ byte_str += " "
+ k += 1
+ self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+ self.child_count += 1
+ else:
+ # A count of zero ends the disassembly
+ return
+ buf_ptr += cnt
+ tot -= cnt
+ buf_sz -= cnt
+ ip += cnt
+
+ def childCount(self):
+ # Populate the children on first use.  Note that -1, not 0, is returned
+ # when there are no children.
+ if not self.query_done:
+ self.Select()
+ if not self.child_count:
+ return -1
+ return self.child_count
+
+ def hasChildren(self):
+ # Until Select() has been done, report that there may be children
+ if not self.query_done:
+ return True
+ return self.child_count > 0
+
+ def getData(self, column):
+ return self.data[column]
+
+# Branch data model root item
+
+class BranchRootItem():
+
+ # Root of the branch tree.  It has no data or parent of its own; the
+ # model appends to child_items and updates child_count directly.
+
+ def __init__(self):
+ self.child_count = 0
+ self.child_items = []
+ self.level = 0
+
+ def getChildItem(self, row):
+ return self.child_items[row]
+
+ def getParentItem(self):
+ return None
+
+ def getRow(self):
+ return 0
+
+ def childCount(self):
+ return self.child_count
+
+ def hasChildren(self):
+ return self.child_count > 0
+
+ def getData(self, column):
+ return ""
+
+# Branch data preparation
+
+def BranchDataPrep(query):
+ # Convert the current row of 'query' into a list of 9 values: columns 0
+ # to 7 unchanged, followed by one string formatted from columns 8 to 15
+ # as "ip symbol+offset (dso) -> ip symbol+offset (dso)".
+ data = []
+ for i in xrange(0, 8):
+ data.append(query.value(i))
+ data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+ " (" + dsoname(query.value(11)) + ")" + " -> " +
+ tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+ " (" + dsoname(query.value(15)) + ")")
+ return data
+
+# Branch data model
+
+class BranchModel(TreeModel):
+
+ # Tree model of the branch samples of one event.  The samples are fetched
+ # in the background, glb_chunk_sz at a time, by a SQLFetcher.  The
+ # 'progress' signal is emitted with the number of rows so far, or with 0
+ # when there are no more records.
+
+ progress = Signal(object)
+
+ def __init__(self, glb, event_id, where_clause, parent=None):
+ super(BranchModel, self).__init__(BranchRootItem(), parent)
+ self.glb = glb
+ self.event_id = event_id
+ self.more = True
+ # Number of samples added to root.child_items, which can be ahead of
+ # root.child_count until Update() is called
+ self.populated = 0
+ # The column order must match what BranchDataPrep expects.
+ # 'where_clause' is appended as is directly after the $$last_id$$ condition.
+ sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
+ " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
+ " ip, symbols.name, sym_offset, dsos.short_name,"
+ " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+ " FROM samples"
+ " INNER JOIN comms ON comm_id = comms.id"
+ " INNER JOIN threads ON thread_id = threads.id"
+ " INNER JOIN branch_types ON branch_type = branch_types.id"
+ " INNER JOIN symbols ON symbol_id = symbols.id"
+ " INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id"
+ " INNER JOIN dsos ON samples.dso_id = dsos.id"
+ " INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id"
+ " WHERE samples.id > $$last_id$$" + where_clause +
+ " AND evsel_id = " + str(self.event_id) +
+ " ORDER BY samples.id"
+ " LIMIT " + str(glb_chunk_sz))
+ self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+ self.fetcher.done.connect(self.Update)
+ self.fetcher.Fetch(glb_chunk_sz)
+
+ def columnCount(self, parent=None):
+ return 8
+
+ def columnHeader(self, column):
+ return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+
+ def columnFont(self, column):
+ # Only the last column ("Branch") gets a particular font
+ if column != 7:
+ return None
+ return QFont("Monospace")
+
+ def DisplayData(self, item, index):
+ if item.level == 1:
+ self.FetchIfNeeded(item.row)
+ return item.getData(index.column())
+
+ def AddSample(self, data):
+ # Called by the fetcher for each record: append a level one item
+ child = BranchLevelOneItem(self.glb, self.populated, data, self.root)
+ self.root.child_items.append(child)
+ self.populated += 1
+
+ def Update(self, fetched):
+ # Connected to the fetcher's done signal.  Make the items added since
+ # the last call visible as rows.  A 'fetched' value of zero means there
+ # are no more records.
+ if not fetched:
+ self.more = False
+ self.progress.emit(0)
+ child_count = self.root.child_count
+ count = self.populated - child_count
+ if count > 0:
+ parent = QModelIndex()
+ self.beginInsertRows(parent, child_count, child_count + count - 1)
+ self.insertRows(child_count, count, parent)
+ self.root.child_count += count
+ self.endInsertRows()
+ self.progress.emit(self.root.child_count)
+
+ def FetchMoreRecords(self, count):
+ # Ask the fetcher for 'count' more records, or emit progress(0) if there
+ # are no more.  Returns the current number of rows.
+ current = self.root.child_count
+ if self.more:
+ self.fetcher.Fetch(count)
+ else:
+ self.progress.emit(0)
+ return current
+
+ def HasMoreRecords(self):
+ return self.more
+
+# Branch window
+
+class BranchWindow(QMdiSubWindow):
+
+ # MDI sub-window showing the branch events of one event in a tree view,
+ # with a find bar and a fetch-more-records bar underneath.
+
+ def __init__(self, glb, event_id, name, where_clause, parent=None):
+ super(BranchWindow, self).__init__(parent)
+
+ # The where clause is part of the model name, so a different where
+ # clause gives a different name to look up
+ model_name = "Branch Events " + str(event_id)
+ if len(where_clause):
+ model_name = where_clause + " " + model_name
+
+ self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause))
+
+ self.view = QTreeView()
+ self.view.setUniformRowHeights(True)
+ self.view.setModel(self.model)
+
+ self.ResizeColumnsToContents()
+
+ self.find_bar = FindBar(self, self, True)
+
+ self.finder = ChildDataItemFinder(self.model.root)
+
+ self.fetch_bar = FetchMoreRecordsBar(self.model, self)
+
+ self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+ self.setWidget(self.vbox.Widget())
+
+ AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events")
+
+ def ResizeColumnToContents(self, column, n):
+ # Using the view's resizeColumnToContents() here is extremely slow
+ # so implement a crude alternative
+ # The width is that of the widest of the first 'n' values and the column
+ # header, each measured with "MM" appended ("MMMM" for column 0)
+ mm = "MM" if column else "MMMM"
+ font = self.view.font()
+ metrics = QFontMetrics(font)
+ max = 0
+ for row in xrange(n):
+ val = self.model.root.child_items[row].data[column]
+ len = metrics.width(str(val) + mm)
+ max = len if len > max else max
+ val = self.model.columnHeader(column)
+ len = metrics.width(str(val) + mm)
+ max = len if len > max else max
+ self.view.setColumnWidth(column, max)
+
+ def ResizeColumnsToContents(self):
+ # Size every column from at most the first 100 rows
+ n = min(self.model.root.child_count, 100)
+ if n < 1:
+ # No data yet, so connect a signal to notify when there is
+ self.model.rowsInserted.connect(self.UpdateColumnWidths)
+ return
+ columns = self.model.columnCount()
+ for i in xrange(columns):
+ self.ResizeColumnToContents(i, n)
+
+ def UpdateColumnWidths(self, *x):
+ # This only needs to be done once, so disconnect the signal now
+ self.model.rowsInserted.disconnect(self.UpdateColumnWidths)
+ self.ResizeColumnsToContents()
+
+ def Find(self, value, direction, pattern, context):
+ # Start a search of the rows fetched so far; FindDone is called with the result
+ self.view.setFocus()
+ self.find_bar.Busy()
+ self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+ def FindDone(self, row):
+ # Select the row found; a negative row means nothing was found
+ self.find_bar.Idle()
+ if row >= 0:
+ self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+ else:
+ self.find_bar.NotFound()
+
+# Event list
+
+def GetEventList(db):
+ # Return the names from the selected_events table that have an id greater
+ # than 0, in id order
+ events = []
+ query = QSqlQuery(db)
+ QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id")
+ while query.next():
+ events.append(query.value(0))
+ return events
+
+# SQL data preparation
+
+def SQLTableDataPrep(query, count):
+ # Return the first 'count' column values of the current row of 'query'
+ # as a list
+ data = []
+ for i in xrange(count):
+ data.append(query.value(i))
+ return data
+
+# SQL table data model item
+
+class SQLTableItem():
+
+ # One table row: its row number and the list of its column values
+
+ def __init__(self, row, data):
+ self.row = row
+ self.data = data
+
+ def getData(self, column):
+ return self.data[column]
+
+# SQL table data model
+
+class SQLTableModel(TableModel):
+
+ # Table model filled in the background by a SQLFetcher running 'sql'.
+ # The 'progress' signal is emitted with the number of rows so far, or
+ # with 0 when there are no more records.
+
+ progress = Signal(object)
+
+ def __init__(self, glb, sql, column_count, parent=None):
+ super(SQLTableModel, self).__init__(parent)
+ self.glb = glb
+ self.more = True
+ # Number of items added to child_items, which can be ahead of
+ # child_count until Update() is called
+ self.populated = 0
+ self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample)
+ self.fetcher.done.connect(self.Update)
+ self.fetcher.Fetch(glb_chunk_sz)
+
+ def DisplayData(self, item, index):
+ self.FetchIfNeeded(item.row)
+ return item.getData(index.column())
+
+ def AddSample(self, data):
+ # Called by the fetcher for each record: append an item for it
+ child = SQLTableItem(self.populated, data)
+ self.child_items.append(child)
+ self.populated += 1
+
+ def Update(self, fetched):
+ # Connected to the fetcher's done signal.  Make the items added since
+ # the last call visible as rows.  A 'fetched' value of zero means there
+ # are no more records.
+ if not fetched:
+ self.more = False
+ self.progress.emit(0)
+ child_count = self.child_count
+ count = self.populated - child_count
+ if count > 0:
+ parent = QModelIndex()
+ self.beginInsertRows(parent, child_count, child_count + count - 1)
+ self.insertRows(child_count, count, parent)
+ self.child_count += count
+ self.endInsertRows()
+ self.progress.emit(self.child_count)
+
+ def FetchMoreRecords(self, count):
+ # Ask the fetcher for 'count' more records, or emit progress(0) if there
+ # are no more.  Returns the current number of rows.
+ current = self.child_count
+ if self.more:
+ self.fetcher.Fetch(count)
+ else:
+ self.progress.emit(0)
+ return current
+
+ def HasMoreRecords(self):
+ return self.more
+
+# SQL automatic table data model
+
+class SQLAutoTableModel(SQLTableModel):
+
+ # Table model for a whole database table or view, with the column headers
+ # read from the database's own description of the table.
+
+ def __init__(self, glb, table_name, parent=None):
+ # By default select in chunks ordered by id, continuing after the last id read
+ sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz)
+ if table_name == "comm_threads_view":
+ # For now, comm_threads_view has no id column
+ sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
+ self.column_headers = []
+ query = QSqlQuery(glb.db)
+ if glb.dbref.is_sqlite3:
+ QueryExec(query, "PRAGMA table_info(" + table_name + ")")
+ while query.next():
+ self.column_headers.append(query.value(1))
+ if table_name == "sqlite_master":
+ # Selected in one go, without the id condition
+ sql = "SELECT * FROM " + table_name
+ else:
+ if table_name[:19] == "information_schema.":
+ # Selected in one go, without the id condition
+ sql = "SELECT * FROM " + table_name
+ select_table_name = table_name[19:]
+ schema = "information_schema"
+ else:
+ select_table_name = table_name
+ schema = "public"
+ QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
+ while query.next():
+ self.column_headers.append(query.value(0))
+ super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent)
+
+ def columnCount(self, parent=None):
+ return len(self.column_headers)
+
+ def columnHeader(self, column):
+ return self.column_headers[column]
+
+# Base class for custom ResizeColumnsToContents
+
+class ResizeColumnsToContentsBase(QObject):
+
+ # The methods here use self.view and self.data_model, which are not set
+ # in this class, so the class using them must provide both.
+
+ def __init__(self, parent=None):
+ super(ResizeColumnsToContentsBase, self).__init__(parent)
+
+ def ResizeColumnToContents(self, column, n):
+ # Using the view's resizeColumnToContents() here is extremely slow
+ # so implement a crude alternative
+ # The width is that of the widest of the first 'n' values and the column
+ # header, each measured with "MM" appended
+ font = self.view.font()
+ metrics = QFontMetrics(font)
+ max = 0
+ for row in xrange(n):
+ val = self.data_model.child_items[row].data[column]
+ len = metrics.width(str(val) + "MM")
+ max = len if len > max else max
+ val = self.data_model.columnHeader(column)
+ len = metrics.width(str(val) + "MM")
+ max = len if len > max else max
+ self.view.setColumnWidth(column, max)
+
+ def ResizeColumnsToContents(self):
+ # Size every column from at most the first 100 rows
+ n = min(self.data_model.child_count, 100)
+ if n < 1:
+ # No data yet, so connect a signal to notify when there is
+ self.data_model.rowsInserted.connect(self.UpdateColumnWidths)
+ return
+ columns = self.data_model.columnCount()
+ for i in xrange(columns):
+ self.ResizeColumnToContents(i, n)
+
+ def UpdateColumnWidths(self, *x):
+ # This only needs to be done once, so disconnect the signal now
+ self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
+ self.ResizeColumnsToContents()
+
+# Table window
+
+class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
+
+ def __init__(self, glb, table_name, parent=None):
+ super(TableWindow, self).__init__(parent)
+
+ self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name))
+
+ self.model = QSortFilterProxyModel()
+ self.model.setSourceModel(self.data_model)
+
+ self.view = QTableView()
+ self.view.setModel(self.model)
+ self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+ self.view.verticalHeader().setVisible(False)
+ self.view.sortByColumn(-1, Qt.AscendingOrder)
+ self.view.setSortingEnabled(True)
+
+ self.ResizeColumnsToContents()
+
+ self.find_bar = FindBar(self, self, True)
+
+ self.finder = ChildDataItemFinder(self.data_model)
+
+ self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
+
+ self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+ self.setWidget(self.vbox.Widget())
+
+ AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table")
+
+ def Find(self, value, direction, pattern, context):
+ self.view.setFocus()
+ self.find_bar.Busy()
+ self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+ def FindDone(self, row):
+ self.find_bar.Idle()
+ if row >= 0:
+ self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+ else:
+ self.find_bar.NotFound()
+
+# Table list
+
+def GetTableList(glb):
+ # Return the names of the tables and views in the database, in name
+ # order, followed by the names of the database's own catalogue tables
+ tables = []
+ query = QSqlQuery(glb.db)
+ if glb.dbref.is_sqlite3:
+ QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name")
+ else:
+ QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name")
+ while query.next():
+ tables.append(query.value(0))
+ if glb.dbref.is_sqlite3:
+ tables.append("sqlite_master")
+ else:
+ tables.append("information_schema.tables")
+ tables.append("information_schema.views")
+ tables.append("information_schema.columns")
+ return tables
+
+# Action Definition
+
+def CreateAction(label, tip, callback, parent=None, shortcut=None):
+ action = QAction(label, parent)
+ if shortcut != None:
+ action.setShortcuts(shortcut)
+ action.setStatusTip(tip)
+ action.triggered.connect(callback)
+ return action
+
+# Typical application actions
+
+def CreateExitAction(app, parent=None):
+ # "Quit" action that closes all of the application's windows
+ return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit)
+
+# Typical MDI actions
+
+def CreateCloseActiveWindowAction(mdi_area):
+ # "Close" action, parented to the MDI area, that closes its active sub-window
+ return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area)
+
+def CreateCloseAllWindowsAction(mdi_area):
+ # "Close All" action, parented to the MDI area, that closes all its sub-windows
+ return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area)
+
+def CreateTileWindowsAction(mdi_area):
+ # "Tile" action, parented to the MDI area, that tiles its sub-windows
+ return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area)
+
+def CreateCascadeWindowsAction(mdi_area):
+ # "Cascade" action, parented to the MDI area, that cascades its sub-windows
+ return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area)
+
+def CreateNextWindowAction(mdi_area):
+ # "Next" action, parented to the MDI area, that activates its next sub-window
+ return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild)
+
+def CreatePreviousWindowAction(mdi_area):
+ # "Previous" action, parented to the MDI area, that activates its previous sub-window
+ return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild)
+
+# Typical MDI window menu
+
+class WindowMenu():
+	"""Typical MDI "Windows" menu, rebuilt each time it is about to be shown.
+
+	The fixed actions (close, close all, tile, cascade, next, previous) are
+	created once; the numbered per-window entries are regenerated by Update().
+	"""
+
+	def __init__(self, mdi_area, menu):
+		# mdi_area: the MDI area whose sub-windows are listed
+		# menu: object providing addMenu(), to which the "&Windows" menu is added
+		self.mdi_area = mdi_area
+		self.window_menu = menu.addMenu("&Windows")
+		self.close_active_window = CreateCloseActiveWindowAction(mdi_area)
+		self.close_all_windows = CreateCloseAllWindowsAction(mdi_area)
+		self.tile_windows = CreateTileWindowsAction(mdi_area)
+		self.cascade_windows = CreateCascadeWindowsAction(mdi_area)
+		self.next_window = CreateNextWindowAction(mdi_area)
+		self.previous_window = CreatePreviousWindowAction(mdi_area)
+		self.window_menu.aboutToShow.connect(self.Update)
+
+	def Update(self):
+		"""Repopulate the menu from the current list of sub-windows."""
+		self.window_menu.clear()
+		sub_windows = self.mdi_area.subWindowList()
+		have_sub_windows = len(sub_windows) != 0
+		self.close_active_window.setEnabled(have_sub_windows)
+		self.close_all_windows.setEnabled(have_sub_windows)
+		self.tile_windows.setEnabled(have_sub_windows)
+		self.cascade_windows.setEnabled(have_sub_windows)
+		self.next_window.setEnabled(have_sub_windows)
+		self.previous_window.setEnabled(have_sub_windows)
+		self.window_menu.addAction(self.close_active_window)
+		self.window_menu.addAction(self.close_all_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.tile_windows)
+		self.window_menu.addAction(self.cascade_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.next_window)
+		self.window_menu.addAction(self.previous_window)
+		if not have_sub_windows:
+			return
+		self.window_menu.addSeparator()
+		active = self.mdi_area.activeSubWindow()
+		for nr, sub_window in enumerate(sub_windows, 1):
+			label = str(nr) + " " + sub_window.name
+			# Only the first nine entries get a keyboard mnemonic on their number
+			if nr < 10:
+				label = "&" + label
+			# addAction(label) both creates the action and appends it to the
+			# menu, so the action must not be added a second time
+			action = self.window_menu.addAction(label)
+			action.setCheckable(True)
+			action.setChecked(sub_window == active)
+			# The triggered signal may deliver a 'checked' argument; the
+			# leading placeholder absorbs it so it cannot replace the window number
+			action.triggered.connect(lambda a=None, x=nr: self.setActiveSubWindow(x))
+
+	def setActiveSubWindow(self, nr):
+		"""Activate the sub-window with 1-based position nr in the sub-window list."""
+		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
+
+# Font resize
+
+def ResizeFont(widget, diff):
+	"""Change the point size of widget's font by diff points (negative shrinks).
+
+	The font is left untouched when the resulting size would be below 1.
+	That covers shrinking a 1-point font, and a font whose pointSize() is
+	not positive to begin with, since a point size must be greater than zero.
+	"""
+	font = widget.font()
+	new_size = font.pointSize() + diff
+	if new_size < 1:
+		return
+	font.setPointSize(new_size)
+	widget.setFont(font)
+
+def ShrinkFont(widget):
+	"""Make widget's font one point smaller."""
+	step = -1
+	ResizeFont(widget, step)
+
+def EnlargeFont(widget):
+	"""Make widget's font one point bigger."""
+	step = 1
+	ResizeFont(widget, step)
+
+# Unique name for sub-windows
+
+def NumberedWindowName(name, nr):
+	"""Return name as-is for nr <= 1, otherwise name followed by " <nr>"."""
+	if nr <= 1:
+		return name
+	return name + " <" + str(nr) + ">"
+
+def UniqueSubWindowName(mdi_area, name):
+	"""Return the first of name, "name <2>", "name <3>", ... not used by any sub-window of mdi_area."""
+	nr = 1
+	while True:
+		candidate = NumberedWindowName(name, nr)
+		taken = any(sub_window.name == candidate for sub_window in mdi_area.subWindowList())
+		if not taken:
+			return candidate
+		nr += 1
+
+# Add a sub-window
+
+def AddSubWindow(mdi_area, sub_window, name):
+	"""Give sub_window a unique title, default geometry and icon, add it to mdi_area and show it.
+
+	The unique title is also stored as sub_window.name.
+	"""
+	title = UniqueSubWindowName(mdi_area, name)
+	sub_window.setMinimumSize(200, 100)
+	sub_window.resize(800, 600)
+	sub_window.setWindowTitle(title)
+	sub_window.setAttribute(Qt.WA_DeleteOnClose)
+	icon = sub_window.style().standardIcon(QStyle.SP_FileIcon)
+	sub_window.setWindowIcon(icon)
+	sub_window.name = title
+	mdi_area.addSubWindow(sub_window)
+	sub_window.show()
+
+# Main window
+
+class MainWindow(QMainWindow):
+	"""Top-level viewer window: an MDI area plus the File, Edit, Reports, Tables and Windows menus."""
+
+	def __init__(self, glb, parent=None):
+		# glb: shared global state; this constructor reads glb.dbname, glb.app and glb.db
+		super(MainWindow, self).__init__(parent)
+
+		self.glb = glb
+
+		self.setWindowTitle("Exported SQL Viewer: " + glb.dbname)
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
+		self.setMinimumSize(200, 100)
+
+		self.mdi_area = QMdiArea()
+		self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+		self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+
+		self.setCentralWidget(self.mdi_area)
+
+		menu = self.menuBar()
+
+		file_menu = menu.addMenu("&File")
+		file_menu.addAction(CreateExitAction(glb.app, self))
+
+		edit_menu = menu.addMenu("&Edit")
+		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+		edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
+		edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
+		edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
+
+		reports_menu = menu.addMenu("&Reports")
+		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
+
+		self.EventMenu(GetEventList(glb.db), reports_menu)
+
+		self.TableMenu(GetTableList(glb), menu)
+
+		self.window_menu = WindowMenu(self.mdi_area, menu)
+
+	def Find(self):
+		"""Activate the find bar of the active sub-window, if there is one."""
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			# Best effort: presumably not every sub-window has a find_bar
+			try:
+				win.find_bar.Activate()
+			except:
+				pass
+
+	def FetchMoreRecords(self):
+		"""Activate the fetch bar of the active sub-window, if there is one."""
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			# Best effort: presumably not every sub-window has a fetch_bar
+			try:
+				win.fetch_bar.Activate()
+			except:
+				pass
+
+	def ShrinkFont(self):
+		"""Shrink the font of the active sub-window's view; do nothing without one."""
+		# activeSubWindow() is falsy when no sub-window is open, as Find() already allows for
+		view = getattr(self.mdi_area.activeSubWindow(), "view", None)
+		if view is not None:
+			ShrinkFont(view)
+
+	def EnlargeFont(self):
+		"""Enlarge the font of the active sub-window's view; do nothing without one."""
+		view = getattr(self.mdi_area.activeSubWindow(), "view", None)
+		if view is not None:
+			EnlargeFont(view)
+
+	def EventMenu(self, events, reports_menu):
+		"""Add an "All branches" report action to reports_menu for each "branches" event.
+
+		An event's id is its 1-based position in events. The id appears in the
+		label only when the number of "branches" events is not exactly one.
+		"""
+		branches_events = sum(1 for event in events if event.split(":")[0] == "branches")
+		for dbid, event in enumerate(events, 1):
+			if event.split(":")[0] != "branches":
+				continue
+			label = "All branches"
+			if branches_events != 1:
+				# dbid is an integer, so it must be converted before concatenation
+				label += " (id=" + str(dbid) + ")"
+			# The leading placeholder absorbs a 'checked' argument, should the
+			# action's triggered signal deliver one, so it cannot replace dbid
+			reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None, x=dbid: self.NewBranchView(x), self))
+
+	def TableMenu(self, tables, menu):
+		"""Add a "&Tables" menu to menu with one table-view action per name in tables."""
+		table_menu = menu.addMenu("&Tables")
+		for table in tables:
+			# Placeholder parameter for the same reason as in EventMenu()
+			table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None, t=table: self.NewTableView(t), self))
+
+	def NewCallGraph(self):
+		"""Open a new call graph window."""
+		CallGraphWindow(self.glb, self)
+
+	def NewBranchView(self, event_id):
+		"""Open a new branch window for the event with id event_id."""
+		BranchWindow(self.glb, event_id, "", "", self)
+
+	def NewTableView(self, table_name):
+		"""Open a new table window for table_name."""
+		TableWindow(self.glb, table_name, self)
+
+# XED Disassembler
+
+class xed_state_t(Structure):
+	"""ctypes structure of two C ints, mode and width, filled in by LibXED.SetMode()."""
+
+	_fields_ = [("mode", c_int), ("width", c_int)]
+
+class XEDInstruction():
+	"""Scratch storage for one instruction: decoded-instruction bytes, mode state and a text buffer.
+
+	For each of the three, both the ctypes object and its address are kept.
+	"""
+
+	def __init__(self, libxed):
+		# Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+		self.xedd = (c_byte * 512)()
+		self.xedp = addressof(self.xedd)
+		libxed.xed_decoded_inst_zero(self.xedp)
+		self.state = xed_state_t()
+		self.statep = addressof(self.state)
+		# Buffer for disassembled instruction text
+		self.buffer = create_string_buffer(256)
+		self.bufferp = addressof(self.buffer)
+
+class LibXED():
+	"""ctypes bindings for the libxed.so functions used to disassemble one instruction at a time."""
+
+	def __init__(self):
+		self.libxed = CDLL("libxed.so")
+
+		# (function name, restype, argtypes); each function is looked up in this
+		# order and stored as an attribute of the same name
+		prototypes = (
+			("xed_tables_init", None, []),
+			("xed_decoded_inst_zero", None, [ c_void_p ]),
+			("xed_operand_values_set_mode", None, [ c_void_p, c_void_p ]),
+			("xed_decoded_inst_zero_keep_mode", None, [ c_void_p ]),
+			("xed_decode", c_int, [ c_void_p, c_void_p, c_uint ]),
+			("xed_format_context", c_uint, [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]),
+		)
+		for func_name, restype, argtypes in prototypes:
+			func = getattr(self.libxed, func_name)
+			setattr(self, func_name, func)
+			func.restype = restype
+			func.argtypes = argtypes
+
+		self.xed_tables_init()
+
+	def Instruction(self):
+		"""Return a new XEDInstruction bound to this library."""
+		return XEDInstruction(self)
+
+	def SetMode(self, inst, mode):
+		"""Set inst's decode mode: a true mode selects 32-bit, a false one 64-bit."""
+		state = inst.state
+		if not mode:
+			state.mode = 1 # 64-bit
+			state.width = 8 # 8 bytes
+		else:
+			state.mode = 4 # 32-bit
+			state.width = 4 # 4 bytes
+		self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+	def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+		"""Decode and format the instruction at bytes_ptr.
+
+		Returns (length, text), or (0, "") if decoding or formatting fails.
+		"""
+		failed = 0, ""
+		self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+		if self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt):
+			return failed
+		# Use AT&T mode (2), alternative is Intel (3)
+		if not self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0):
+			return failed
+		# Return instruction length and the disassembled instruction text
+		# For now, assume the length is in byte 166
+		return inst.xedd[166], inst.buffer.value
+
+def TryOpen(file_name):
+	"""Open file_name for binary reading; return the file object, or None on any failure."""
+	f = None
+	try:
+		f = open(file_name, "rb")
+	except:
+		pass
+	return f
+
+def Is64Bit(f):
+	"""Return True if the open binary file f is a 64-bit ELF object, False if it is 32-bit ELF.
+
+	For anything not recognised as ELF, including files shorter than the 7
+	header bytes examined, the answer is whether this host has 64-bit
+	pointers. The file position of f is restored before returning.
+	"""
+	# Fallback for non-ELF input. This is a comparison because the raw
+	# pointer size (4 or 8) would count as true on every host
+	result = sizeof(c_void_p) == 8
+	# ELF support only
+	pos = f.tell()
+	f.seek(0)
+	# bytearray indexing yields integers on both Python 2 and Python 3
+	header = bytearray(f.read(7))
+	f.seek(pos)
+	if len(header) < 7:
+		return result
+	eclass, encoding, version = header[4], header[5], header[6]
+	if header[0:4] == bytearray(b"\x7fELF") and 0 < eclass < 3 and 0 < encoding < 3 and version == 1:
+		# ELF class 2 is the 64-bit class
+		result = eclass == 2
+	return result
+
+# Global data
+
+# Application-wide state: the database reference and connection, the build-id
+# directory, the Qt application and main window (both None until assigned by
+# the caller) and an optional disassembler.
+class Glb():
+
+ def __init__(self, dbref, db, dbname):
+ self.dbref = dbref
+ self.db = db
+ self.dbname = dbname
+ self.home_dir = os.path.expanduser("~")
+ # Build-id directory: $PERF_BUILDID_DIR/.build-id/ when that variable is
+ # set and non-empty, otherwise ~/.debug/.build-id/
+ self.buildid_dir = os.getenv("PERF_BUILDID_DIR")
+ if self.buildid_dir:
+ self.buildid_dir += "/.build-id/"
+ else:
+ self.buildid_dir = self.home_dir + "/.debug/.build-id/"
+ self.app = None
+ self.mainwindow = None
+ # A WeakSet, so membership alone does not keep a registered instance alive
+ self.instances_to_shutdown_on_exit = weakref.WeakSet()
+ # The disassembler is optional: any failure constructing LibXED is
+ # swallowed and recorded as have_disassembler = False
+ try:
+ self.disassembler = LibXED()
+ self.have_disassembler = True
+ except:
+ self.have_disassembler = False
+
+ # Open <buildid_dir>/<first 2 chars of build_id>/<remaining chars>/elf.
+ # Returns what TryOpen() returns: a file object, or None on failure.
+ def FileFromBuildId(self, build_id):
+ file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
+ return TryOpen(file_name)
+
+ # Find a file to read object code from, trying in turn: the file named by
+ # $PERF_KCORE (considered only for "[kernel..." names whose long_name has
+ # basename "kcore"), then long_name, then the build-id directory.
+ # Returns an open file object, or None if nothing could be opened.
+ # NOTE(review): indentation is collapsed in this copy, so whether the
+ # long_name attempt is nested under the kcore check cannot be told here.
+ def FileFromNamesAndBuildId(self, short_name, long_name, build_id):
+ # Assume current machine i.e. no support for virtualization
+ if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore":
+ file_name = os.getenv("PERF_KCORE")
+ f = TryOpen(file_name) if file_name else None
+ if f:
+ return f
+ # For now, no special handling if long_name is /proc/kcore
+ f = TryOpen(long_name)
+ if f:
+ return f
+ f = self.FileFromBuildId(build_id)
+ if f:
+ return f
+ return None
+
+ # Register an instance whose Shutdown() is to be called by ShutdownInstances()
+ def AddInstanceToShutdownOnExit(self, instance):
+ self.instances_to_shutdown_on_exit.add(instance)
+
+ # Shutdown any background processes or threads
+ # Errors raised by an individual Shutdown() are ignored so that the
+ # remaining instances are still shut down.
+ def ShutdownInstances(self):
+ for x in self.instances_to_shutdown_on_exit:
+ try:
+ x.Shutdown()
+ except:
+ pass
+
+# Database reference
+
+class DBRef():
+	"""Description of a database from which any number of named connections can be opened."""
+
+	def __init__(self, is_sqlite3, dbname):
+		# is_sqlite3: True selects the QSQLITE driver, False selects QPSQL
+		# dbname: SQLite file name, or whitespace-separated PostgreSQL options
+		self.is_sqlite3 = is_sqlite3
+		self.dbname = dbname
+
+	def Open(self, connection_name):
+		"""Open a connection called connection_name and return (db, dbname).
+
+		For PostgreSQL, dbname is split on whitespace. "key=value" tokens with
+		key hostname, port, username, password or dbname configure the
+		connection, other keys are ignored, and a token without "=" is taken
+		as the database name.
+
+		Raises Exception if the database cannot be opened.
+		"""
+		dbname = self.dbname
+		if self.is_sqlite3:
+			db = QSqlDatabase.addDatabase("QSQLITE", connection_name)
+		else:
+			db = QSqlDatabase.addDatabase("QPSQL", connection_name)
+			for opt in dbname.split():
+				if "=" not in opt:
+					dbname = opt
+					continue
+				# Split on the first "=" only, so that a value (a password,
+				# say) containing "=" is not truncated
+				key, value = opt.split("=", 1)
+				if key == "hostname":
+					db.setHostName(value)
+				elif key == "port":
+					db.setPort(int(value))
+				elif key == "username":
+					db.setUserName(value)
+				elif key == "password":
+					db.setPassword(value)
+				elif key == "dbname":
+					dbname = value
+
+		db.setDatabaseName(dbname)
+		if not db.open():
+			raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+		return db, dbname
+
+# Main
+
+def Main():
+	"""Open the database named on the command line and run the viewer until it exits.
+
+	Raises Exception if no database name was given. Exits the process with
+	the application's exit status.
+	"""
+	if (len(sys.argv) < 2):
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>"
+		raise Exception("Too few arguments")
+
+	dbname = sys.argv[1]
+
+	# A readable file starting with the SQLite magic string is an SQLite
+	# database; anything else is handled by the QPSQL driver
+	is_sqlite3 = False
+	try:
+		# 'with' closes the file even if the read itself fails
+		with open(dbname, "rb") as f:
+			is_sqlite3 = f.read(15) == b"SQLite format 3"
+	except Exception:
+		# Deliberately best effort: dbname need not name a readable file
+		pass
+
+	dbref = DBRef(is_sqlite3, dbname)
+	db, dbname = dbref.Open("main")
+	glb = Glb(dbref, db, dbname)
+	app = QApplication(sys.argv)
+	glb.app = app
+	mainwindow = MainWindow(glb)
+	glb.mainwindow = mainwindow
+	mainwindow.show()
+	err = app.exec_()
+	# Shut down registered instances before closing the main connection
+	glb.ShutdownInstances()
+	db.close()
+	sys.exit(err)
+
+# Run the viewer only when this file is executed as a script, not when imported
+if __name__ == "__main__":
+ Main()
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index c3b0afd..3043130 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -5,6 +5,7 @@
libperf-y += ioctl.o
endif
libperf-y += kcmp.o
+libperf-y += mount_flags.o
libperf-y += pkey_alloc.o
libperf-y += prctl.o
libperf-y += sockaddr.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 2570152..039c290 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -24,6 +24,7 @@ struct strarray {
}
size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags);
struct trace;
struct thread;
@@ -122,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar
size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags);
+#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+
size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index d64d049..0104065 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/cone.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh
index 9d38168..9aa94fd 100755
--- a/tools/perf/trace/beauty/drm_ioctl.sh
+++ b/tools/perf/trace/beauty/drm_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
index 5d6a477..db5b9b492 100644
--- a/tools/perf/trace/beauty/eventfd.c
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE 1
#endif
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
index 9e8900c..e6de316 100644
--- a/tools/perf/trace/beauty/fcntl.c
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/fcntl.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
index c4ff6ad..cf02ae5 100644
--- a/tools/perf/trace/beauty/flock.c
+++ b/tools/perf/trace/beauty/flock.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
index 61850fb..1136bde 100644
--- a/tools/perf/trace/beauty/futex_op.c
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <linux/futex.h>
#ifndef FUTEX_WAIT_BITSET
diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c
index 26f6b32..138b7d5 100644
--- a/tools/perf/trace/beauty/futex_val3.c
+++ b/tools/perf/trace/beauty/futex_val3.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <linux/futex.h>
#ifndef FUTEX_BITSET_MATCH_ANY
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 1be3b4c..5d2a7fd 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/ioctl.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
index f62040e..b276a27 100644
--- a/tools/perf/trace/beauty/kcmp.c
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/kcmp.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
index a3c304c..df8b174 100755
--- a/tools/perf/trace/beauty/kcmp_type.sh
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
index c4699fd..4ce54f5 100755
--- a/tools/perf/trace/beauty/kvm_ioctl.sh
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
index 431639e..4527d29 100755
--- a/tools/perf/trace/beauty/madvise_behavior.sh
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9f68077..c534bd9 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,5 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <uapi/linux/mman.h>
+#include <linux/log2.h>
static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
struct syscall_arg *arg)
@@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+/*
+ * Format the bits set in 'flags' into 'bf' (at most 'size' bytes) by passing
+ * the mmap_flags table to strarray__scnprintf_flags(), whose result is returned.
+ * The table comes from the generated file included inside the function body.
+ * NOTE(review): DEFINE_STRARRAY() is not visible here; it presumably wraps
+ * mmap_flags[] as strarray__mmap_flags -- confirm in beauty.h.
+ */
+static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mmap_flags_array.c"
+ static DEFINE_STRARRAY(mmap_flags);
+
+ return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags);
+}
+
static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
- int printed = 0, flags = arg->val;
+ unsigned long flags = arg->val;
if (flags & MAP_ANONYMOUS)
arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
-#define P_MMAP_FLAG(n) \
- if (flags & MAP_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~MAP_##n; \
- }
-
- P_MMAP_FLAG(SHARED);
- P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
- P_MMAP_FLAG(32BIT);
-#endif
- P_MMAP_FLAG(ANONYMOUS);
- P_MMAP_FLAG(DENYWRITE);
- P_MMAP_FLAG(EXECUTABLE);
- P_MMAP_FLAG(FILE);
- P_MMAP_FLAG(FIXED);
-#ifdef MAP_FIXED_NOREPLACE
- P_MMAP_FLAG(FIXED_NOREPLACE);
-#endif
- P_MMAP_FLAG(GROWSDOWN);
- P_MMAP_FLAG(HUGETLB);
- P_MMAP_FLAG(LOCKED);
- P_MMAP_FLAG(NONBLOCK);
- P_MMAP_FLAG(NORESERVE);
- P_MMAP_FLAG(POPULATE);
- P_MMAP_FLAG(STACK);
- P_MMAP_FLAG(UNINITIALIZED);
-#ifdef MAP_SYNC
- P_MMAP_FLAG(SYNC);
-#endif
-#undef P_MMAP_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
+ return mmap__scnprintf_flags(flags, bf, size);
}
#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
new file mode 100755
index 0000000..22c3fdc
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# Print a C string table, mmap_flags[], indexed by ilog2(flag) + 1, built from
+# the hexadecimal MAP_* defines in the arch and asm-generic mman headers.
+#
+# Usage: mmap_flags.sh [hostarch]
+#        mmap_flags.sh <header_dir> <arch_header_dir>
+
+if [ $# -ne 2 ] ; then
+	[ $# -eq 1 ] && hostarch=$1 || hostarch=$(uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+	header_dir=tools/include/uapi/asm-generic
+	arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+	header_dir=$1
+	arch_header_dir=$2
+fi
+
+arch_mman=${arch_header_dir}/mman.h
+
+# those in grep -E -vw are flags, we want just the bits
+
+# The regex is always quoted: it is full of glob characters ('*', '[', ...)
+# that the shell must not expand against file names
+printf "static const char *mmap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+# Flags defined directly in the arch header
+grep -E -q "$regex" "${arch_mman}" && \
+(grep -E "$regex" "${arch_mman}" | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+# Flags from mman-common.h, when the arch header includes an asm-generic mman header
+grep -E -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' "${arch_mman}" &&
+(grep -E "$regex" "${header_dir}/mman-common.h" | \
+	grep -E -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+# Flags from asm-generic mman.h, when the arch header includes exactly that file
+grep -E -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' "${arch_mman}" &&
+(grep -E "$regex" "${header_dir}/mman.h" | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
index d929ad7..6879d36 100644
--- a/tools/perf/trace/beauty/mode_t.c
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c
new file mode 100644
index 0000000..712935c
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/mount_flags.c
+ *
+ * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/mount.h>
+
+/*
+ * Format the bits set in 'flags' into 'bf' (at most 'size' bytes) by passing
+ * the mount_flags table to strarray__scnprintf_flags(), whose result is returned.
+ * The table comes from the generated file included inside the function body.
+ * NOTE(review): DEFINE_STRARRAY() is not visible here; it presumably wraps
+ * mount_flags[] as strarray__mount_flags -- confirm in beauty.h.
+ */
+static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mount_flags_array.c"
+ static DEFINE_STRARRAY(mount_flags);
+
+ return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags);
+}
+
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags)
+{
+	/*
+	 * Same rule as do_mount() in fs/namespace.c:
+	 *
+	 * Pre-0.97 versions of mount() didn't have a flags word. When the
+	 * flags word was introduced its top half was required to have the
+	 * magic value 0xC0ED, and this remained so until 2.4.0-test9.
+	 * Therefore, if this magic number is present, it carries no
+	 * information and must be discarded.
+	 */
+	const unsigned long magic = flags & MS_MGC_MSK;
+
+	return magic == MS_MGC_VAL ? flags & ~MS_MGC_MSK : flags;
+}
+
+/* Format the flags held in arg->val into 'bf'; returns what mount__scnprintf_flags() returns. */
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	return mount__scnprintf_flags(arg->val, bf, size);
+}
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
new file mode 100755
index 0000000..4554757
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# Print a C string table, mount_flags[], built from the MS_* defines in fs.h.
+#
+# Usage: mount_flags.sh [header_dir]
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+# The regexes are always quoted: they are full of glob characters
+# ('*', '[', ...) that the shell must not expand against file names
+printf "static const char *mount_flags[] = {\n"
+# MS_* defined as plain decimal values: index is ilog2(value) + 1, or 0 for a zero value
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+grep -E "$regex" "${header_dir}/fs.h" | grep -E -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
+	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+# MS_* defined as (1<<N): index is N + 1
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
+grep -E "$regex" "${header_dir}/fs.h" | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[%s + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index c064d6a..1b9d630 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
#include <sys/socket.h>
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
index 6aec617..cc673fe 100644
--- a/tools/perf/trace/beauty/open_flags.c
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 2bafd7c..981185c 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#endif
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
index 6492c74..9aabd97 100755
--- a/tools/perf/trace/beauty/perf_ioctl.sh
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
index 0313df3..1a6acc4 100644
--- a/tools/perf/trace/beauty/pid.c
+++ b/tools/perf/trace/beauty/pid.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
+
size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
{
int pid = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
index 2ba784a..1b8ed4c 100644
--- a/tools/perf/trace/beauty/pkey_alloc.c
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -1,40 +1,36 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/pkey_alloc.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
#include <linux/log2.h>
-static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags)
{
int i, printed = 0;
-#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
- static DEFINE_STRARRAY(pkey_alloc_access_rights);
-
- if (access_rights == 0) {
- const char *s = strarray__pkey_alloc_access_rights.entries[0];
+ if (flags == 0) {
+ const char *s = sa->entries[0];
if (s)
return scnprintf(bf, size, "%s", s);
return scnprintf(bf, size, "%d", 0);
}
- for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
- int bit = 1 << (i - 1);
+ for (i = 1; i < sa->nr_entries; ++i) {
+ unsigned long bit = 1UL << (i - 1);
- if (!(access_rights & bit))
+ if (!(flags & bit))
continue;
if (printed != 0)
printed += scnprintf(bf + printed, size - printed, "|");
- if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
- printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+ if (sa->entries[i] != NULL)
+ printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]);
else
printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
}
@@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s
return printed;
}
+/*
+ * Format the bits set in 'access_rights' into 'bf' (at most 'size' bytes) by
+ * passing the pkey_alloc_access_rights table to strarray__scnprintf_flags(),
+ * whose result is returned. The table comes from the generated file included
+ * inside the function body.
+ * NOTE(review): access_rights is an int converted to the helper's unsigned
+ * long parameter, so a negative value would be sign-extended -- confirm
+ * callers never pass one.
+ */
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+ static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+ return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights);
+}
+
size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
{
unsigned long cmd = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
index e0a51ae..f8f1b56 100755
--- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index 246130d..be7a5d3 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/prctl.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index f247221..d32f8f1 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
index ba5096a..48f2b5c9 100644
--- a/tools/perf/trace/beauty/sched_policy.c
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sched.h>
/*
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
index b7097fd..e36156b 100644
--- a/tools/perf/trace/beauty/seccomp.c
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
index bde18a5..587fec5 100644
--- a/tools/perf/trace/beauty/signum.c
+++ b/tools/perf/trace/beauty/signum.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <signal.h>
static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
index eb511bb..e0803b9 100755
--- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
index 6818392..7a464a7 100755
--- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index 71a79f7..9410ad2 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
index 65227269..d971a25 100644
--- a/tools/perf/trace/beauty/socket.c
+++ b/tools/perf/trace/beauty/socket.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/socket.c
*
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
index a3cc246..de0f2f2 100755
--- a/tools/perf/trace/beauty/socket_ipproto.sh
+++ b/tools/perf/trace/beauty/socket_ipproto.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
index bca26ae..a63a9a3 100644
--- a/tools/perf/trace/beauty/socket_type.c
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
#include <sys/socket.h>
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 5643b69..630f276 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* trace/beauty/statx.c
*
* Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
*/
#include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 0f6a519..439773d 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
index 8465281..42ff58a 100644
--- a/tools/perf/trace/beauty/waitid_options.c
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
#include <sys/types.h>
#include <sys/wait.h>
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 28cd6a1..6936daf 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
#include "arch/x86/annotate/instructions.c"
#include "arch/powerpc/annotate/instructions.c"
#include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
static struct arch architectures[] = {
{
@@ -170,6 +171,13 @@ static struct arch architectures[] = {
.comment_char = '#',
},
},
+ {
+ .name = "sparc",
+ .init = sparc__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
};
static void ins__delete(struct ins_operands *ops)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c4617bc..72d5ba2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample)
{
- synth_opts->instructions = true;
synth_opts->branches = true;
synth_opts->transactions = true;
synth_opts->ptwrites = true;
synth_opts->pwr_events = true;
synth_opts->errors = true;
- synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
- synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ if (no_sample) {
+ synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ synth_opts->period = 1;
+ synth_opts->calls = true;
+ } else {
+ synth_opts->instructions = true;
+ synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
synth_opts->initial_skip = 0;
@@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
if (!str) {
- itrace_synth_opts__set_default(synth_opts);
+ itrace_synth_opts__set_default(synth_opts, false);
return 0;
}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d88f6e9e..8e50f96 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -58,6 +58,7 @@ enum itrace_period_type {
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
* @inject: indicates the event (not just the sample) must be fully synthesized
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
@@ -82,6 +83,7 @@ enum itrace_period_type {
*/
struct itrace_synth_opts {
bool set;
+ bool default_no_sample;
bool inject;
bool instructions;
bool branches;
@@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
union perf_event *event);
int itrace_parse_synth_opts(const struct option *opt, const char *str,
int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample);
size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
void perf_session__auxtrace_error_inc(struct perf_session *session,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 2ae6402..73430b7 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+ struct machine *machine;
+
+ machine = etmq->etm->machine;
+
+ if (address >= etmq->etm->kernel_start) {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_KERNEL;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_USER;
+ else if (perf_guest)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_HYPERVISOR;
+ }
+}
+
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
size_t size, u8 *buffer)
{
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
return -1;
machine = etmq->etm->machine;
- if (address >= etmq->etm->kernel_start)
- cpumode = PERF_RECORD_MISC_KERNEL;
- else
- cpumode = PERF_RECORD_MISC_USER;
+ cpumode = cs_etm__cpu_mode(etmq, address);
thread = etmq->thread;
if (!thread) {
@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
struct perf_sample sample = {.ip = 0,};
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = addr;
@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
sample.cpu = etmq->packet->cpu;
sample.flags = 0;
sample.insn_len = 1;
- sample.cpumode = event->header.misc;
+ sample.cpumode = event->sample.header.misc;
if (etm->synth_opts.last_branch) {
cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
u64 nr;
struct branch_entry entries;
} dummy_bs;
+ u64 ip;
+
+ ip = cs_etm__last_executed_instr(etmq->prev_packet);
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
- sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+ sample.ip = ip;
sample.pid = etmq->pid;
sample.tid = etmq->tid;
sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
sample.period = 1;
sample.cpu = etmq->packet->cpu;
sample.flags = 0;
- sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.cpumode = event->sample.header.misc;
/*
* perf report cannot handle events without a branch stack
@@ -1432,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&etm->synth_opts);
+ itrace_synth_opts__set_default(&etm->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
etm->synth_opts.callchain = false;
}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1f3ccc36..d01b8355 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -63,6 +63,7 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
+ u64 clockid_res_ns;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bc64618..e9c108a 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
event->fork.pid = tgid;
event->fork.tid = pid;
event->fork.header.type = PERF_RECORD_FORK;
+ event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index be440df..e88e6f9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
struct perf_evsel *pos;
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
continue;
perf_evsel__disable(pos);
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 29d7b97..6d18705 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
+ evsel->max_events = ULONG_MAX;
evsel->evlist = NULL;
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
max_stack = term->val.max_stack;
break;
+ case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+ evsel->max_events = term->val.max_events;
+ break;
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
@@ -1203,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
int perf_evsel__enable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel,
- PERF_EVENT_IOC_ENABLE,
- 0);
+ int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+ if (!err)
+ evsel->disabled = false;
+
+ return err;
}
int perf_evsel__disable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel,
- PERF_EVENT_IOC_DISABLE,
- 0);
+ int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+ /*
+ * We mark it disabled here so that tools that disable an event can
+ * ignore events after they disable it. I.e. the ring buffer may already
+ * have a few more events queued up before the kernel got the stop
+ * request.
+ */
+ if (!err)
+ evsel->disabled = true;
+
+ return err;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4107c39..3147ca7 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@ enum term_type {
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+ PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
PERF_EVSEL__CONFIG_TERM_OVERWRITE,
PERF_EVSEL__CONFIG_TERM_DRV_CFG,
PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
bool inherit;
bool overwrite;
char *branch;
+ unsigned long max_events;
} val;
bool weak;
};
@@ -99,6 +101,8 @@ struct perf_evsel {
struct perf_counts *prev_raw_counts;
int idx;
u32 ids;
+ unsigned long max_events;
+ unsigned long nr_events_printed;
char *name;
double scale;
const char *unit;
@@ -119,6 +123,7 @@ struct perf_evsel {
bool snapshot;
bool supported;
bool needs_swap;
+ bool disabled;
bool no_aux_samples;
bool immediate;
bool system_wide;
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index de322d5..b72440b 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__powerpc__)
#define GEN_ELF_ARCH EM_PPC
#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH EM_SPARCV9
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH EM_SPARC
+#define GEN_ELF_CLASS ELFCLASS32
#else
#error "unsupported architecture"
#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1ec1d9b..4fd45be95 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
return err;
}
+static int write_clockid(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return do_write(ff, &ff->ph->env.clockid_res_ns,
+ sizeof(ff->ph->env.clockid_res_ns));
+}
+
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# Core ID and Socket ID information is not available\n");
}
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+ ff->ph->env.clockid_res_ns * 1000);
+}
+
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
@@ -2531,6 +2544,15 @@ static int process_mem_topology(struct feat_fd *ff,
return ret;
}
+static int process_clockid(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+ return -1;
+
+ return 0;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
+ FEAT_OPR(CLOCKID, clockid, false)
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e17903c..0d553dd 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -38,6 +38,7 @@ enum {
HEADER_CACHE,
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
+ HEADER_CLOCKID,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7f0c83b..7b27d77 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
return 0;
}
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+ return machine__kernel_ip(bts->machine, ip) ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
struct branch *branch)
{
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
bts->num_events++ <= bts->synth_opts.initial_skip)
return 0;
- event.sample.header.type = PERF_RECORD_SAMPLE;
- event.sample.header.misc = PERF_RECORD_MISC_USER;
- event.sample.header.size = sizeof(struct perf_event_header);
-
- sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = le64_to_cpu(branch->from);
+ sample.cpumode = intel_bts_cpumode(bts, sample.ip);
sample.pid = btsq->pid;
sample.tid = btsq->tid;
sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
sample.insn_len = btsq->intel_pt_insn.length;
memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
if (bts->synth_opts.inject) {
event.sample.header.size = bts->branches_event_size;
ret = perf_event__synthesize_sample(&event,
@@ -910,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
bts->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&bts->synth_opts);
+ itrace_synth_opts__set_default(&bts->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
if (session->itrace_synth_opts)
bts->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 48c1d41..86cc9a6 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+ return ip >= pt->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
if (to_ip && *ip == to_ip)
goto out_no_cache;
- if (*ip >= ptq->pt->kernel_start)
- cpumode = PERF_RECORD_MISC_KERNEL;
- else
- cpumode = PERF_RECORD_MISC_USER;
+ cpumode = intel_pt_cpumode(ptq->pt, *ip);
thread = ptq->thread;
if (!thread) {
@@ -759,7 +763,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
if (pt->synth_opts.callchain) {
size_t sz = sizeof(struct ip_callchain);
- sz += pt->synth_opts.callchain_sz * sizeof(u64);
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
ptq->chain = zalloc(sz);
if (!ptq->chain)
goto out_free;
@@ -1058,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
{
- event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
- event->sample.header.size = sizeof(struct perf_event_header);
-
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
- sample->cpumode = PERF_RECORD_MISC_USER;
sample->ip = ptq->state->from_ip;
+ sample->cpumode = intel_pt_cpumode(pt, sample->ip);
sample->pid = ptq->pid;
sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
@@ -1075,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
sample->flags = ptq->flags;
sample->insn_len = ptq->insn_len;
memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
@@ -1160,7 +1165,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
- pt->synth_opts.callchain_sz, sample->ip);
+ pt->synth_opts.callchain_sz + 1,
+ sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
}
@@ -2559,7 +2565,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
pt->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&pt->synth_opts);
+ itrace_synth_opts__set_default(&pt->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
if (use_browser != -1) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae85..8f36ce8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
+ bool do_maps_clone = true;
int err = 0;
if (dump_trace)
@@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
+ /*
+ * When synthesizing FORK events, we are trying to create thread
+ * objects for the already running tasks on the machine.
+ *
+ * Normally, for a kernel FORK event, we want to clone the parent's
+ * maps because that is what the kernel just did.
+ *
+ * But when synthesizing, this should not be done. If we do, we end up
+ * with overlapping maps as we process the synthesized MMAP2 events that
+ * get delivered shortly thereafter.
+ *
+ * Use the FORK event misc flags in an internal way to signal this
+ * situation, so we can elide the map clone when appropriate.
+ */
+ if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+ do_maps_clone = false;
if (thread == NULL || parent == NULL ||
- thread__fork(thread, parent, sample->time) < 0) {
+ thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
err = -1;
}
@@ -2140,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
return 0;
}
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct perf_evsel *evsel,
@@ -2246,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
+ if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
for (i = first_call, nr_entries = 0;
i < chain_nr && nr_entries < max_stack; i++) {
u64 ip;
@@ -2260,9 +2304,15 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
#endif
ip = chain->ips[j];
-
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f8cd3e7..59be346 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
[PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
+ [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr",
[PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
[PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
@@ -1037,6 +1038,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
CHECK_TYPE_VAL(NUM);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ CHECK_TYPE_VAL(NUM);
+ break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+ break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
break;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 4473dac..5ed035c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+ PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
PARSE_EVENTS__TERM_TYPE_OVERWRITE,
PARSE_EVENTS__TERM_TYPE_DRV_CFG,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 5f761f3..7805c71 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -269,6 +269,7 @@
call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 0281d5e..66a84d5 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
plt_entry_size = 16;
break;
- default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+ case EM_SPARC:
+ plt_header_size = 48;
+ plt_entry_size = 12;
+ break;
+
+ case EM_SPARCV9:
+ plt_header_size = 128;
+ plt_entry_size = 32;
+ break;
+
+ default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/xtensa need to be checked */
plt_header_size = shdr_plt.sh_entsize;
plt_entry_size = shdr_plt.sh_entsize;
break;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 20f4977..d026d21 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -123,7 +123,8 @@ struct symbol_conf {
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
- *field_sep;
+ *field_sep,
+ *graph_function;
const char *default_guest_vmlinux_name,
*default_guest_kallsyms,
*default_guest_modules;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index c091635..61a4286 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread)
}
}
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
- size_t sz, u64 ip)
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
- size_t i;
+ return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
- if (!thread || !thread->ts)
- chain->nr = 1;
- else
- chain->nr = min(sz, thread->ts->cnt + 1);
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start)
+{
+ u64 context = callchain_context(ip, kernel_start);
+ u64 last_context;
+ size_t i, j;
- chain->ips[0] = ip;
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
- for (i = 1; i < chain->nr; i++)
- chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+ chain->ips[0] = context;
+ chain->ips[1] = ip;
+
+ if (!thread || !thread->ts) {
+ chain->nr = 2;
+ return;
+ }
+
+ last_context = context;
+
+ for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+ ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (i >= sz - 1)
+ break;
+ chain->ips[i++] = context;
+ last_context = context;
+ }
+ chain->ips[i] = ip;
+ }
+
+ chain->nr = i;
}
struct call_return_processor *
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b7e41c4..f97c00a 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr);
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
- size_t sz, u64 ip);
+ size_t sz, u64 ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 2048d39..3d9ed7d 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
}
static int thread__clone_map_groups(struct thread *thread,
- struct thread *parent)
+ struct thread *parent,
+ bool do_maps_clone)
{
/* This is new thread, we share map groups for process. */
if (thread->pid_ == parent->pid_)
@@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
thread->pid_, thread->tid, parent->pid_, parent->tid);
return 0;
}
-
/* But this one is new process, copy maps. */
- if (map_groups__clone(thread, parent->mg) < 0)
- return -ENOMEM;
-
- return 0;
+ return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
}
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
{
if (parent->comm_set) {
const char *comm = thread__comm_str(parent);
@@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
}
thread->ppid = parent->tid;
- return thread__clone_map_groups(thread, parent);
+ return thread__clone_map_groups(thread, parent, do_maps_clone);
}
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 07606aa..30e2b4c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -42,6 +42,8 @@ struct thread {
void *addr_space;
struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
+ bool filter;
+ int filter_entry_depth;
};
struct machine;
@@ -87,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
const char *thread__comm_str(const struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
size_t thread__fprintf(struct thread *thread, FILE *fp);
struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 6f318b1..5eff9bf 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
Dwarf_Addr s;
dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
- if (s != al->map->start)
+ if (s != al->map->start - al->map->pgoff)
mod = 0;
}
if (!mod)
mod = dwfl_report_elf(ui->dwfl, dso->short_name,
- (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+ (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
false);
return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index d3273b5..ae8180b 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,6 +11,8 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+
const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
const char *cfg_map_name = "jmp_table";
bool cfg_attach = true;
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
index 42544a9..a9bc6f8 100755
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -10,7 +10,7 @@
echo -n "Wait for testing link-local IP to become available "
for _i in $(seq ${MAX_PING_TRIES}); do
echo -n "."
- if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+ if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
echo " OK"
return
fi
@@ -58,5 +58,6 @@
BPF_PROG_SECTION="cgroup_id_logger"
BPF_PROG_ID=0
PROG="${DIR}/test_skb_cgroup_id_user"
+type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
main
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index 9832a87..3b9fdb8 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,8 @@
ping_once()
{
- ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+ type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
+ $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
}
wait_for_ip()
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 36f3d30..6f61df6 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -76,7 +76,7 @@ struct bpf_test {
int fixup_percpu_cgroup_storage[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
- uint32_t retval;
+ uint32_t retval, retval_unpriv;
enum {
UNDEF,
ACCEPT,
@@ -3084,6 +3084,8 @@ static struct bpf_test tests[] = {
.fixup_prog1 = { 2 },
.result = ACCEPT,
.retval = 42,
+ /* Verifier rewrite for unpriv skips tail call here. */
+ .retval_unpriv = 2,
},
{
"stack pointer arithmetic",
@@ -6455,6 +6457,256 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
+ "map access: known scalar += value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: value_ptr += known scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: unknown scalar += value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: value_ptr += unknown scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: value_ptr += value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R0 pointer += pointer prohibited",
+ },
+ {
+ "map access: known scalar -= value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R1 tried to subtract pointer from scalar",
+ },
+ {
+ "map access: value_ptr -= known scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the array range",
+ },
+ {
+ "map access: value_ptr -= known scalar, 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: unknown scalar -= value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R1 tried to subtract pointer from scalar",
+ },
+ {
+ "map access: value_ptr -= unknown scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R0 min value is negative",
+ },
+ {
+ "map access: value_ptr -= unknown scalar, 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "map access: value_ptr -= value_ptr",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ .errstr_unpriv = "R0 pointer -= pointer prohibited",
+ },
+ {
"map lookup helper access to map",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -13899,6 +14151,33 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
}
}
+static int set_admin(bool admin)
+{
+ cap_t caps;
+ const cap_value_t cap_val = CAP_SYS_ADMIN;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return -1;
+ }
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+ admin ? CAP_SET : CAP_CLEAR)) {
+ perror("cap_set_flag");
+ goto out;
+ }
+ if (cap_set_proc(caps)) {
+ perror("cap_set_proc");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (cap_free(caps))
+ perror("cap_free");
+ return ret;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
@@ -13907,6 +14186,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
struct bpf_insn *prog = test->insns;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
+ uint32_t expected_val;
uint32_t retval;
int i, err;
@@ -13926,6 +14206,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->result_unpriv : test->result;
expected_err = unpriv && test->errstr_unpriv ?
test->errstr_unpriv : test->errstr;
+ expected_val = unpriv && test->retval_unpriv ?
+ test->retval_unpriv : test->retval;
reject_from_alignment = fd_prog < 0 &&
(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -13959,16 +14241,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
__u8 tmp[TEST_DATA_LEN << 2];
__u32 size_tmp = sizeof(tmp);
+ if (unpriv)
+ set_admin(true);
err = bpf_prog_test_run(fd_prog, 1, test->data,
sizeof(test->data), tmp, &size_tmp,
&retval, NULL);
+ if (unpriv)
+ set_admin(false);
if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
printf("Unexpected bpf_prog_test_run error\n");
goto fail_log;
}
- if (!err && retval != test->retval &&
- test->retval != POINTER_VALUE) {
- printf("FAIL retval %d != %d\n", retval, test->retval);
+ if (!err && retval != expected_val &&
+ expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d\n", retval, expected_val);
goto fail_log;
}
}
@@ -14011,33 +14297,6 @@ static bool is_admin(void)
return (sysadmin == CAP_SET);
}
-static int set_admin(bool admin)
-{
- cap_t caps;
- const cap_value_t cap_val = CAP_SYS_ADMIN;
- int ret = -1;
-
- caps = cap_get_proc();
- if (!caps) {
- perror("cap_get_proc");
- return -1;
- }
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
- admin ? CAP_SET : CAP_CLEAR)) {
- perror("cap_set_flag");
- goto out;
- }
- if (cap_set_proc(caps)) {
- perror("cap_set_proc");
- goto out;
- }
- ret = 0;
-out:
- if (cap_free(caps))
- perror("cap_free");
- return ret;
-}
-
static void get_unpriv_disabled()
{
char buf[2];
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 0150bb2..117f6f3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -25,24 +25,24 @@
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
-# +-----------------------+ +----------------------------------+
-# | H1 | | H2 |
-# | | | unicast --> + $h2.111 |
-# | | | traffic | 192.0.2.129/28 |
-# | multicast | | | e-qos-map 0:1 |
-# | traffic | | | |
-# | $h1 + <----- | | + $h2 |
-# +-----|-----------------+ +--------------|-------------------+
-# | |
-# +-----|-------------------------------------------------|-------------------+
-# | + $swp1 + $swp2 |
-# | | >1Gbps | >1Gbps |
-# | +---|----------------+ +----------|----------------+ |
-# | | + $swp1.1 | | + $swp2.111 | |
+# +---------------------------+ +----------------------------------+
+# | H1 | | H2 |
+# | | | unicast --> + $h2.111 |
+# | multicast | | traffic | 192.0.2.129/28 |
+# | traffic | | | e-qos-map 0:1 |
+# | $h1 + <----- | | | |
+# | 192.0.2.65/28 | | | + $h2 |
+# +---------------|-----------+ +--------------|-------------------+
+# | |
+# +---------------|---------------------------------------|-------------------+
+# | $swp1 + + $swp2 |
+# | >1Gbps | | >1Gbps |
+# | +-------------|------+ +----------|----------------+ |
+# | | $swp1.1 + | | + $swp2.111 | |
# | | BR1 | SW | BR111 | |
-# | | + $swp3.1 | | + $swp3.111 | |
-# | +---|----------------+ +----------|----------------+ |
-# | \_________________________________________________/ |
+# | | $swp3.1 + | | + $swp3.111 | |
+# | +-------------|------+ +----------|----------------+ |
+# | \_______________________________________/ |
# | | |
# | + $swp3 |
# | | 1Gbps bottleneck |
@@ -51,6 +51,7 @@
# |
# +--|-----------------+
# | + $h3 H3 |
+# | | 192.0.2.66/28 |
# | | |
# | + $h3.111 |
# | 192.0.2.130/28 |
@@ -59,6 +60,7 @@
ALL_TESTS="
ping_ipv4
test_mc_aware
+ test_uc_aware
"
lib_dir=$(dirname $0)/../../../net/forwarding
@@ -68,14 +70,14 @@
h1_create()
{
- simple_if_init $h1
+ simple_if_init $h1 192.0.2.65/28
mtu_set $h1 10000
}
h1_destroy()
{
mtu_restore $h1
- simple_if_fini $h1
+ simple_if_fini $h1 192.0.2.65/28
}
h2_create()
@@ -97,7 +99,7 @@
h3_create()
{
- simple_if_init $h3
+ simple_if_init $h3 192.0.2.66/28
mtu_set $h3 10000
vlan_create $h3 111 v$h3 192.0.2.130/28
@@ -108,7 +110,7 @@
vlan_destroy $h3 111
mtu_restore $h3
- simple_if_fini $h3
+ simple_if_fini $h3 192.0.2.66/28
}
switch_create()
@@ -251,7 +253,7 @@
# average ingress rate to somewhat mitigate this.
local min_ingress=2147483648
- mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+ $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-a own -b $h3mac -t udp -q &
sleep 1
@@ -291,7 +293,7 @@
check_err $? "Could not get high enough UC-only ingress rate"
local ucth1=${uc_rate[1]}
- mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+ $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@@ -311,7 +313,7 @@
ret = 100 * ($ucth1 - $ucth2) / $ucth1
if (ret > 0) { ret } else { 0 }
")
- check_err $(bc <<< "$deg > 10")
+ check_err $(bc <<< "$deg > 25")
local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
@@ -335,6 +337,51 @@
echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
echo " ingress MC throughput $(humanize $mc_ir)"
echo " egress MC throughput $(humanize $mc_er)"
+ echo
+}
+
+test_uc_aware()
+{
+ RET=0
+
+ $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+ -a own -b $h3mac -t udp -q &
+
+ local d0=$(date +%s)
+ local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+ sleep 1
+
+ local attempts=50
+ local passes=0
+ local i
+
+ for ((i = 0; i < attempts; ++i)); do
+ if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+ ((passes++))
+ fi
+
+ sleep 0.1
+ done
+
+ local d1=$(date +%s)
+ local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+ local interval=$((d1 - d0))
+ local uc_ir=$(rate $u0 $u1 $interval)
+ local uc_er=$(rate $t0 $t1 $interval)
+
+ ((attempts == passes))
+ check_err $?
+
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait; } 2>/dev/null
+
+ log_test "MC performance under UC overload"
+ echo " ingress UC throughput $(humanize ${uc_ir})"
+ echo " egress UC throughput $(humanize ${uc_er})"
+ echo " sent $attempts BC ARPs, got $passes responses"
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index ede4d3d..689f6c8 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -1,12 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := cache_shape
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cache_shape
top_srcdir = ../../../../..
include ../../lib.mk
-clean:
- rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index bd5dfa5..23f4caf 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -5,6 +5,9 @@
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
+# Toolchains may build PIE by default which breaks the assembly
+LDFLAGS += -no-pie
+
TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 9b35ca8..8d3f006 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
perf-hwbreak ptrace-syscall
@@ -7,14 +7,9 @@
top_srcdir = ../../../../..
include ../../lib.mk
-all: $(TEST_PROGS)
-
CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
-ptrace-pkey core-pkey: child.h
-ptrace-pkey core-pkey: LDLIBS += -pthread
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
-$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
-
-clean:
- rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 327fa94..dbdffa2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -67,8 +67,8 @@ void tm_spd_gpr(void)
"3: ;"
: [res] "=r" (result), [texasr] "=r" (texasr)
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
- [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
- [flt_2] "r" (&b), [flt_4] "r" (&d)
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_4] "b" (&d)
: "memory", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 44690f1..85861c4 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0+
TEST_GEN_PROGS := rfi_flush
+top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 564ed45..0a7d0af 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -49,6 +49,7 @@ int rfi_flush_test(void)
struct perf_event_read v;
__u64 l1d_misses_total = 0;
unsigned long iterations = 100000, zero_size = 24 * 1024;
+ unsigned long l1d_misses_expected;
int rfi_flush_org, rfi_flush;
SKIP_IF(geteuid() != 0);
@@ -71,6 +72,12 @@ int rfi_flush_test(void)
iter = repetitions;
+ /*
+ * We expect to see l1d miss for each cacheline access when rfi_flush
+ * is set. Allow a small variation on this.
+ */
+ l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
again:
FAIL_IF(perf_event_reset(fd));
@@ -78,10 +85,9 @@ int rfi_flush_test(void)
FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
- /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
- if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+ if (rfi_flush && v.l1d_misses >= l1d_misses_expected)
passes++;
- else if (v.l1d_misses < iterations && !rfi_flush)
+ else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2))
passes++;
l1d_misses_total += v.l1d_misses;
@@ -92,13 +98,15 @@ int rfi_flush_test(void)
if (passes < repetitions) {
printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
- rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ rfi_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
repetitions - passes, repetitions);
rc = 1;
} else
printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
- rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ rfi_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
passes, repetitions);
if (rfi_flush == rfi_flush_org) {
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 1fca25c..209a958 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,15 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := signal signal_tm
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c signal.S
+TEST_GEN_PROGS := signal signal_tm
CFLAGS += -maltivec
-signal_tm: CFLAGS += -mhtm
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
top_srcdir = ../../../../..
include ../../lib.mk
-clean:
- rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index fcd2dcb..bdc081a 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -8,6 +8,7 @@
top_srcdir = ../../../../..
include ../../lib.mk
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 43c3428..ed62f41 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -25,7 +25,6 @@
#include "utils.h"
static char auxv[4096];
-extern unsigned int dscr_insn[];
int read_auxv(char *buf, ssize_t buf_size)
{
@@ -247,7 +246,8 @@ static void sigill_handler(int signr, siginfo_t *info, void *unused)
ucontext_t *ctx = (ucontext_t *)unused;
unsigned long *pc = &UCONTEXT_NIA(ctx);
- if (*pc == (unsigned long)&dscr_insn) {
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
if (!warned++)
printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
*pc += 4;
@@ -271,5 +271,5 @@ void set_dscr(unsigned long val)
init = 1;
}
- asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
}