Merge tag 'misc-habanalabs-fixes-2021-05-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-linus
Oded writes:
This tag contains the following fixes for 5.13-rc2:
- Expose PLL information per ASIC. This also fixes some casting warnings.
- Skip reading further firmware errors in case the PCI link is down.
- A security firmware error should be handled as an error and not a warning.
- Allow the user to ignore firmware errors.
- Fix a bug in the timeout calculation when waiting for a CS interrupt.
- Fix a potential use-after-free bug.
* tag 'misc-habanalabs-fixes-2021-05-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
habanalabs/gaudi: Fix a potential use after free in gaudi_memset_device_memory
habanalabs: wait for interrupt wrong timeout calculation
habanalabs: ignore f/w status error
habanalabs: change error level of security not ready
habanalabs: skip reading f/w errors on bad status
habanalabs: expose ASIC specific PLL index
diff --git a/.gitignore b/.gitignore
index df8d314..7afd412 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,14 +48,11 @@
*.xz
*.zst
Module.symvers
-modules.builtin
modules.order
#
# Top-level generic files
#
-/tags
-/TAGS
/linux
/modules-only.symvers
/vmlinux
@@ -66,6 +63,7 @@
/vmlinuz
/System.map
/Module.markers
+/modules.builtin
/modules.builtin.modinfo
/modules.nsdeps
@@ -114,6 +112,10 @@
patches
series
+# ctags files
+tags
+TAGS
+
# cscope files
cscope.*
ncscope.*
diff --git a/CREDITS b/CREDITS
index b06760f..7ef7b13 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1874,6 +1874,11 @@
S: 181 00 Praha 8
S: Czech Republic
+N: Murali Karicheri
+E: m-karicheri2@ti.com
+D: Keystone NetCP driver
+D: Keystone PCIe host controller driver
+
N: Jan "Yenya" Kasprzak
E: kas@fi.muni.cz
D: Author of the COSA/SRP sync serial board driver.
diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi
index ed79f58..47e6b97 100644
--- a/Documentation/ABI/testing/sysfs-class-net-qmi
+++ b/Documentation/ABI/testing/sysfs-class-net-qmi
@@ -58,3 +58,19 @@
Indicates the mux id associated to the qmimux network interface
during its creation.
+
+What: /sys/class/net/<iface>/qmi/pass_through
+Date: January 2021
+KernelVersion: 5.12
+Contact: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Description:
+ Boolean. Default: 'N'
+
+ Set this to 'Y' to enable 'pass-through' mode, allowing packets
+ in MAP format to be passed on to the stack.
+
+ Normally the rmnet driver (CONFIG_RMNET) is then used to process
+ and demultiplex these packets.
+
+ 'Pass-through' mode can be enabled when the device is in
+ 'raw-ip' mode only.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a816935..cb89dbd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3282,6 +3282,8 @@
nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+ nohugevmalloc [PPC] Disable kernel huge vmalloc mappings.
+
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 4fcc00a..18b8cc1 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -277,9 +277,40 @@
- SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+ For CPUs with Advanced SIMD and floating point support:
+
+ - If EL3 is present:
+
+ - CPTR_EL3.TFP (bit 10) must be initialised to 0b0.
+
+ - If EL2 is present and the kernel is entered at EL1:
+
+ - CPTR_EL2.TFP (bit 10) must be initialised to 0b0.
+
+ For CPUs with the Scalable Vector Extension (FEAT_SVE) present:
+
+ - If EL3 is present:
+
+ - CPTR_EL3.EZ (bit 8) must be initialised to 0b1.
+
+ - ZCR_EL3.LEN must be initialised to the same value for all CPUs the
+ kernel is executed on.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - CPTR_EL2.TZ (bit 8) must be initialised to 0b0.
+
+ - CPTR_EL2.ZEN (bits 17:16) must be initialised to 0b11.
+
+ - ZCR_EL2.LEN must be initialised to the same value for all CPUs the
+ kernel will execute on.
+
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
-enter the kernel in the same exception level.
+enter the kernel in the same exception level. Where the values documented
+disable traps it is permissible for these traps to be enabled so long as
+those traps are handled transparently by higher exception levels as though
+the values documented were set.
The boot loader is expected to enter the kernel on each CPU in the
following manner:
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 8782166..ec1a5a6 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -74,7 +74,7 @@
HWCAP_EVTSTRM
The generic timer is configured to generate events at a frequency of
- approximately 100KHz.
+ approximately 10KHz.
HWCAP_AES
Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst
index cbc4d45..459e6b6 100644
--- a/Documentation/arm64/tagged-address-abi.rst
+++ b/Documentation/arm64/tagged-address-abi.rst
@@ -113,6 +113,12 @@
- ``shmat()`` and ``shmdt()``.
+- ``brk()`` (since kernel v5.6).
+
+- ``mmap()`` (since kernel v5.6).
+
+- ``mremap()``, the ``new_address`` argument (since kernel v5.6).
+
Any attempt to use non-zero tagged pointers may result in an error code
being returned, a (fatal) signal being raised, or other modes of
failure.
diff --git a/Documentation/devicetree/bindings/.gitignore b/Documentation/devicetree/bindings/.gitignore
index 3a05b99..a777199 100644
--- a/Documentation/devicetree/bindings/.gitignore
+++ b/Documentation/devicetree/bindings/.gitignore
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
*.example.dts
-processed-schema*.yaml
-processed-schema*.json
+/processed-schema*.yaml
+/processed-schema*.json
diff --git a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
index adb5165..62f3ca6 100644
--- a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
+++ b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
@@ -49,7 +49,7 @@
examples:
- |
i3c-master@a0000000 {
- compatible = "silvaco,i3c-master";
+ compatible = "silvaco,i3c-master-v1";
clocks = <&zynqmp_clk 71>, <&fclk>, <&sclk>;
clock-names = "pclk", "fast_clk", "slow_clk";
interrupt-parent = <&gic>;
diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst
index 14ea2f2..84dcdcd 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -74,7 +74,7 @@
calling set_sched_topology() with this array as the parameter.
The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG and adding 'sched_debug_verbose' to your cmdline. If you
+CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you
forgot to tweak your cmdline, you can also flip the
/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
the sched domains which should catch most possible errors (described above). It
diff --git a/MAINTAINERS b/MAINTAINERS
index e845f05..bd7aff0c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -624,6 +624,7 @@
AFS FILESYSTEM
M: David Howells <dhowells@redhat.com>
+M: Marc Dionne <marc.dionne@auristor.com>
L: linux-afs@lists.infradead.org
S: Supported
W: https://www.infradead.org/~dhowells/kafs/
@@ -9552,6 +9553,7 @@
F: fs/io_uring.c
F: include/linux/io_uring.h
F: include/uapi/linux/io_uring.h
+F: tools/io_uring/
IPMI SUBSYSTEM
M: Corey Minyard <minyard@acm.org>
@@ -14098,13 +14100,6 @@
F: drivers/pci/controller/cadence/pci-j721e.c
F: drivers/pci/controller/dwc/pci-dra7xx.c
-PCI DRIVER FOR TI KEYSTONE
-M: Murali Karicheri <m-karicheri2@ti.com>
-L: linux-pci@vger.kernel.org
-L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
-F: drivers/pci/controller/dwc/pci-keystone.c
-
PCI DRIVER FOR V3 SEMICONDUCTOR V360EPC
M: Linus Walleij <linus.walleij@linaro.org>
L: linux-pci@vger.kernel.org
@@ -15890,6 +15885,7 @@
RXRPC SOCKETS (AF_RXRPC)
M: David Howells <dhowells@redhat.com>
+M: Marc Dionne <marc.dionne@auristor.com>
L: linux-afs@lists.infradead.org
S: Supported
W: https://www.infradead.org/~dhowells/kafs/
@@ -18306,13 +18302,6 @@
F: sound/soc/codecs/isabelle*
F: sound/soc/codecs/lm49453*
-TI NETCP ETHERNET DRIVER
-M: Wingman Kwok <w-kwok2@ti.com>
-M: Murali Karicheri <m-karicheri2@ti.com>
-L: netdev@vger.kernel.org
-S: Maintained
-F: drivers/net/ethernet/ti/netcp*
-
TI PCM3060 ASoC CODEC DRIVER
M: Kirill Marinushkin <kmarinushkin@birdec.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/Makefile b/Makefile
index 72af8e4..53d09c41 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
-PATCHLEVEL = 12
+PATCHLEVEL = 13
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Frozen Wasteland
# *DOCUMENTATION*
@@ -399,11 +399,6 @@
SRCARCH := sparc
endif
-# Additional ARCH settings for sh
-ifeq ($(ARCH),sh64)
- SRCARCH := sh
-endif
-
export cross_compiling :=
ifneq ($(SRCARCH),$(SUBARCH))
cross_compiling := 1
@@ -792,16 +787,16 @@
KBUILD_CFLAGS += -mno-global-merge
else
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += -Wno-unused-but-set-variable
-
# Warn about unmarked fall-throughs in switch statement.
# Disabled for clang while comment to attribute conversion happens and
# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,)
endif
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
@@ -1225,7 +1220,7 @@
archprepare: outputmakefile archheaders archscripts scripts include/config/kernel.release \
asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h \
- include/generated/autoconf.h
+ include/generated/autoconf.h remove-stale-files
prepare0: archprepare
$(Q)$(MAKE) $(build)=scripts/mod
@@ -1234,6 +1229,10 @@
# All the preparing..
prepare: prepare0 prepare-objtool prepare-resolve_btfids
+PHONY += remove-stale-files
+remove-stale-files:
+ $(Q)$(srctree)/scripts/remove-stale-files
+
# Support for using generic headers in asm-generic
asm-generic := -f $(srctree)/scripts/Makefile.asm-generic obj
@@ -1512,9 +1511,6 @@
vmlinux-gdb.py \
*.spec
-# Directories & files removed with 'make distclean'
-DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
-
# clean - Delete most, but leave enough to build external modules
#
clean: rm-files := $(CLEAN_FILES)
@@ -1541,16 +1537,14 @@
# distclean
#
-distclean: rm-files := $(wildcard $(DISTCLEAN_FILES))
-
PHONY += distclean
distclean: mrproper
- $(call cmd,rmfiles)
- @find $(srctree) $(RCS_FIND_IGNORE) \
+ @find . $(RCS_FIND_IGNORE) \
\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-o -name '*.bak' -o -name '#*#' -o -name '*%' \
- -o -name 'core' \) \
+ -o -name 'core' -o -name tags -o -name TAGS -o -name 'cscope*' \
+ -o -name GPATH -o -name GRTAGS -o -name GSYMS -o -name GTAGS \) \
-type f -print | xargs rm -f
@@ -1717,17 +1711,7 @@
# When building external modules the kernel used as basis is considered
# read-only, and no consistency checks are made and the make
# system is not used on the basis kernel. If updates are required
-# in the basis kernel ordinary make commands (without M=...) must
-# be used.
-#
-# The following are the only valid targets when building external
-# modules.
-# make M=dir clean Delete all automatically generated files
-# make M=dir modules Make all modules in specified dir
-# make M=dir Same as 'make M=dir modules'
-# make M=dir modules_install
-# Install the modules built in the module directory
-# Assumes install directory is already created
+# in the basis kernel ordinary make commands (without M=...) must be used.
# We are always building only modules.
KBUILD_BUILTIN :=
diff --git a/arch/.gitignore b/arch/.gitignore
index 4191da4..756c19c 100644
--- a/arch/.gitignore
+++ b/arch/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-i386
-x86_64
+/i386/
+/x86_64/
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index c1f8047..8eb70c1 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -96,13 +96,6 @@
$(foreach o, $(libfdt_objs) atags_to_fdt.o fdt_check_mem_start.o, \
$(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt -fno-stack-protector))
-# These were previously generated C files. When you are building the kernel
-# with O=, make sure to remove the stale files in the output tree. Otherwise,
-# the build system wrongly compiles the stale ones.
-ifdef building_out_of_srctree
-$(shell rm -f $(addprefix $(obj)/, fdt_rw.c fdt_ro.c fdt_wip.c fdt.c))
-endif
-
targets := vmlinux vmlinux.lds piggy_data piggy.o \
lib1funcs.o ashldi3.o bswapsdi2.o \
head.o $(OBJS)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f0b17d7..9f1d856 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -170,7 +170,6 @@
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_CONTEXT_TRACKING
- select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@@ -1061,15 +1060,7 @@
config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_VMEMMAP_ENABLE
-
-config ARCH_SPARSEMEM_DEFAULT
- def_bool ARCH_SPARSEMEM_ENABLE
-
-config ARCH_SELECT_MEMORY_MODEL
- def_bool ARCH_SPARSEMEM_ENABLE
-
-config ARCH_FLATMEM_ENABLE
- def_bool !NUMA
+ select SPARSEMEM_VMEMMAP
config HW_PERF_EVENTS
def_bool y
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 5eb7af9..55f57df 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -131,6 +131,9 @@ static inline void local_daif_inherit(struct pt_regs *regs)
if (interrupts_enabled(regs))
trace_hardirqs_on();
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(regs->pmr_save);
+
/*
* We can't use local_daif_restore(regs->pstate) here as
* system_has_prio_mask_debugging() won't restore the I bit if it can
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 587c504..d44df9d 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -136,7 +136,7 @@
* has a direct correspondence, and needs to appear sufficiently aligned
* in the virtual address space.
*/
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS)
#else
#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 6d9915d..87b90dc 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -345,7 +345,7 @@ static inline void *phys_to_virt(phys_addr_t x)
*/
#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
-#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL)
+#if defined(CONFIG_DEBUG_VIRTUAL)
#define page_to_virt(x) ({ \
__typeof__(x) __page = x; \
void *__addr = __va(page_to_phys(__page)); \
@@ -365,7 +365,7 @@ static inline void *phys_to_virt(phys_addr_t x)
u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \
(struct page *)__addr; \
})
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */
+#endif /* CONFIG_DEBUG_VIRTUAL */
#define virt_addr_valid(addr) ({ \
__typeof__(addr) __addr = __tag_reset(addr); \
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index eb4a75d..4b73463 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,7 +5,6 @@
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
/*
@@ -27,6 +26,4 @@
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_ARM64_64K_PAGES */
-#endif /* CONFIG_SPARSEMEM*/
-
#endif
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index abc8463..c906d20 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -133,11 +133,10 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
-static void __nocfi __apply_alternatives(void *alt_region, bool is_module,
- unsigned long *feature_mask)
+static void __nocfi __apply_alternatives(struct alt_region *region, bool is_module,
+ unsigned long *feature_mask)
{
struct alt_instr *alt;
- struct alt_region *region = alt_region;
__le32 *origptr, *updptr;
alternative_cb_t alt_cb;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 30c82d3..efed283 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -68,6 +68,7 @@
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
+#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/kasan.h>
@@ -694,14 +695,14 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
ret = ftrp->safe_val;
break;
case FTR_LOWER_SAFE:
- ret = new < cur ? new : cur;
+ ret = min(new, cur);
break;
case FTR_HIGHER_OR_ZERO_SAFE:
if (!cur || !new)
break;
fallthrough;
case FTR_HIGHER_SAFE:
- ret = new > cur ? new : cur;
+ ret = max(new, cur);
break;
default:
BUG();
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index b512b55..03991ee 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -29,7 +29,7 @@ int arm_cpuidle_init(unsigned int cpu)
/**
* arm_cpuidle_suspend() - function to enter a low-power idle state
- * @arg: argument to pass to CPU suspend operations
+ * @index: argument to pass to CPU suspend operations
*
* Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
* operations back-end error code otherwise.
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index a1ec351..340d04e 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -230,14 +230,6 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
- /*
- * The CPU masked interrupts, and we are leaving them masked during
- * do_debug_exception(). Update PMR as if we had called
- * local_daif_mask().
- */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
arm64_enter_el1_dbg(regs);
if (!cortex_a76_erratum_1463225_debug_handler(regs))
do_debug_exception(far, esr, regs);
@@ -404,9 +396,6 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
unsigned long far = read_sysreg(far_el1);
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
do_debug_exception(far, esr, regs);
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -414,9 +403,6 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_svc(struct pt_regs *regs)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
@@ -492,9 +478,6 @@ static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4ac5455..3513984 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -285,16 +285,16 @@
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, terminate the callchain here.
+ * For exceptions from EL0, create a terminal frame record.
* For exceptions from EL1, create a synthetic frame record so the
* interrupted code shows up in the backtrace.
*/
.if \el == 0
- mov x29, xzr
+ stp xzr, xzr, [sp, #S_STACKFRAME]
.else
stp x29, x22, [sp, #S_STACKFRAME]
- add x29, sp, #S_STACKFRAME
.endif
+ add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN
@@ -314,6 +314,8 @@
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
str x20, [sp, #S_PMR_SAVE]
+ mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, x20
alternative_else_nop_endif
/* Re-enable tag checking (TCO set on exception entry) */
@@ -550,17 +552,7 @@
#endif
.endm
- .macro gic_prio_irq_setup, pmr:req, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
-#endif
- .endm
-
.macro el1_interrupt_handler, handler:req
- gic_prio_irq_setup pmr=x20, tmp=x1
enable_da
mov x0, sp
@@ -590,7 +582,6 @@
.endm
.macro el0_interrupt_handler, handler:req
- gic_prio_irq_setup pmr=x20, tmp=x0
user_exit_irqoff
enable_da
@@ -788,7 +779,6 @@
SYM_CODE_START_LOCAL(el1_error)
kernel_entry 1
mrs x1, esr_el1
- gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
@@ -799,7 +789,6 @@
kernel_entry 0
el0_error_naked:
mrs x25, esr_el1
- gic_prio_kentry_setup tmp=x2
user_exit_irqoff
enable_dbg
mov x0, sp
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index cbf5210..b4bb67f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -294,13 +294,10 @@ void __show_regs(struct pt_regs *regs)
i = top_reg;
while (i >= 0) {
- printk("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
+ printk("x%-2d: %016llx", i, regs->regs[i]);
- if (i % 2 == 0) {
- pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
- }
+ while (i-- % 3)
+ pr_cont(" x%-2d: %016llx", i, regs->regs[i]);
pr_cont("\n");
}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 84b676b..de07147 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,10 +68,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
unsigned long fp = frame->fp;
struct stack_info info;
- /* Terminal record; nothing to unwind */
- if (!fp)
- return -ENOENT;
-
if (fp & 0xf)
return -EINVAL;
@@ -132,6 +128,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
frame->pc = ptrauth_strip_insn_pac(frame->pc);
+ /*
+ * This is a terminal record, so we have finished unwinding.
+ */
+ if (!frame->fp && !frame->pc)
+ return -ENOENT;
+
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 61dbb4c..a5e61e0 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -31,6 +31,13 @@
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
+ /*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from vDSO note sections.
+ */
+ /DISCARD/ : {
+ *(.note.GNU-stack .note.gnu.property)
+ }
.note : { *(.note.*) } :text :note
. = ALIGN(16);
@@ -48,7 +55,6 @@
PROVIDE(end = .);
/DISCARD/ : {
- *(.note.GNU-stack)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
*(.eh_frame .eh_frame_hdr)
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 789ad42..3dba0c4 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -10,15 +10,7 @@
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
-COMPAT_GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE_COMPAT)elfedit))
-COMPAT_GCC_TOOLCHAIN := $(realpath $(COMPAT_GCC_TOOLCHAIN_DIR)/..)
-
CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
-CC_COMPAT_CLANG_FLAGS += --prefix=$(COMPAT_GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE_COMPAT))
-CC_COMPAT_CLANG_FLAGS += -no-integrated-as -Qunused-arguments
-ifneq ($(COMPAT_GCC_TOOLCHAIN),)
-CC_COMPAT_CLANG_FLAGS += --gcc-toolchain=$(COMPAT_GCC_TOOLCHAIN)
-endif
CC_COMPAT ?= $(CC)
CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 0696a45..16a2b2b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -221,6 +221,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
int pfn_valid(unsigned long pfn)
{
phys_addr_t addr = PFN_PHYS(pfn);
+ struct mem_section *ms;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
@@ -231,10 +232,6 @@ int pfn_valid(unsigned long pfn)
if (PHYS_PFN(addr) != pfn)
return 0;
-#ifdef CONFIG_SPARSEMEM
-{
- struct mem_section *ms;
-
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
@@ -253,8 +250,7 @@ int pfn_valid(unsigned long pfn)
*/
if (!early_section(ms))
return pfn_section_valid(ms, pfn);
-}
-#endif
+
return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 70fa3cd..6dd9369 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1113,7 +1113,6 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
}
#endif
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
@@ -1177,7 +1176,6 @@ void vmemmap_free(unsigned long start, unsigned long end,
free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
#endif
}
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
static inline pud_t *fixmap_pud(unsigned long addr)
{
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index a50e92ea..a1937df 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -51,10 +51,8 @@ static struct addr_marker address_markers[] = {
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
-#endif
{ -1, NULL },
};
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index ea14f20..82620f1 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -16,7 +16,7 @@
KBUILD_DEFCONFIG := multi_defconfig
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, \
m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index e71d587..258234c 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -50,7 +50,7 @@
UTS_MACHINE := mips64
endif
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-linux-gnu- $(tool-archpref)-unknown-linux-gnu-)
endif
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 7d9f71a..aed8ea2 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -41,7 +41,7 @@
export LD_BFD
-ifneq ($(SUBARCH),$(UTS_MACHINE))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
CROSS_COMPILE := $(call cc-cross-prefix, \
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cb2d44e..088dd2a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -120,28 +120,29 @@
select ARCH_32BIT_OFF_T if PPC32
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_KCOV
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
- select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
- select ARCH_HAS_MMIOWB if PPC64
- select ARCH_HAS_PHYS_TO_DMA
- select ARCH_HAS_PMEM_API
- select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
- select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+ select ARCH_HAS_MMIOWB if PPC64
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ select ARCH_HAS_PHYS_TO_DMA
+ select ARCH_HAS_PMEM_API
+ select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
+ select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
- select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
@@ -164,9 +165,8 @@
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
- select DMA_OPS if PPC64
select DMA_OPS_BYPASS if PPC64
- select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES
+ select DMA_OPS if PPC64
select DYNAMIC_FTRACE if FUNCTION_TRACER
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
@@ -186,23 +186,22 @@
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
- select HAVE_ARCH_KGDB
select HAVE_ARCH_KFENCE if PPC32
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ASM_MODVERSIONS
- select HAVE_C_RECORDMCOUNT
- select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
- select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
@@ -216,10 +215,13 @@
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
select HAVE_GENERIC_VDSO
+ select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
@@ -231,26 +233,25 @@
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
- select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP
select HAVE_OPTPROBES
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
- select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
- select MMU_GATHER_RCU_TABLE_FREE
- select MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
+ select HAVE_RSEQ
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
+ select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
- select HAVE_IRQ_TIME_ACCOUNTING
- select HAVE_RSEQ
+ select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select MMU_GATHER_PAGE_SIZE
+ select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 41fa0a8..cdb796b 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -191,7 +191,7 @@
kernel=vmlinux
fi
-LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
case "$elfformat" in
elf64-powerpcle) format=elf64lppc ;;
elf64-powerpc) format=elf32ppc ;;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a6e9a55..e6b53c6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
unsigned int lpid);
extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void);
-extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index c761572..6ea9001 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -28,9 +28,6 @@ extern struct device_node *opal_node;
/* API functions */
int64_t opal_invalid_call(void);
-int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf);
-int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
- uint64_t bdf);
int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
uint64_t lpcr);
int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index d2a2a14..74424c1 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -126,7 +126,6 @@ struct pci_controller {
#endif /* CONFIG_PPC64 */
void *private_data;
- struct npu *npu;
};
/* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6436f0b..d1f5326 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -119,11 +119,4 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
#endif /* __KERNEL__ */
-extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
-extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
-extern int pnv_npu2_init(struct pci_controller *hose);
-extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
- unsigned long msr);
-extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
-
#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index fab8402..3f35c8d 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -8,6 +8,7 @@
#include <linux/moduleloader.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/bug.h>
#include <asm/module.h>
#include <linux/uaccess.h>
@@ -88,17 +89,22 @@ int module_finalize(const Elf_Ehdr *hdr,
return 0;
}
-#ifdef MODULES_VADDR
static __always_inline void *
__module_alloc(unsigned long size, unsigned long start, unsigned long end)
{
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
- PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */
+ return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
+ VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+ NUMA_NO_NODE, __builtin_return_address(0));
}
void *module_alloc(unsigned long size)
{
+#ifdef MODULES_VADDR
unsigned long limit = (unsigned long)_etext - SZ_32M;
void *ptr = NULL;
@@ -112,5 +118,7 @@ void *module_alloc(unsigned long size)
ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
return ptr;
-}
+#else
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END);
#endif
+}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index f9eb49e..5056e17 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -951,6 +951,93 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
}
/**
+ * add_node_props - Read every property of a device node and set it on the
+ * given fdt node.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: Offset of the fdt node that receives the properties
+ * @dn: Device node whose properties are copied (NULL is rejected)
+ *
+ * Returns 0 on success, -EINVAL if @dn is NULL, else the negative fdt_setprop() result.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Rebuild the cpus node of the flattened device tree from
+ * the live /cpus device node and all nodes of type cpu.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative value (-EINVAL or an fdt_*() error) on error.
+ */
+static int update_cpus_node(void *fdt)
+{
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ if (cpus_offset > 0) {
+ ret = fdt_del_node(fdt, cpus_offset);
+ if (ret < 0) {
+ pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ /* Create a fresh, empty cpus node under the fdt root */
+ cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+ if (cpus_offset < 0) {
+ pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
+ return -EINVAL;
+ }
+
+ /* Copy the properties of the live /cpus device node into it */
+ cpus_node = of_find_node_by_path("/cpus");
+ ret = add_node_props(fdt, cpus_offset, cpus_node);
+ of_node_put(cpus_node);
+ if (ret < 0)
+ return ret;
+
+ /* Add every device node of type cpu as a subnode of cpus in the fdt */
+ for_each_node_by_type(dn, "cpu") {
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(dn);
+ return ret;
+}
+
+/**
* setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
* being loaded.
* @image: kexec image being loaded.
@@ -1006,6 +1093,11 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
}
}
+ /* Update cpus nodes information to account hotplug CPUs. */
+ ret = update_cpus_node(fdt);
+ if (ret < 0)
+ goto out;
+
/* Update memory reserve map */
ret = get_reserved_memory_ranges(&rmem);
if (ret)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b7bd9ca..2d9193c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
}
}
-static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
unsigned long i;
@@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
- return false;
}
bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- return kvm_unmap_radix(kvm, range->slot, range->start);
+ gfn_t gfn;
- return kvm_unmap_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_rmapp(kvm, range->slot, gfn);
+ }
+
+ return false;
}
void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
@@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- kvm_age_radix(kvm, range->slot, range->start);
+ gfn_t gfn;
+ bool ret = false;
- return kvm_age_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+ }
+
+ return ret;
}
static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- kvm_test_age_radix(kvm, range->slot, range->start);
+ WARN_ON(range->start + 1 != range->end);
- return kvm_test_age_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm))
+ return kvm_test_age_radix(kvm, range->slot, range->start);
+ else
+ return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- return kvm_unmap_radix(kvm, range->slot, range->start);
+ WARN_ON(range->start + 1 != range->end);
- return kvm_unmap_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm))
+ kvm_unmap_radix(kvm, range->slot, range->start);
+ else
+ kvm_unmap_rmapp(kvm, range->slot, range->start);
+
+ return false;
}
static int vcpus_running(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ec4f58f..d909c06 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
}
/* Called with kvm->mmu_lock held */
-bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
pte_t *ptep;
@@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
- return false;
+ return;
}
ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
- return false;
}
/* Called with kvm->mmu_lock held */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index f2c690e..cc1a8a0 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,6 +5,9 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 2eb6ae1..be2546b 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -10,7 +10,7 @@
obj-$(CONFIG_FA_DUMP) += opal-fadump.o
obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
obj-$(CONFIG_OPAL_CORE) += opal-core.o
-obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o
obj-$(CONFIG_PCI_IOV) += pci-sriov.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 71c1262..537a4da 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -104,8 +104,8 @@ static void memtrace_clear_range(unsigned long start_pfn,
* Before we go ahead and use this range as cache inhibited range
* flush the cache.
*/
- flush_dcache_range_chunked(PFN_PHYS(start_pfn),
- PFN_PHYS(start_pfn + nr_pages),
+ flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
+ (unsigned long)pfn_to_kaddr(start_pfn + nr_pages),
FLUSH_CHUNK_SIZE);
}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
deleted file mode 100644
index b711dc3..0000000
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ /dev/null
@@ -1,705 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This file implements the DMA operations for NVLink devices. The NPU
- * devices all point to the same iommu table as the parent PCI device.
- *
- * Copyright Alistair Popple, IBM Corporation 2015.
- */
-
-#include <linux/mmu_notifier.h>
-#include <linux/mmu_context.h>
-#include <linux/of.h>
-#include <linux/pci.h>
-#include <linux/memblock.h>
-#include <linux/sizes.h>
-
-#include <asm/debugfs.h>
-#include <asm/powernv.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-
-#include "pci.h"
-
-static struct pci_dev *get_pci_dev(struct device_node *dn)
-{
- struct pci_dn *pdn = PCI_DN(dn);
- struct pci_dev *pdev;
-
- pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
- pdn->busno, pdn->devfn);
-
- /*
- * pci_get_domain_bus_and_slot() increased the reference count of
- * the PCI device, but callers don't need that actually as the PE
- * already holds a reference to the device. Since callers aren't
- * aware of the reference count change, call pci_dev_put() now to
- * avoid leaks.
- */
- if (pdev)
- pci_dev_put(pdev);
-
- return pdev;
-}
-
-/* Given a NPU device get the associated PCI device. */
-struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
-{
- struct device_node *dn;
- struct pci_dev *gpdev;
-
- if (WARN_ON(!npdev))
- return NULL;
-
- if (WARN_ON(!npdev->dev.of_node))
- return NULL;
-
- /* Get assoicated PCI device */
- dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
- if (!dn)
- return NULL;
-
- gpdev = get_pci_dev(dn);
- of_node_put(dn);
-
- return gpdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
-
-/* Given the real PCI device get a linked NPU device. */
-struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
-{
- struct device_node *dn;
- struct pci_dev *npdev;
-
- if (WARN_ON(!gpdev))
- return NULL;
-
- /* Not all PCI devices have device-tree nodes */
- if (!gpdev->dev.of_node)
- return NULL;
-
- /* Get assoicated PCI device */
- dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
- if (!dn)
- return NULL;
-
- npdev = get_pci_dev(dn);
- of_node_put(dn);
-
- return npdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_npu_dev);
-
-#ifdef CONFIG_IOMMU_API
-/*
- * Returns the PE assoicated with the PCI device of the given
- * NPU. Returns the linked pci device if pci_dev != NULL.
- */
-static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
- struct pci_dev **gpdev)
-{
- struct pnv_phb *phb;
- struct pci_controller *hose;
- struct pci_dev *pdev;
- struct pnv_ioda_pe *pe;
- struct pci_dn *pdn;
-
- pdev = pnv_pci_get_gpu_dev(npe->pdev);
- if (!pdev)
- return NULL;
-
- pdn = pci_get_pdn(pdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return NULL;
-
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- pe = &phb->ioda.pe_array[pdn->pe_number];
-
- if (gpdev)
- *gpdev = pdev;
-
- return pe;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group,
- int num);
-
-static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
- struct iommu_table *tbl)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
- const unsigned long size = tbl->it_indirect_levels ?
- tbl->it_level_size : tbl->it_size;
- const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
- const __u64 win_size = tbl->it_size << tbl->it_page_shift;
- int num2 = (num == 0) ? 1 : 0;
-
- /* NPU has just one TVE so if there is another table, remove it first */
- if (npe->table_group.tables[num2])
- pnv_npu_unset_window(&npe->table_group, num2);
-
- pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
- start_addr, start_addr + win_size - 1,
- IOMMU_PAGE_SIZE(tbl));
-
- rc = opal_pci_map_pe_dma_window(phb->opal_id,
- npe->pe_number,
- npe->pe_number,
- tbl->it_indirect_levels + 1,
- __pa(tbl->it_base),
- size << 3,
- IOMMU_PAGE_SIZE(tbl));
- if (rc) {
- pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
- return rc;
- }
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- /* Add the table to the list so its TCE cache will get invalidated */
- pnv_pci_link_table_and_group(phb->hose->node, num,
- tbl, &npe->table_group);
-
- return 0;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
-
- if (!npe->table_group.tables[num])
- return 0;
-
- pe_info(npe, "Removing DMA window\n");
-
- rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
- npe->pe_number,
- 0/* levels */, 0/* table address */,
- 0/* table size */, 0/* page size */);
- if (rc) {
- pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
- return rc;
- }
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
- &npe->table_group);
-
- return 0;
-}
-
-/* Switch ownership from platform code to external user (e.g. VFIO) */
-static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
- struct pci_dev *gpdev = NULL;
-
- /*
- * Note: NPU has just a single TVE in the hardware which means that
- * while used by the kernel, it can have either 32bit window or
- * DMA bypass but never both. So we deconfigure 32bit window only
- * if it was enabled at the moment of ownership change.
- */
- if (npe->table_group.tables[0]) {
- pnv_npu_unset_window(&npe->table_group, 0);
- return;
- }
-
- /* Disable bypass */
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- 0 /* bypass base */, 0);
- if (rc) {
- pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
- return;
- }
- pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
-
- get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (gpdev)
- pnv_npu2_unmap_lpar_dev(gpdev);
-}
-
-static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pci_dev *gpdev = NULL;
-
- get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (gpdev)
- pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
-}
-
-static struct iommu_table_group_ops pnv_pci_npu_ops = {
- .set_window = pnv_npu_set_window,
- .unset_window = pnv_npu_unset_window,
- .take_ownership = pnv_npu_take_ownership,
- .release_ownership = pnv_npu_release_ownership,
-};
-#endif /* !CONFIG_IOMMU_API */
-
-/*
- * NPU2 ATS
- */
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-#define NV_NPU_MAX_PE_NUM 16
-
-/*
- * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
- * up to 3 x (GPU + 2xNPUs) (POWER9).
- */
-struct npu_comp {
- struct iommu_table_group table_group;
- int pe_num;
- struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
-};
-
-/* An NPU descriptor, valid for POWER9 only */
-struct npu {
- int index;
- struct npu_comp npucomp;
-};
-
-#ifdef CONFIG_IOMMU_API
-static long pnv_npu_peers_create_table_userspace(
- struct iommu_table_group *table_group,
- int num, __u32 page_shift, __u64 window_size, __u32 levels,
- struct iommu_table **ptbl)
-{
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- if (!npucomp->pe_num || !npucomp->pe[0] ||
- !npucomp->pe[0]->table_group.ops ||
- !npucomp->pe[0]->table_group.ops->create_table)
- return -EFAULT;
-
- return npucomp->pe[0]->table_group.ops->create_table(
- &npucomp->pe[0]->table_group, num, page_shift,
- window_size, levels, ptbl);
-}
-
-static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
- int num, struct iommu_table *tbl)
-{
- int i, j;
- long ret = 0;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops->set_window)
- continue;
-
- ret = pe->table_group.ops->set_window(&pe->table_group,
- num, tbl);
- if (ret)
- break;
- }
-
- if (ret) {
- for (j = 0; j < i; ++j) {
- struct pnv_ioda_pe *pe = npucomp->pe[j];
-
- if (!pe->table_group.ops->unset_window)
- continue;
-
- ret = pe->table_group.ops->unset_window(
- &pe->table_group, num);
- if (ret)
- break;
- }
- } else {
- table_group->tables[num] = iommu_tce_table_get(tbl);
- }
-
- return ret;
-}
-
-static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
- int num)
-{
- int i, j;
- long ret = 0;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- WARN_ON(npucomp->table_group.tables[num] !=
- table_group->tables[num]);
- if (!npucomp->table_group.tables[num])
- continue;
-
- if (!pe->table_group.ops->unset_window)
- continue;
-
- ret = pe->table_group.ops->unset_window(&pe->table_group, num);
- if (ret)
- break;
- }
-
- if (ret) {
- for (j = 0; j < i; ++j) {
- struct pnv_ioda_pe *pe = npucomp->pe[j];
-
- if (!npucomp->table_group.tables[num])
- continue;
-
- if (!pe->table_group.ops->set_window)
- continue;
-
- ret = pe->table_group.ops->set_window(&pe->table_group,
- num, table_group->tables[num]);
- if (ret)
- break;
- }
- } else if (table_group->tables[num]) {
- iommu_tce_table_put(table_group->tables[num]);
- table_group->tables[num] = NULL;
- }
-
- return ret;
-}
-
-static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
-{
- int i;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops ||
- !pe->table_group.ops->take_ownership)
- continue;
- pe->table_group.ops->take_ownership(&pe->table_group);
- }
-}
-
-static void pnv_npu_peers_release_ownership(
- struct iommu_table_group *table_group)
-{
- int i;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops ||
- !pe->table_group.ops->release_ownership)
- continue;
- pe->table_group.ops->release_ownership(&pe->table_group);
- }
-}
-
-static struct iommu_table_group_ops pnv_npu_peers_ops = {
- .get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_npu_peers_create_table_userspace,
- .set_window = pnv_npu_peers_set_window,
- .unset_window = pnv_npu_peers_unset_window,
- .take_ownership = pnv_npu_peers_take_ownership,
- .release_ownership = pnv_npu_peers_release_ownership,
-};
-
-static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
- struct pnv_ioda_pe *pe)
-{
- if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
- return;
-
- npucomp->pe[npucomp->pe_num] = pe;
- ++npucomp->pe_num;
-}
-
-static struct iommu_table_group *
- pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
-{
- struct iommu_table_group *compound_group;
- struct npu_comp *npucomp;
- struct pci_dev *gpdev = NULL;
- struct pci_controller *hose;
- struct pci_dev *npdev = NULL;
-
- list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
- npdev = pnv_pci_get_npu_dev(gpdev, 0);
- if (npdev)
- break;
- }
-
- if (!npdev)
- /* It is not an NPU attached device, skip */
- return NULL;
-
- hose = pci_bus_to_host(npdev->bus);
-
- if (hose->npu) {
- /* P9 case: compound group is per-NPU (all gpus, all links) */
- npucomp = &hose->npu->npucomp;
- } else {
- /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
- npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
- }
-
- compound_group = &npucomp->table_group;
- if (!compound_group->group) {
- compound_group->ops = &pnv_npu_peers_ops;
- iommu_register_group(compound_group, hose->global_number,
- pe->pe_number);
-
- /* Steal capabilities from a GPU PE */
- compound_group->max_dynamic_windows_supported =
- pe->table_group.max_dynamic_windows_supported;
- compound_group->tce32_start = pe->table_group.tce32_start;
- compound_group->tce32_size = pe->table_group.tce32_size;
- compound_group->max_levels = pe->table_group.max_levels;
- if (!compound_group->pgsizes)
- compound_group->pgsizes = pe->table_group.pgsizes;
- }
-
- /*
- * The gpu would have been added to the iommu group that's created
- * for the PE. Pull it out now.
- */
- iommu_del_device(&gpdev->dev);
-
- /*
- * I'm not sure this is strictly required, but it's probably a good idea
- * since the table_group for the PE is going to be attached to the
- * compound table group. If we leave the PE's iommu group active then
- * we might have the same table_group being modifiable via two sepeate
- * iommu groups.
- */
- iommu_group_put(pe->table_group.group);
-
- /* now put the GPU into the compound group */
- pnv_comp_attach_table_group(npucomp, pe);
- iommu_add_device(compound_group, &gpdev->dev);
-
- return compound_group;
-}
-
-static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
-{
- struct iommu_table_group *table_group;
- struct npu_comp *npucomp;
- struct pci_dev *gpdev = NULL;
- struct pci_dev *npdev;
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
-
- WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
- if (!gpe)
- return NULL;
-
- /*
- * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
- * but NPU bridges do not have this hook defined so we do it here.
- * We do not setup other table group parameters as they won't be used
- * anyway - NVLink bridges are subordinate PEs.
- */
- pe->table_group.ops = &pnv_pci_npu_ops;
-
- table_group = iommu_group_get_iommudata(
- iommu_group_get(&gpdev->dev));
-
- /*
- * On P9 NPU PHB and PCI PHB support different page sizes,
- * keep only matching. We expect here that NVLink bridge PE pgsizes is
- * initialized by the caller.
- */
- table_group->pgsizes &= pe->table_group.pgsizes;
- npucomp = container_of(table_group, struct npu_comp, table_group);
- pnv_comp_attach_table_group(npucomp, pe);
-
- list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
- struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
-
- if (gpdevtmp != gpdev)
- continue;
-
- iommu_add_device(table_group, &npdev->dev);
- }
-
- return table_group;
-}
-
-void pnv_pci_npu_setup_iommu_groups(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- /*
- * For non-nvlink devices the IOMMU group is registered when the PE is
- * configured and devices are added to the group when the per-device
- * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
- * only initialise for "normal" IODA PHBs.
- *
- * For NVLink devices we need to ensure the NVLinks and the GPU end up
- * in the same IOMMU group, so that's handled here.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->type == PNV_PHB_IODA2)
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_try_setup_npu_table_group(pe);
- }
-
- /*
- * Now we have all PHBs discovered, time to add NPU devices to
- * the corresponding IOMMU groups.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- unsigned long pgsizes;
-
- phb = hose->private_data;
-
- if (phb->type != PNV_PHB_NPU_NVLINK)
- continue;
-
- pgsizes = pnv_ioda_parse_tce_sizes(phb);
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- /*
- * IODA2 bridges get this set up from
- * pci_controller_ops::setup_bridge but NPU bridges
- * do not have this hook defined so we do it here.
- */
- pe->table_group.pgsizes = pgsizes;
- pnv_npu_compound_attach(pe);
- }
- }
-}
-#endif /* CONFIG_IOMMU_API */
-
-int pnv_npu2_init(struct pci_controller *hose)
-{
- static int npu_index;
- struct npu *npu;
- int ret;
-
- npu = kzalloc(sizeof(*npu), GFP_KERNEL);
- if (!npu)
- return -ENOMEM;
-
- npu_index++;
- if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
- ret = -ENOSPC;
- goto fail_exit;
- }
- npu->index = npu_index;
- hose->npu = npu;
-
- return 0;
-
-fail_exit:
- kfree(npu);
- return ret;
-}
-
-int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
- unsigned long msr)
-{
- int ret;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct pci_controller *hose;
- struct pnv_phb *nphb;
-
- if (!npdev)
- return -ENODEV;
-
- hose = pci_bus_to_host(npdev->bus);
- if (hose->npu == NULL) {
- dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
- return 0;
- }
-
- nphb = hose->private_data;
-
- dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
- nphb->opal_id, lparid);
- /*
- * Currently we only support radix and non-zero LPCR only makes sense
- * for hash tables so skiboot expects the LPCR parameter to be a zero.
- */
- ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
- 0 /* LPCR bits */);
- if (ret) {
- dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
- return ret;
- }
-
- dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
- nphb->opal_id, msr);
- ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
- pci_dev_id(gpdev));
- if (ret < 0)
- dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
- else
- ret = 0;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
-
-void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
-{
- struct pci_dev *gpdev;
-
- list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
- pnv_npu2_map_lpar_dev(gpdev, 0, msr);
-}
-
-int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
-{
- int ret;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct pci_controller *hose;
- struct pnv_phb *nphb;
-
- if (!npdev)
- return -ENODEV;
-
- hose = pci_bus_to_host(npdev->bus);
- if (hose->npu == NULL) {
- dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
- return 0;
- }
-
- nphb = hose->private_data;
-
- dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
- nphb->opal_id);
- ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
- pci_dev_id(gpdev));
- if (ret < 0) {
- dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
- return ret;
- }
-
- /* Set LPID to 0 anyway, just to be safe */
- dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
- ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
- 0 /* LPCR bits */);
- if (ret)
- dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52..01401e3 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -267,8 +267,6 @@ OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 66c3c33..7de4646 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -47,8 +47,7 @@
#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
- "NPU_OCAPI" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" };
static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
static void pnv_pci_configure_bus(struct pci_bus *bus);
@@ -192,8 +191,6 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
- WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
- kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
mutex_lock(&phb->ioda.pe_alloc_mutex);
@@ -875,7 +872,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Release from all parents PELT-V. NPUs don't have a PELTV
* table
*/
- if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_unset_peltv(phb, pe, parent);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
@@ -946,7 +943,7 @@ int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
@@ -1002,8 +999,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
/* NOTE: We don't get a reference for the pointer in the PE
* data structure, both the device and PE structures should be
- * destroyed at the same time. However, removing nvlink
- * devices will need some work.
+ * destroyed at the same time.
*
* At some point we want to remove the PDN completely anyways
*/
@@ -1099,113 +1095,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
return pe;
}
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
- int pe_num, found_pe = false, rc;
- long rid;
- struct pnv_ioda_pe *pe;
- struct pci_dev *gpu_pdev;
- struct pci_dn *npu_pdn;
- struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
-
- /*
- * Intentionally leak a reference on the npu device (for
- * nvlink only; this is not an opencapi path) to make sure it
- * never goes away, as it's been the case all along and some
- * work is needed otherwise.
- */
- pci_dev_get(npu_pdev);
-
- /*
- * Due to a hardware errata PE#0 on the NPU is reserved for
- * error handling. This means we only have three PEs remaining
- * which need to be assigned to four links, implying some
- * links must share PEs.
- *
- * To achieve this we assign PEs such that NPUs linking the
- * same GPU get assigned the same PE.
- */
- gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
- pe = &phb->ioda.pe_array[pe_num];
- if (!pe->pdev)
- continue;
-
- if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
- /*
- * This device has the same peer GPU so should
- * be assigned the same PE as the existing
- * peer NPU.
- */
- dev_info(&npu_pdev->dev,
- "Associating to existing PE %x\n", pe_num);
- npu_pdn = pci_get_pdn(npu_pdev);
- rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pe_number = pe_num;
- phb->ioda.pe_rmap[rid] = pe->pe_number;
- pe->device_count++;
-
- /* Map the PE to this link */
- rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
- OpalPciBusAll,
- OPAL_COMPARE_RID_DEVICE_NUMBER,
- OPAL_COMPARE_RID_FUNCTION_NUMBER,
- OPAL_MAP_PE);
- WARN_ON(rc != OPAL_SUCCESS);
- found_pe = true;
- break;
- }
- }
-
- if (!found_pe)
- /*
- * Could not find an existing PE so allocate a new
- * one.
- */
- return pnv_ioda_setup_dev_PE(npu_pdev);
- else
- return pe;
-}
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
- struct pci_dev *pdev;
-
- list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_nvlink(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU_NVLINK) {
- /* PE#0 is needed for error reporting */
- pnv_ioda_reserve_pe(phb, 0);
- pnv_ioda_setup_npu_PEs(hose->bus);
- if (phb->model == PNV_PHB_MODEL_NPU2)
- WARN_ON_ONCE(pnv_npu2_init(hose));
- }
- }
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_IODA2)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
- }
-
-#ifdef CONFIG_IOMMU_API
- /* setup iommu groups so we can do nvlink pass-thru */
- pnv_pci_npu_setup_iommu_groups();
-#endif
-}
-
static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
@@ -1468,18 +1357,6 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1)
#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2)
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
- const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq_be(val, invalidate);
- else
- __raw_writeq_be(val, invalidate);
-}
-
static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
@@ -1539,20 +1416,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
struct pnv_phb *phb = pe->phb;
unsigned int shift = tbl->it_page_shift;
- /*
- * NVLink1 can use the TCE kill register directly as
- * it's the same as PHB3. NVLink2 is different and
- * should go via the OPAL call.
- */
- if (phb->model == PNV_PHB_MODEL_NPU) {
- /*
- * The NVLink hardware does not support TCE kill
- * per TCE entry so we have to invalidate
- * the entire cache for it.
- */
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- continue;
- }
if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
pnv_pci_phb3_tce_invalidate(pe, rm, shift,
index, npages);
@@ -1564,14 +1427,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
}
}
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- else
- opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
@@ -2451,7 +2306,6 @@ static void pnv_pci_enable_bridges(void)
static void pnv_pci_ioda_fixup(void)
{
- pnv_pci_ioda_setup_nvlink();
pnv_pci_ioda_create_dbgfs();
pnv_pci_enable_bridges();
@@ -2824,15 +2678,6 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
pnv_ioda_release_pe(pe);
}
-static void pnv_npu_disable_device(struct pci_dev *pdev)
-{
- struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
- struct eeh_pe *eehpe = edev ? edev->pe : NULL;
-
- if (eehpe && eeh_ops && eeh_ops->reset)
- eeh_ops->reset(eehpe, EEH_RESET_HOT);
-}
-
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
@@ -2874,16 +2719,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.shutdown = pnv_pci_ioda_shutdown,
};
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .shutdown = pnv_pci_ioda_shutdown,
- .disable_device = pnv_npu_disable_device,
-};
-
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
.enable_device_hook = pnv_ocapi_enable_device_hook,
.release_device = pnv_pci_release_device,
@@ -2957,10 +2792,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->model = PNV_PHB_MODEL_P7IOC;
else if (of_device_is_compatible(np, "ibm,power8-pciex"))
phb->model = PNV_PHB_MODEL_PHB3;
- else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU;
- else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU2;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
@@ -3118,9 +2949,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
switch (phb->type) {
- case PNV_PHB_NPU_NVLINK:
- hose->controller_ops = pnv_npu_ioda_controller_ops;
- break;
case PNV_PHB_NPU_OCAPI:
hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
break;
@@ -3173,11 +3001,6 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
{
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 9b9bca1..b18468d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -926,17 +926,6 @@ void __init pnv_pci_init(void)
for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
pnv_pci_init_ioda2_phb(np);
- /* Look for NPU PHBs */
- for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
- pnv_pci_init_npu_phb(np);
-
- /*
- * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
- * the exception of TCE kill which requires an OPAL call.
- */
- for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
- pnv_pci_init_npu_phb(np);
-
/* Look for NPU2 OpenCAPI PHBs */
for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
pnv_pci_init_npu2_opencapi_phb(np);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 36d2292..c8d4f22 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -10,10 +10,9 @@
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_IODA1 = 0,
- PNV_PHB_IODA2 = 1,
- PNV_PHB_NPU_NVLINK = 2,
- PNV_PHB_NPU_OCAPI = 3,
+ PNV_PHB_IODA1,
+ PNV_PHB_IODA2,
+ PNV_PHB_NPU_OCAPI,
};
/* Precise PHB model for error management */
@@ -21,8 +20,6 @@ enum pnv_phb_model {
PNV_PHB_MODEL_UNKNOWN,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
- PNV_PHB_MODEL_NPU,
- PNV_PHB_MODEL_NPU2,
};
#define PNV_PCI_DIAG_BUF_SIZE 8192
@@ -81,7 +78,6 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
- struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -289,9 +285,7 @@ extern struct iommu_table *pnv_pci_table_alloc(int nid);
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
-extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
@@ -314,11 +308,6 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
#define pe_info(pe, fmt, ...) \
pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
-/* Nvlink functions */
-extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
-extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern void pnv_pci_npu_setup_iommu_groups(void);
-
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 2
#define POWERNV_IOMMU_MAX_LEVELS 5
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 1bffbd1..3b6800f 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -224,8 +224,6 @@ static void __init pSeries_request_regions(void)
void __init pSeries_final_fixup(void)
{
- struct pci_controller *hose;
-
pSeries_request_regions();
eeh_show_enabled();
@@ -234,27 +232,6 @@ void __init pSeries_final_fixup(void)
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
#endif
- list_for_each_entry(hose, &hose_list, list_node) {
- struct device_node *dn = hose->dn, *nvdn;
-
- while (1) {
- dn = of_find_all_nodes(dn);
- if (!dn)
- break;
- nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
- if (!nvdn)
- continue;
- if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
- continue;
- if (!of_device_is_compatible(nvdn->parent,
- "ibm,power9-npu"))
- continue;
-#ifdef CONFIG_PPC_POWERNV
- WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
- break;
- }
- }
}
/*
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index d5d03ae..b44d6ec 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -21,7 +21,7 @@
config ERRATA_SIFIVE_CIP_453
bool "Apply SiFive errata CIP-453"
- depends on ERRATA_SIFIVE
+ depends on ERRATA_SIFIVE && 64BIT
default y
help
This will apply the SiFive CIP-453 errata to add sign extension
@@ -32,7 +32,7 @@
config ERRATA_SIFIVE_CIP_1200
bool "Apply SiFive errata CIP-1200"
- depends on ERRATA_SIFIVE
+ depends on ERRATA_SIFIVE && 64BIT
default y
help
This will apply the SiFive CIP-1200 errata to repalce all
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index a9c5677..086f757 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -17,7 +17,6 @@ int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
int set_memory_rw_nx(unsigned long addr, int numpages);
void protect_kernel_text_data(void);
-void protect_kernel_linear_mapping_text_rodata(void);
#else
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
@@ -27,6 +26,12 @@ static inline void protect_kernel_text_data(void) {}
static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
#endif
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
+void protect_kernel_linear_mapping_text_rodata(void);
+#else
+static inline void protect_kernel_linear_mapping_text_rodata(void) {}
+#endif
+
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 7b317791..03901d3 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -293,9 +293,7 @@ void __init setup_arch(char **cmdline_p)
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
protect_kernel_text_data();
-#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) && !defined(CONFIG_XIP_KERNEL)
protect_kernel_linear_mapping_text_rodata();
-#endif
}
#ifdef CONFIG_SWIOTLB
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 07fdded..0721b97 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -25,8 +25,6 @@
int show_unhandled_signals = 1;
-extern asmlinkage void handle_exception(void);
-
static DEFINE_SPINLOCK(die_lock);
void die(struct pt_regs *regs, const char *str)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index dfb5e4f..4faf8bd 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -135,11 +135,16 @@ void __init setup_bootmem(void)
/*
* Reserve from the start of the kernel to the end of the kernel
- * and make sure we align the reservation on PMD_SIZE since we will
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
+ /*
+ * Make sure we align the reservation on PMD_SIZE since we will
* map the kernel in the linear mapping as read-only: we do not want
* any allocation to happen between _end and the next pmd aligned page.
*/
- memblock_reserve(vmlinux_start, (vmlinux_end - vmlinux_start + PMD_SIZE - 1) & PMD_MASK);
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+#endif
+ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
/*
* memblock allocator is not aware of the fact that last 4K bytes of
@@ -640,7 +645,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#endif
}
-#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
void protect_kernel_linear_mapping_text_rodata(void)
{
unsigned long text_start = (unsigned long)lm_alias(_start);
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 3bcbf52..44bcb80e 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -9,7 +9,7 @@
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, sh-linux- sh-linux-gnu- sh-unknown-linux-gnu-)
endif
diff --git a/arch/sh/kernel/syscalls/Makefile b/arch/sh/kernel/syscalls/Makefile
index 285aaba..6713c65 100644
--- a/arch/sh/kernel/syscalls/Makefile
+++ b/arch/sh/kernel/syscalls/Makefile
@@ -6,20 +6,14 @@
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@
$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
diff --git a/arch/sh/kernel/syscalls/syscallhdr.sh b/arch/sh/kernel/syscalls/syscallhdr.sh
deleted file mode 100644
index 4c05198..0000000
--- a/arch/sh/kernel/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_SH_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
diff --git a/arch/sh/kernel/syscalls/syscalltbl.sh b/arch/sh/kernel/syscalls/syscalltbl.sh
deleted file mode 100644
index 904b8e6..0000000
--- a/arch/sh/kernel/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry ; do
- emit $((nxt+offset)) $((nr+offset)) $entry
- nxt=$((nr+1))
- done
-) > "$out"
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index aec2040..0b9d98c 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
-generated-y += syscall_table_c32.h
generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
diff --git a/arch/sparc/kernel/syscalls/Makefile b/arch/sparc/kernel/syscalls/Makefile
index 283f644..0f2ea5b 100644
--- a/arch/sparc/kernel/syscalls/Makefile
+++ b/arch/sparc/kernel/syscalls/Makefile
@@ -6,46 +6,34 @@
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
-syshdr_abis_unistd_32 := common,32
+$(uapi)/unistd_32.h: abis := common,32
$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-syshdr_abis_unistd_64 := common,64
+$(uapi)/unistd_64.h: abis := common,64
$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-systbl_abis_syscall_table_32 := common,32
+$(kapi)/syscall_table_32.h: abis := common,32
$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_64 := common,64
+$(kapi)/syscall_table_64.h: abis := common,64
$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_c32 := common,32
-systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
- $(call if_changed,systbl)
-
uapisyshdr-y += unistd_32.h unistd_64.h
kapisyshdr-y += syscall_table_32.h \
- syscall_table_64.h \
- syscall_table_c32.h
+ syscall_table_64.h
uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
diff --git a/arch/sparc/kernel/syscalls/syscallhdr.sh b/arch/sparc/kernel/syscalls/syscallhdr.sh
deleted file mode 100644
index cf50a75..0000000
--- a/arch/sparc/kernel/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_SPARC_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry compat ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
diff --git a/arch/sparc/kernel/syscalls/syscalltbl.sh b/arch/sparc/kernel/syscalls/syscalltbl.sh
deleted file mode 100644
index 77cf014..0000000
--- a/arch/sparc/kernel/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s, sys_nis_syscall, )\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry compat ; do
- if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
- emit $((nxt+offset)) $((nr+offset)) $compat
- else
- emit $((nxt+offset)) $((nr+offset)) $entry
- fi
- nxt=$((nr+1))
- done
-) > "$out"
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index ab9e4d5..3aaffa0 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -9,10 +9,10 @@
* Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
*/
-#define __SYSCALL(nr, entry, nargs) .long entry
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+#define __SYSCALL(nr, entry) .long entry
.data
.align 4
.globl sys_call_table
sys_call_table:
#include <asm/syscall_table_32.h> /* 32-bit native syscalls */
-#undef __SYSCALL
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index a27394b..398fe44 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -10,18 +10,20 @@
* Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
*/
-#define __SYSCALL(nr, entry, nargs) .word entry
+#define __SYSCALL(nr, entry) .word entry
.text
.align 4
#ifdef CONFIG_COMPAT
.globl sys_call_table32
sys_call_table32:
-#include <asm/syscall_table_c32.h> /* Compat syscalls */
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+#include <asm/syscall_table_32.h> /* Compat syscalls */
+#undef __SYSCALL_WITH_COMPAT
#endif /* CONFIG_COMPAT */
.align 4
.globl sys_call_table64, sys_call_table
sys_call_table64:
sys_call_table:
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
#include <asm/syscall_table_64.h> /* 64-bit native syscalls */
-#undef __SYSCALL
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index 1c1a7e4..913745f 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -19,8 +19,6 @@
#include "../perf_event.h"
#include "iommu.h"
-#define COUNTER_SHIFT 16
-
/* iommu pmu conf masks */
#define GET_CSOURCE(x) ((x)->conf & 0xFFULL)
#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL)
@@ -286,22 +284,31 @@ static void perf_iommu_start(struct perf_event *event, int flags)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
+ /*
+ * To account for power-gating, which prevents writes to
+ * the counter, we need to enable the counter
+ * before setting up the counter register.
+ */
+ perf_iommu_enable_event(event);
+
if (flags & PERF_EF_RELOAD) {
- u64 prev_raw_count = local64_read(&hwc->prev_count);
+ u64 count = 0;
struct amd_iommu *iommu = perf_event_2_iommu(event);
+ /*
+ * Since the IOMMU PMU only supports counting mode,
+ * the counter always starts with value zero.
+ */
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
- IOMMU_PC_COUNTER_REG, &prev_raw_count);
+ IOMMU_PC_COUNTER_REG, &count);
}
- perf_iommu_enable_event(event);
perf_event_update_userpage(event);
-
}
static void perf_iommu_read(struct perf_event *event)
{
- u64 count, prev, delta;
+ u64 count;
struct hw_perf_event *hwc = &event->hw;
struct amd_iommu *iommu = perf_event_2_iommu(event);
@@ -312,14 +319,11 @@ static void perf_iommu_read(struct perf_event *event)
/* IOMMU pc counter register is only 48 bits */
count &= GENMASK_ULL(47, 0);
- prev = local64_read(&hwc->prev_count);
- if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
- return;
-
- /* Handle 48-bit counter overflow */
- delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
- delta >>= COUNTER_SHIFT;
- local64_add(delta, &event->count);
+ /*
+ * Since the counter always starts with value zero,
+ * simply accumulate the count for the event.
+ */
+ local64_add(count, &event->count);
}
static void perf_iommu_stop(struct perf_event *event, int flags)
@@ -329,15 +333,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
if (hwc->state & PERF_HES_UPTODATE)
return;
+ /*
+ * To account for power-gating, in which reading the counter would
+ * return zero, we need to read the register before disabling.
+ */
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+
perf_iommu_disable_event(event);
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
-
- if (hwc->state & PERF_HES_UPTODATE)
- return;
-
- perf_iommu_read(event);
- hwc->state |= PERF_HES_UPTODATE;
}
static int perf_iommu_add(struct perf_event *event, int flags)
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 297fa12..84b8753 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -7,18 +7,9 @@
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
- *
- * Since various instruction decoders/specs disagree on the encoding of
- * UD0/UD1.
*/
-
-#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
-#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2 ".byte 0x0f, 0x0b"
-
-#define INSN_UD0 0xff0f
#define INSN_UD2 0x0b0f
-
#define LEN_UD2 2
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index e35e342..73d45b0 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);
#endif
/* NMI */
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special NOIST entry point for VMX which invokes this on the kernel
+ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
+ * 'executing' marker.
+ *
+ * On 32bit this just uses the regular NMI entry point because 32-bit does
+ * not have ISTs.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist);
+#else
+#define asm_exc_nmi_noist asm_exc_nmi
+#endif
+
DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index e16cccd..a3f87f1 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -324,10 +324,6 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
}
-#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
-
-#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-
struct msr *msrs_alloc(void);
void msrs_free(struct msr *msrs);
int msr_set_bit(u32 msr, u8 bit);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cf..ca840fe 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,39 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
+#ifdef CONFIG_X86_5LEVEL
+/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+static inline unsigned long task_size_max(void)
+{
+ unsigned long ret;
+
+ alternative_io("movq %[small],%0","movq %[large],%0",
+ X86_FEATURE_LA57,
+ "=r" (ret),
+ [small] "i" ((1ul << 47)-PAGE_SIZE),
+ [large] "i" ((1ul << 56)-PAGE_SIZE));
+
+ return ret;
+}
+#endif /* CONFIG_X86_5LEVEL */
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 64297ea..a8d4ad85 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -55,30 +55,13 @@
#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
+/* See task_size_max() in <asm/page_64.h> */
#else
#define __VIRTUAL_MASK_SHIFT 47
+#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#endif
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
+#define TASK_SIZE_MAX task_size_max()
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6bdb69a..a1b756c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1851,8 +1851,8 @@ static inline void setup_getcpu(int cpu)
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
struct desc_struct d = { };
- if (boot_cpu_has(X86_FEATURE_RDTSCP))
- write_rdtscp_aux(cpudata);
+ if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
+ wrmsr(MSR_TSC_AUX, cpudata, 0);
/* Store CPU and node number in limit. */
d.limit0 = cpudata;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index dbeaa84..f07c10b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -84,7 +84,7 @@ unsigned int resctrl_cqm_threshold;
static const struct mbm_correction_factor_table {
u32 rmidthreshold;
u64 cf;
-} mbm_cf_table[] __initdata = {
+} mbm_cf_table[] __initconst = {
{7, CF(1.000000)},
{15, CF(1.000000)},
{15, CF(0.969650)},
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index bf250a3..2ef961c 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -524,6 +524,16 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
mds_user_clear_cpu_buffers();
}
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+ exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 43cbfc8..5e1f381 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
#endif
/* Kernel thread ? */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
memset(childregs, 0, sizeof(struct pt_regs));
kthread_frame_init(frame, sp, arg);
return 0;
@@ -172,6 +172,23 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
task_user_gs(p) = get_user_gs(current_pt_regs());
#endif
+ if (unlikely(p->flags & PF_IO_WORKER)) {
+ /*
+ * An IO thread is a user space thread, but it doesn't
+ * return to ret_after_fork().
+ *
+ * In order to indicate that to tools like gdb,
+ * we reset the stack and instruction pointers.
+ *
+ * It does the same kernel frame setup to return to a kernel
+ * function that a kernel thread does.
+ */
+ childregs->sp = 0;
+ childregs->ip = 0;
+ kthread_frame_init(frame, sp, arg);
+ return 0;
+ }
+
/* Set a new TLS for the child thread? */
if (clone_flags & CLONE_SETTLS)
ret = set_new_tls(p, tls);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ffb0cf..0ad5214 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1865,9 +1865,6 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
return true;
}
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-
#define X86_MATCH(model) \
X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73..b649f92 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3710,25 +3710,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long vmcb_pa = svm->current_vmcb->pa;
- /*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * This ensures that e.g. latency analysis on the host observes
- * guest mode as interrupt enabled.
- *
- * guest_enter_irqoff() informs context tracking about the
- * transition to guest mode and if enabled adjusts RCU state
- * accordingly.
- */
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
-
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
+ kvm_guest_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3748,24 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
vmload(__sme_page_pa(sd->save_area));
}
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on' as recorded before entering guest mode.
- * Same as enter_from_user_mode().
- *
- * guest_exit_irqoff() restores host context and reinstates RCU if
- * enabled and required.
- *
- * This needs to be done before the below as native_read_msr()
- * contains a tracepoint and x86_spec_ctrl_restore_host() calls
- * into world and some more.
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
-
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
+ kvm_guest_exit_irqoff();
}
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbe0cda..d000cdd 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -36,6 +36,7 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
+#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
#include <asm/kexec.h>
@@ -6415,18 +6416,17 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
+ unsigned long entry)
{
- unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
- gate_desc *desc = (gate_desc *)host_idt_base + vector;
-
kvm_before_interrupt(vcpu);
- vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+ vmx_do_interrupt_nmi_irqoff(entry);
kvm_after_interrupt(vcpu);
}
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
+ const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_noist;
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
/* if exit due to PF check for async PF */
@@ -6437,18 +6437,20 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
else if (is_nmi(intr_info))
- handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
+ handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
u32 intr_info = vmx_get_intr_info(vcpu);
+ unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+ gate_desc *desc = (gate_desc *)host_idt_base + vector;
if (WARN_ONCE(!is_external_intr(intr_info),
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- handle_interrupt_nmi_irqoff(vcpu, intr_info);
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
}
static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
@@ -6662,25 +6664,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx)
{
- /*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * This ensures that e.g. latency analysis on the host observes
- * guest mode as interrupt enabled.
- *
- * guest_enter_irqoff() informs context tracking about the
- * transition to guest mode and if enabled adjusts RCU state
- * accordingly.
- */
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
-
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
+ kvm_guest_enter_irqoff();
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6696,24 +6680,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on' as recorded before entering guest mode.
- * Same as enter_from_user_mode().
- *
- * guest_exit_irqoff() restores host context and reinstates RCU if
- * enabled and required.
- *
- * This needs to be done before the below as native_read_msr()
- * contains a tracepoint and x86_spec_ctrl_restore_host() calls
- * into world and some more.
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- guest_exit_irqoff();
-
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
+ kvm_guest_exit_irqoff();
}
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cebdaa1e..6eda283 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9315,6 +9315,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
local_irq_disable();
kvm_after_interrupt(vcpu);
+ /*
+ * Wait until after servicing IRQs to account guest time so that any
+ * ticks that occurred while running the guest are properly accounted
+ * to the guest. Waiting until IRQs are enabled degrades the accuracy
+ * of accounting via context tracking, but the loss of accuracy is
+ * acceptable for all known use cases.
+ */
+ vtime_account_guest_exit();
+
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
if (delta != S64_MIN) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8ddd381..521f74e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,51 @@
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+static __always_inline void kvm_guest_enter_irqoff(void)
+{
+ /*
+ * VMENTER enables interrupts (host state), but the kernel state is
+ * interrupts disabled when this is invoked. Also tell RCU about
+ * it. This is the same logic as for exit_to_user_mode().
+ *
+ * This ensures that e.g. latency analysis on the host observes
+ * guest mode as interrupt enabled.
+ *
+ * guest_enter_irqoff() informs context tracking about the
+ * transition to guest mode and if enabled adjusts RCU state
+ * accordingly.
+ */
+ instrumentation_begin();
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ instrumentation_end();
+
+ guest_enter_irqoff();
+ lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static __always_inline void kvm_guest_exit_irqoff(void)
+{
+ /*
+ * VMEXIT disables interrupts (host state), but tracing and lockdep
+ * have them in state 'on' as recorded before entering guest mode.
+ * Same as enter_from_user_mode().
+ *
+ * context_tracking_guest_exit() restores host context and reinstates
+ * RCU if enabled and required.
+ *
+ * This needs to be done immediately after VM-Exit, before any code
+ * that might contain tracepoints or call out to the greater world,
+ * e.g. before x86_spec_ctrl_restore_host().
+ */
+ lockdep_hardirqs_off(CALLER_ADDR0);
+ context_tracking_guest_exit();
+
+ instrumentation_begin();
+ trace_hardirqs_off_finish();
+ instrumentation_end();
+}
+
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
({ \
bool failed = (consistency_check); \
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index ba9fee7..e9c8f06 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -19,12 +19,8 @@
VARIANT = $(variant-y)
export VARIANT
-# Test for cross compiling
-
ifneq ($(VARIANT),)
- COMPILE_ARCH = $(shell uname -m)
-
- ifneq ($(COMPILE_ARCH), xtensa)
+ ifdef cross_compiling
ifndef CROSS_COMPILE
CROSS_COMPILE = xtensa_$(VARIANT)-
endif
diff --git a/certs/.gitignore b/certs/.gitignore
index 6cbd1f1..8c3763f 100644
--- a/certs/.gitignore
+++ b/certs/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-x509_certificate_list
-x509_revocation_list
+/x509_certificate_list
+/x509_revocation_list
diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c
index f2d0e59..0a0a982 100644
--- a/drivers/acpi/arm64/gtdt.c
+++ b/drivers/acpi/arm64/gtdt.c
@@ -329,7 +329,7 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
int index)
{
struct platform_device *pdev;
- int irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags);
+ int irq;
/*
* According to SBSA specification the size of refresh and control
@@ -338,7 +338,7 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
struct resource res[] = {
DEFINE_RES_MEM(wd->control_frame_address, SZ_4K),
DEFINE_RES_MEM(wd->refresh_frame_address, SZ_4K),
- DEFINE_RES_IRQ(irq),
+ {},
};
int nr_res = ARRAY_SIZE(res);
@@ -348,10 +348,11 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
if (!(wd->refresh_frame_address && wd->control_frame_address)) {
pr_err(FW_BUG "failed to get the Watchdog base address.\n");
- acpi_unregister_gsi(wd->timer_interrupt);
return -EINVAL;
}
+ irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags);
+ res[2] = (struct resource)DEFINE_RES_IRQ(irq);
if (irq <= 0) {
pr_warn("failed to map the Watchdog interrupt.\n");
nr_res--;
@@ -364,7 +365,8 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd,
*/
pdev = platform_device_register_simple("sbsa-gwdt", index, res, nr_res);
if (IS_ERR(pdev)) {
- acpi_unregister_gsi(wd->timer_interrupt);
+ if (irq > 0)
+ acpi_unregister_gsi(wd->timer_interrupt);
return PTR_ERR(pdev);
}
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index e209081..c68e694 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -75,8 +75,12 @@ void acpi_unregister_gsi(u32 gsi)
{
struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
DOMAIN_BUS_ANY);
- int irq = irq_find_mapping(d, gsi);
+ int irq;
+ if (WARN_ON(acpi_irq_model == ACPI_IRQ_MODEL_GIC && gsi < 16))
+ return;
+
+ irq = irq_find_mapping(d, gsi);
irq_dispose_mapping(irq);
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 0ddd611..3bc3c31 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -795,6 +795,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q)
switch (STATUS_CODE (qe)) {
case 0x1:
/* Fall through for streaming mode */
+ fallthrough;
case 0x2:/* Packet received OK.... */
if (atm_vcc) {
skb = pe->skb;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index c01786af..c604a40 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -88,7 +88,7 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
dev->secure_discard = le16_to_cpu(rsp->secure_discard);
dev->rotational = rsp->rotational;
- dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
+ dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
dev->fua = !!(rsp->cache_policy & RNBD_FUA);
dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
@@ -241,7 +241,7 @@ static bool rnbd_rerun_if_needed(struct rnbd_clt_session *sess)
cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(cpu_q->cpu))) {
if (!spin_trylock_irqsave(&cpu_q->requeue_lock, flags))
continue;
- if (unlikely(!test_bit(cpu_q->cpu, sess->cpu_queues_bm)))
+ if (!test_bit(cpu_q->cpu, sess->cpu_queues_bm))
goto unlock;
q = list_first_entry_or_null(&cpu_q->requeue_list,
typeof(*q), requeue_list);
@@ -320,7 +320,7 @@ static struct rtrs_permit *rnbd_get_permit(struct rnbd_clt_session *sess,
struct rtrs_permit *permit;
permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait);
- if (likely(permit))
+ if (permit)
/* We have a subtle rare case here, when all permits can be
* consumed before busy counter increased. This is safe,
* because loser will get NULL as a permit, observe 0 busy
@@ -351,12 +351,11 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
struct rtrs_permit *permit;
iu = kzalloc(sizeof(*iu), GFP_KERNEL);
- if (!iu) {
+ if (!iu)
return NULL;
- }
permit = rnbd_get_permit(sess, con_type, wait);
- if (unlikely(!permit)) {
+ if (!permit) {
kfree(iu);
return NULL;
}
@@ -692,7 +691,11 @@ static void remap_devs(struct rnbd_clt_session *sess)
return;
}
- rtrs_clt_query(sess->rtrs, &attrs);
+ err = rtrs_clt_query(sess->rtrs, &attrs);
+ if (err) {
+ pr_err("rtrs_clt_query(\"%s\"): %d\n", sess->sessname, err);
+ return;
+ }
mutex_lock(&sess->lock);
sess->max_io_size = attrs.max_io_size;
@@ -805,7 +808,7 @@ static struct rnbd_clt_session *alloc_sess(const char *sessname)
mutex_init(&sess->lock);
INIT_LIST_HEAD(&sess->devs_list);
INIT_LIST_HEAD(&sess->list);
- bitmap_zero(sess->cpu_queues_bm, NR_CPUS);
+ bitmap_zero(sess->cpu_queues_bm, num_possible_cpus());
init_waitqueue_head(&sess->rtrs_waitq);
refcount_set(&sess->refcount, 1);
@@ -1047,7 +1050,7 @@ static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev,
};
err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit,
&vec, 1, size, iu->sgt.sgl, sg_cnt);
- if (unlikely(err)) {
+ if (err) {
rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n",
err);
return err;
@@ -1078,7 +1081,7 @@ static bool rnbd_clt_dev_add_to_requeue(struct rnbd_clt_dev *dev,
cpu_q = get_cpu_ptr(sess->cpu_queues);
spin_lock_irqsave(&cpu_q->requeue_lock, flags);
- if (likely(!test_and_set_bit_lock(0, &q->in_list))) {
+ if (!test_and_set_bit_lock(0, &q->in_list)) {
if (WARN_ON(!list_empty(&q->requeue_list)))
goto unlock;
@@ -1090,7 +1093,7 @@ static bool rnbd_clt_dev_add_to_requeue(struct rnbd_clt_dev *dev,
*/
smp_mb__before_atomic();
}
- if (likely(atomic_read(&sess->busy))) {
+ if (atomic_read(&sess->busy)) {
list_add_tail(&q->requeue_list, &cpu_q->requeue_list);
} else {
/* Very unlikely, but possible: busy counter was
@@ -1118,7 +1121,7 @@ static void rnbd_clt_dev_kick_mq_queue(struct rnbd_clt_dev *dev,
if (delay != RNBD_DELAY_IFBUSY)
blk_mq_delay_run_hw_queue(hctx, delay);
- else if (unlikely(!rnbd_clt_dev_add_to_requeue(dev, q)))
+ else if (!rnbd_clt_dev_add_to_requeue(dev, q))
/*
* If session is not busy we have to restart
* the queue ourselves.
@@ -1135,12 +1138,12 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
int err;
blk_status_t ret = BLK_STS_IOERR;
- if (unlikely(dev->dev_state != DEV_STATE_MAPPED))
+ if (dev->dev_state != DEV_STATE_MAPPED)
return BLK_STS_IOERR;
iu->permit = rnbd_get_permit(dev->sess, RTRS_IO_CON,
RTRS_PERMIT_NOWAIT);
- if (unlikely(!iu->permit)) {
+ if (!iu->permit) {
rnbd_clt_dev_kick_mq_queue(dev, hctx, RNBD_DELAY_IFBUSY);
return BLK_STS_RESOURCE;
}
@@ -1148,7 +1151,8 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
iu->sgt.sgl = iu->first_sgl;
err = sg_alloc_table_chained(&iu->sgt,
/* Even-if the request has no segment,
- * sglist must have one entry at least */
+ * sglist must have one entry at least.
+ */
blk_rq_nr_phys_segments(rq) ? : 1,
iu->sgt.sgl,
RNBD_INLINE_SG_CNT);
@@ -1161,9 +1165,9 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
err = rnbd_client_xfer_request(dev, rq, iu);
- if (likely(err == 0))
+ if (err == 0)
return BLK_STS_OK;
- if (unlikely(err == -EAGAIN || err == -ENOMEM)) {
+ if (err == -EAGAIN || err == -ENOMEM) {
rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
ret = BLK_STS_RESOURCE;
}
@@ -1294,7 +1298,11 @@ find_and_get_or_create_sess(const char *sessname,
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
}
- rtrs_clt_query(sess->rtrs, &attrs);
+
+ err = rtrs_clt_query(sess->rtrs, &attrs);
+ if (err)
+ goto close_rtrs;
+
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
sess->nr_poll_queues = nr_poll_queues;
@@ -1576,7 +1584,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rnbd_clt_dev *dev;
int ret;
- if (unlikely(exists_devpath(pathname, sessname)))
+ if (exists_devpath(pathname, sessname))
return ERR_PTR(-EEXIST);
sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 451e738..b5322c5 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -87,7 +87,7 @@ struct rnbd_clt_session {
DECLARE_BITMAP(cpu_queues_bm, NR_CPUS);
int __percpu *cpu_rr; /* per-cpu var for CPU round-robin */
atomic_t busy;
- int queue_depth;
+ size_t queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
u32 nr_poll_queues;
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 899dd9d..aafecfe 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -104,7 +104,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
rcu_read_lock();
sess_dev = xa_load(&srv_sess->index_idr, dev_id);
- if (likely(sess_dev))
+ if (sess_dev)
ret = kref_get_unless_zero(&sess_dev->kref);
rcu_read_unlock();
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index f5bd0dc..3c1c5da 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -139,7 +139,7 @@ static int psci_to_linux_errno(int errno)
return -EINVAL;
case PSCI_RET_DENIED:
return -EPERM;
- };
+ }
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b4ad1c0..7d3b546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3410,19 +3410,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
- /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
- /* this will fail for cards that aren't VGA class devices, just
- * ignore it */
- if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
- vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
-
- if (amdgpu_device_supports_px(ddev)) {
- px = true;
- vga_switcheroo_register_client(adev->pdev,
- &amdgpu_switcheroo_ops, px);
- vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- }
-
if (amdgpu_emu_mode == 1) {
/* post the asic on emulation mode */
emu_soc_asic_init(adev);
@@ -3619,6 +3606,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(pdev);
+ /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
+ /* this will fail for cards that aren't VGA class devices, just
+ * ignore it */
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
+
+ if (amdgpu_device_supports_px(ddev)) {
+ px = true;
+ vga_switcheroo_register_client(adev->pdev,
+ &amdgpu_switcheroo_ops, px);
+ vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+ }
+
if (adev->gmc.xgmi.pending_reset)
queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -3630,8 +3630,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
failed:
amdgpu_vf_error_trans_all(adev);
- if (px)
- vga_switcheroo_fini_domain_pm_ops(adev->dev);
failed_unmap:
iounmap(adev->rmmio);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 2e622c1..8a1fb8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -837,6 +837,174 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
return 0;
}
+static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
+ unsigned int *width, unsigned int *height)
+{
+ unsigned int cpp_log2 = ilog2(cpp);
+ unsigned int pixel_log2 = block_log2 - cpp_log2;
+ unsigned int width_log2 = (pixel_log2 + 1) / 2;
+ unsigned int height_log2 = pixel_log2 - width_log2;
+
+ *width = 1 << width_log2;
+ *height = 1 << height_log2;
+}
+
+static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
+ bool pipe_aligned)
+{
+ unsigned int ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+
+ switch (ver) {
+ case AMD_FMT_MOD_TILE_VER_GFX9: {
+ /*
+ * TODO: for pipe aligned we may need to check the alignment of the
+ * total size of the surface, which may need to be bigger than the
+ * natural alignment due to some HW workarounds
+ */
+ return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
+ }
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
+ int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+
+ if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
+ ++pipes_log2;
+
+ return max(8 + (pipe_aligned ? pipes_log2 : 0), 12);
+ }
+ default:
+ return 0;
+ }
+}
+
+static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane,
+ const struct drm_format_info *format,
+ unsigned int block_width, unsigned int block_height,
+ unsigned int block_size_log2)
+{
+ unsigned int width = rfb->base.width /
+ ((plane && plane < format->num_planes) ? format->hsub : 1);
+ unsigned int height = rfb->base.height /
+ ((plane && plane < format->num_planes) ? format->vsub : 1);
+ unsigned int cpp = plane < format->num_planes ? format->cpp[plane] : 1;
+ unsigned int block_pitch = block_width * cpp;
+ unsigned int min_pitch = ALIGN(width * cpp, block_pitch);
+ unsigned int block_size = 1 << block_size_log2;
+ uint64_t size;
+
+ if (rfb->base.pitches[plane] % block_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is not a multiple of block pitch %d\n",
+ rfb->base.pitches[plane], plane, block_pitch);
+ return -EINVAL;
+ }
+ if (rfb->base.pitches[plane] < min_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is less than minimum pitch %d\n",
+ rfb->base.pitches[plane], plane, min_pitch);
+ return -EINVAL;
+ }
+
+ /* Force at least natural alignment. */
+ if (rfb->base.offsets[plane] % block_size) {
+ drm_dbg_kms(rfb->base.dev,
+ "offset 0x%x for plane %d is not a multiple of block pitch 0x%x\n",
+ rfb->base.offsets[plane], plane, block_size);
+ return -EINVAL;
+ }
+
+ size = rfb->base.offsets[plane] +
+ (uint64_t)rfb->base.pitches[plane] / block_pitch *
+ block_size * DIV_ROUND_UP(height, block_height);
+
+ if (rfb->base.obj[0]->size < size) {
+ drm_dbg_kms(rfb->base.dev,
+ "BO size 0x%zx is less than 0x%llx required for plane %d\n",
+ rfb->base.obj[0]->size, size, plane);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+{
+ const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format);
+ uint64_t modifier = rfb->base.modifier;
+ int ret;
+ unsigned int i, block_width, block_height, block_size_log2;
+
+ if (!rfb->base.dev->mode_config.allow_fb_modifiers)
+ return 0;
+
+ for (i = 0; i < format_info->num_planes; ++i) {
+ if (modifier == DRM_FORMAT_MOD_LINEAR) {
+ block_width = 256 / format_info->cpp[i];
+ block_height = 1;
+ block_size_log2 = 8;
+ } else {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch ((swizzle & ~3) + 1) {
+ case DC_SW_256B_S:
+ block_size_log2 = 8;
+ break;
+ case DC_SW_4KB_S:
+ case DC_SW_4KB_S_X:
+ block_size_log2 = 12;
+ break;
+ case DC_SW_64KB_S:
+ case DC_SW_64KB_S_T:
+ case DC_SW_64KB_S_X:
+ block_size_log2 = 16;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ }
+
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ if (AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+ block_size_log2 = get_dcc_block_size(modifier, false, false);
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height,
+ block_size_log2);
+ if (ret)
+ return ret;
+
+ ++i;
+ block_size_log2 = get_dcc_block_size(modifier, true, true);
+ } else {
+ bool pipe_aligned = AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
+
+ block_size_log2 = get_dcc_block_size(modifier, true, pipe_aligned);
+ }
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
uint64_t *tiling_flags, bool *tmz_surface)
{
@@ -902,10 +1070,8 @@ int amdgpu_display_gem_fb_verify_and_init(
int ret;
rfb->base.obj[0] = obj;
-
- /* Verify that bo size can fit the fb size. */
- ret = drm_gem_fb_init_with_funcs(dev, &rfb->base, file_priv, mode_cmd,
- &amdgpu_fb_funcs);
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
if (ret)
goto err;
/* Verify that the modifier is supported. */
@@ -967,9 +1133,12 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
}
}
- for (i = 1; i < rfb->base.format->num_planes; ++i) {
+ ret = amdgpu_display_verify_sizes(rfb);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rfb->base.format->num_planes; ++i) {
drm_gem_object_get(rfb->base.obj[0]);
- drm_gem_object_put(rfb->base.obj[i]);
rfb->base.obj[i] = rfb->base.obj[0];
}
@@ -999,6 +1168,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
}
@@ -1412,7 +1582,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
}
}
}
- return r;
+ return 0;
}
int amdgpu_display_resume_helper(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9229389..f93883d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1573,6 +1573,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
amdgpu_device_baco_exit(drm_dev);
}
ret = amdgpu_device_resume(drm_dev, false);
+ if (ret)
+ return ret;
+
if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 94b0696..b4971e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -215,7 +215,11 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
/* Check if we have an idle VMID */
i = 0;
list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
- fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
+ /* Don't use per engine and per process VMID at the same time */
+ struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
+ NULL : ring;
+
+ fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
if (!fences[i])
break;
++i;
@@ -281,7 +285,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
if (updates && (*id)->flushed_updates &&
updates->context == (*id)->flushed_updates->context &&
!dma_fence_is_later(updates, (*id)->flushed_updates))
- updates = NULL;
+ updates = NULL;
if ((*id)->owner != vm->immediate.fence_context ||
job->vm_pd_addr != (*id)->pd_gpu_addr ||
@@ -290,6 +294,10 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
!dma_fence_is_signaled((*id)->last_flush))) {
struct dma_fence *tmp;
+ /* Don't use per engine and per process VMID at the same time */
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
/* to prevent one context starved by another context */
(*id)->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
@@ -365,12 +373,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
- /* Concurrent flushes are only possible starting with Vega10 and
- * are broken on Navi10 and Navi14.
- */
- if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
- adev->asic_type == CHIP_NAVI10 ||
- adev->asic_type == CHIP_NAVI14))
+ if (needs_flush && !adev->vm_manager.concurrent_flush)
continue;
/* Good, we can use this VMID. Remember this submission as
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0ffdf84..9acee4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3148,6 +3148,12 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
unsigned i;
+ /* Concurrent flushes are only possible starting with Vega10 and
+ * are broken on Navi10 and Navi14.
+ */
+ adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
+ adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14);
amdgpu_vmid_mgr_init(adev);
adev->vm_manager.fence_context =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e..4e14028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -331,6 +331,7 @@ struct amdgpu_vm_manager {
/* Handling of VMIDs */
struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
unsigned int first_kfd_vmid;
+ bool concurrent_flush;
/* Handling of VM fences */
u64 fence_context;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index c1bd190..e4f27b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -59,6 +59,7 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_32_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
@@ -243,10 +244,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
chip_name = "polaris10";
break;
case CHIP_POLARIS12:
- if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))
+ if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
chip_name = "polaris12_k";
- else
- chip_name = "polaris12";
+ } else {
+ WREG32(mmMC_SEQ_IO_DEBUG_INDEX, ixMC_IO_DEBUG_UP_159);
+ /* Polaris12 32bit ASIC needs a special MC firmware */
+ if (RREG32(mmMC_SEQ_IO_DEBUG_DATA) == 0x05b4dc40)
+ chip_name = "polaris12_32";
+ else
+ chip_name = "polaris12";
+ }
break;
case CHIP_FIJI:
case CHIP_CARRIZO:
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 3f15bf3..cf165ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -589,6 +589,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
+ UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b34ab76..389eff9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -4015,6 +4015,23 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state,
scaling_info->src_rect.x = state->src_x >> 16;
scaling_info->src_rect.y = state->src_y >> 16;
+ /*
+ * For reasons we don't (yet) fully understand a non-zero
+ * src_y coordinate into an NV12 buffer can cause a
+ * system hang. To avoid hangs (and maybe be overly cautious)
+ * let's reject both non-zero src_x and src_y.
+ *
+ * We currently know of only one use-case to reproduce a
+ * scenario with non-zero src_x and src_y for NV12, which
+ * is to gesture the YouTube Android app into full screen
+ * on ChromeOS.
+ */
+ if (state->fb &&
+ state->fb->format->format == DRM_FORMAT_NV12 &&
+ (scaling_info->src_rect.x != 0 ||
+ scaling_info->src_rect.y != 0))
+ return -EINVAL;
+
scaling_info->src_rect.width = state->src_w >> 16;
if (scaling_info->src_rect.width == 0)
return -EINVAL;
@@ -9869,6 +9886,53 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
}
#endif
+static int validate_overlay(struct drm_atomic_state *state)
+{
+ int i;
+ struct drm_plane *plane;
+ struct drm_plane_state *old_plane_state, *new_plane_state;
+ struct drm_plane_state *primary_state, *overlay_state = NULL;
+
+ /* Check if primary plane is contained inside overlay */
+ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+ if (drm_atomic_plane_disabling(plane->state, new_plane_state))
+ return 0;
+
+ overlay_state = new_plane_state;
+ continue;
+ }
+ }
+
+ /* check if we're making changes to the overlay plane */
+ if (!overlay_state)
+ return 0;
+
+ /* check if overlay plane is enabled */
+ if (!overlay_state->crtc)
+ return 0;
+
+ /* find the primary plane for the CRTC that the overlay is enabled on */
+ primary_state = drm_atomic_get_plane_state(state, overlay_state->crtc->primary);
+ if (IS_ERR(primary_state))
+ return PTR_ERR(primary_state);
+
+ /* check if primary plane is enabled */
+ if (!primary_state->crtc)
+ return 0;
+
+ /* Perform the bounds check to ensure the overlay plane covers the primary */
+ if (primary_state->crtc_x < overlay_state->crtc_x ||
+ primary_state->crtc_y < overlay_state->crtc_y ||
+ primary_state->crtc_x + primary_state->crtc_w > overlay_state->crtc_x + overlay_state->crtc_w ||
+ primary_state->crtc_y + primary_state->crtc_h > overlay_state->crtc_y + overlay_state->crtc_h) {
+ DRM_DEBUG_ATOMIC("Overlay plane is enabled with hardware cursor but does not fully cover primary plane\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
* @dev: The DRM device
@@ -10043,6 +10107,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
+ ret = validate_overlay(state);
+ if (ret)
+ goto fail;
+
/* Add new/modified planes */
for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
ret = dm_update_plane_state(dc, state, plane,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 5295450..1b6b157 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -3012,7 +3012,7 @@ static int trigger_hpd_mst_set(void *data, u64 val)
if (!aconnector->dc_link)
continue;
- if (!(aconnector->port && &aconnector->mst_port->mst_mgr))
+ if (!aconnector->mst_port)
continue;
link = aconnector->dc_link;
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 8128603..9a54066 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -451,7 +451,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- struct pp_states_info data;
+ struct pp_states_info data = {0};
enum amd_pm_state_type pm = 0;
int i = 0, ret = 0;
@@ -1893,6 +1893,14 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
}
}
+ if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) {
+ /* SMU MP1 does not support dcefclk level setting */
+ if (asic_type >= CHIP_NAVI10) {
+ dev_attr->attr.mode &= ~S_IWUGO;
+ dev_attr->store = NULL;
+ }
+ }
+
#undef DEVICE_ATTR_IS
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index f827096..ac13042 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1443,7 +1443,6 @@ static int navi10_force_clk_levels(struct smu_context *smu,
case SMU_SOCCLK:
case SMU_MCLK:
case SMU_UCLK:
- case SMU_DCEFCLK:
case SMU_FCLK:
/* There is only 2 levels for fine grained DPM */
if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
@@ -1463,6 +1462,10 @@ static int navi10_force_clk_levels(struct smu_context *smu,
if (ret)
return size;
break;
+ case SMU_DCEFCLK:
+ dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n");
+ break;
+
default:
break;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 72d9c1b..d2fd44b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1127,7 +1127,6 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
case SMU_SOCCLK:
case SMU_MCLK:
case SMU_UCLK:
- case SMU_DCEFCLK:
case SMU_FCLK:
/* There is only 2 levels for fine grained DPM */
if (sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) {
@@ -1147,6 +1146,9 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu,
if (ret)
goto forec_level_out;
break;
+ case SMU_DCEFCLK:
+ dev_info(smu->adev->dev,"Setting DCEFCLK min/max dpm level is not supported!\n");
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index a560468..6a2dee8 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -3474,7 +3474,18 @@ intel_dp_check_mst_status(struct intel_dp *intel_dp)
drm_WARN_ON_ONCE(&i915->drm, intel_dp->active_mst_links < 0);
for (;;) {
- u8 esi[DP_DPRX_ESI_LEN] = {};
+ /*
+ * The +2 is because DP_DPRX_ESI_LEN is 14, but we then
+ * pass in "esi+10" to drm_dp_channel_eq_ok(), which
+ * takes a 6-byte array. So we actually need 16 bytes
+ * here.
+ *
+ * Somebody who knows what the limits actually are
+ * should check this, but for now this is at least
+ * harmless and avoids a valid compiler warning about
+ * using more of the array than we have allocated.
+ */
+ u8 esi[DP_DPRX_ESI_LEN+2] = {};
bool handled;
int retry;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 477badf..dda3207 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -669,8 +669,8 @@ static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu)
link_n = vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A));
/* Get H/V total from transcoder timing */
- htotal = (vgpu_vreg_t(vgpu, HTOTAL(TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT) + 1;
- vtotal = (vgpu_vreg_t(vgpu, VTOTAL(TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT) + 1;
+ htotal = (vgpu_vreg_t(vgpu, HTOTAL(TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT);
+ vtotal = (vgpu_vreg_t(vgpu, VTOTAL(TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT);
if (dp_br && link_n && htotal && vtotal) {
u64 pixel_clk = 0;
@@ -682,7 +682,7 @@ static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu)
pixel_clk *= MSEC_PER_SEC;
/* Calcuate refresh rate by (pixel_clk / (h_total * v_total)) */
- new_rate = DIV64_U64_ROUND_CLOSEST(pixel_clk, div64_u64(mul_u32_u32(htotal, vtotal), MSEC_PER_SEC));
+ new_rate = DIV64_U64_ROUND_CLOSEST(mul_u64_u32_shr(pixel_clk, MSEC_PER_SEC, 0), mul_u32_u32(htotal + 1, vtotal + 1));
if (*old_rate != new_rate)
*old_rate = new_rate;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index 7c29976..18bc76b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -648,16 +648,6 @@ static void dpu_crtc_atomic_flush(struct drm_crtc *crtc,
if (unlikely(!cstate->num_mixers))
return;
- /*
- * For planes without commit update, drm framework will not add
- * those planes to current state since hardware update is not
- * required. However, if those planes were power collapsed since
- * last commit cycle, driver has to restore the hardware state
- * of those planes explicitly here prior to plane flush.
- */
- drm_atomic_crtc_for_each_plane(plane, crtc)
- dpu_plane_restore(plane, state);
-
/* update performance setting before crtc kickoff */
dpu_core_perf_crtc_update(crtc, 1, false);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index df7f3d3..7a99354 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -1258,22 +1258,6 @@ static void dpu_plane_atomic_update(struct drm_plane *plane,
}
}
-void dpu_plane_restore(struct drm_plane *plane, struct drm_atomic_state *state)
-{
- struct dpu_plane *pdpu;
-
- if (!plane || !plane->state) {
- DPU_ERROR("invalid plane\n");
- return;
- }
-
- pdpu = to_dpu_plane(plane);
-
- DPU_DEBUG_PLANE(pdpu, "\n");
-
- dpu_plane_atomic_update(plane, state);
-}
-
static void dpu_plane_destroy(struct drm_plane *plane)
{
struct dpu_plane *pdpu = plane ? to_dpu_plane(plane) : NULL;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
index 03b6365..34e03ac 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h
@@ -85,12 +85,6 @@ void dpu_plane_get_ctl_flush(struct drm_plane *plane, struct dpu_hw_ctl *ctl,
u32 *flush_sspp);
/**
- * dpu_plane_restore - restore hw state if previously power collapsed
- * @plane: Pointer to drm plane structure
- */
-void dpu_plane_restore(struct drm_plane *plane, struct drm_atomic_state *state);
-
-/**
* dpu_plane_flush - final plane operations before commit flush
* @plane: Pointer to drm plane structure
*/
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 42301b4..28c4413 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2120,11 +2120,14 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
return state_index;
/* last mode is usually default, array is low to high */
for (i = 0; i < num_modes; i++) {
- rdev->pm.power_state[state_index].clock_info =
- kcalloc(1, sizeof(struct radeon_pm_clock_info),
- GFP_KERNEL);
+ /* avoid memory leaks from invalid modes or unknown frev. */
+ if (!rdev->pm.power_state[state_index].clock_info) {
+ rdev->pm.power_state[state_index].clock_info =
+ kzalloc(sizeof(struct radeon_pm_clock_info),
+ GFP_KERNEL);
+ }
if (!rdev->pm.power_state[state_index].clock_info)
- return state_index;
+ goto out;
rdev->pm.power_state[state_index].num_clock_modes = 1;
rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
switch (frev) {
@@ -2243,17 +2246,24 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
break;
}
}
+out:
+ /* free any unused clock_info allocation. */
+ if (state_index && state_index < num_modes) {
+ kfree(rdev->pm.power_state[state_index].clock_info);
+ rdev->pm.power_state[state_index].clock_info = NULL;
+ }
+
/* last mode is usually default */
- if (rdev->pm.default_power_state_index == -1) {
+ if (state_index && rdev->pm.default_power_state_index == -1) {
rdev->pm.power_state[state_index - 1].type =
POWER_STATE_TYPE_DEFAULT;
rdev->pm.default_power_state_index = state_index - 1;
rdev->pm.power_state[state_index - 1].default_clock_mode =
&rdev->pm.power_state[state_index - 1].clock_info[0];
- rdev->pm.power_state[state_index].flags &=
+ rdev->pm.power_state[state_index - 1].flags &=
~RADEON_PM_STATE_SINGLE_DISPLAY_ONLY;
- rdev->pm.power_state[state_index].misc = 0;
- rdev->pm.power_state[state_index].misc2 = 0;
+ rdev->pm.power_state[state_index - 1].misc = 0;
+ rdev->pm.power_state[state_index - 1].misc2 = 0;
}
return state_index;
}
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index f8e9b73..e2e12a5 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -2535,7 +2535,7 @@ int i3c_master_register(struct i3c_master_controller *master,
ret = i3c_master_bus_init(master);
if (ret)
- goto err_destroy_wq;
+ goto err_put_dev;
ret = device_add(&master->dev);
if (ret)
@@ -2566,9 +2566,6 @@ int i3c_master_register(struct i3c_master_controller *master,
err_cleanup_bus:
i3c_master_bus_cleanup(master);
-err_destroy_wq:
- destroy_workqueue(master->wq);
-
err_put_dev:
put_device(&master->dev);
diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c
index 8d99069..1f6ba42 100644
--- a/drivers/i3c/master/svc-i3c-master.c
+++ b/drivers/i3c/master/svc-i3c-master.c
@@ -1124,7 +1124,6 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master,
cmd->in = NULL;
cmd->out = &ccc->id;
cmd->len = 1;
- cmd->read_len = xfer_len;
cmd->read_len = 0;
cmd->continued = true;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 40f4383..0a794d7 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -2976,7 +2976,8 @@ EXPORT_SYMBOL(rtrs_clt_request);
int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
{
- int cnt;
+ /* If no path, return -1 for block layer not to try again */
+ int cnt = -1;
struct rtrs_con *con;
struct rtrs_clt_sess *sess;
struct path_it it;
diff --git a/drivers/memory/.gitignore b/drivers/memory/.gitignore
index caedc4c..5e84bee 100644
--- a/drivers/memory/.gitignore
+++ b/drivers/memory/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ti-emif-asm-offsets.h
+/ti-emif-asm-offsets.h
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 34073cd..3cf6de2 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1562,6 +1562,8 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
int i;
int putidx;
+ cdev->tx_skb = NULL;
+
/* Generate ID field for TX buffer Element */
/* Common to all supported M_CAN versions */
if (cf->can_id & CAN_EFF_FLAG) {
@@ -1678,7 +1680,6 @@ static void m_can_tx_work_queue(struct work_struct *ws)
tx_work);
m_can_tx_handler(cdev);
- cdev->tx_skb = NULL;
}
static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 492f1bc..173c661 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -956,8 +956,6 @@ static int mcp251x_stop(struct net_device *net)
priv->force_quit = 1;
free_irq(spi->irq, priv);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
mutex_lock(&priv->mcp_lock);
@@ -1224,24 +1222,15 @@ static int mcp251x_open(struct net_device *net)
goto out_close;
}
- priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
- 0);
- if (!priv->wq) {
- ret = -ENOMEM;
- goto out_clean;
- }
- INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
- INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
-
ret = mcp251x_hw_wake(spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
ret = mcp251x_setup(net, spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
ret = mcp251x_set_normal_mode(spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
can_led_event(net, CAN_LED_EVENT_OPEN);
@@ -1250,9 +1239,7 @@ static int mcp251x_open(struct net_device *net)
return 0;
-out_free_wq:
- destroy_workqueue(priv->wq);
-out_clean:
+out_free_irq:
free_irq(spi->irq, priv);
mcp251x_hw_sleep(spi);
out_close:
@@ -1373,6 +1360,15 @@ static int mcp251x_can_probe(struct spi_device *spi)
if (ret)
goto out_clk;
+ priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ 0);
+ if (!priv->wq) {
+ ret = -ENOMEM;
+ goto out_clk;
+ }
+ INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
+ INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
+
priv->spi = spi;
mutex_init(&priv->mcp_lock);
@@ -1417,6 +1413,8 @@ static int mcp251x_can_probe(struct spi_device *spi)
return 0;
error_probe:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
mcp251x_power_enable(priv->power, 0);
out_clk:
@@ -1438,6 +1436,9 @@ static int mcp251x_can_remove(struct spi_device *spi)
mcp251x_power_enable(priv->power, 0);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+
clk_disable_unprepare(priv->clk);
free_candev(net);
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 970dc57..e0ae00e 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2885,8 +2885,8 @@ static int mcp251xfd_probe(struct spi_device *spi)
clk = devm_clk_get(&spi->dev, NULL);
if (IS_ERR(clk))
- dev_err_probe(&spi->dev, PTR_ERR(clk),
- "Failed to get Oscillator (clock)!\n");
+ return dev_err_probe(&spi->dev, PTR_ERR(clk),
+ "Failed to get Oscillator (clock)!\n");
freq = clk_get_rate(clk);
/* Sanity check */
@@ -2986,10 +2986,12 @@ static int mcp251xfd_probe(struct spi_device *spi)
err = mcp251xfd_register(priv);
if (err)
- goto out_free_candev;
+ goto out_can_rx_offload_del;
return 0;
+ out_can_rx_offload_del:
+ can_rx_offload_del(&priv->offload);
out_free_candev:
spi->max_speed_hz = priv->spi_max_speed_hz_orig;
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 85ba12aa..ea7550d 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -41,6 +41,9 @@ static int ksz8795_spi_probe(struct spi_device *spi)
int i, ret = 0;
ksz8 = devm_kzalloc(&spi->dev, sizeof(struct ksz8), GFP_KERNEL);
+ if (!ksz8)
+ return -ENOMEM;
+
ksz8->priv = spi;
dev = ksz_switch_alloc(&spi->dev, ksz8);
diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
index 30d97ea..1129348 100644
--- a/drivers/net/dsa/microchip/ksz8863_smi.c
+++ b/drivers/net/dsa/microchip/ksz8863_smi.c
@@ -147,11 +147,14 @@ static int ksz8863_smi_probe(struct mdio_device *mdiodev)
int i;
ksz8 = devm_kzalloc(&mdiodev->dev, sizeof(struct ksz8), GFP_KERNEL);
+ if (!ksz8)
+ return -ENOMEM;
+
ksz8->priv = mdiodev;
dev = ksz_switch_alloc(&mdiodev->dev, ksz8);
if (!dev)
- return -EINVAL;
+ return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz8863_regmap_config); i++) {
rc = ksz8863_regmap_config[i];
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 9e02f88..b3d74332 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -2016,7 +2016,7 @@ static struct pci_driver alx_driver = {
module_pci_driver(alx_driver);
MODULE_DEVICE_TABLE(pci, alx_pci_tbl);
MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
-MODULE_AUTHOR("Qualcomm Corporation, <nic-devel@qualcomm.com>");
+MODULE_AUTHOR("Qualcomm Corporation");
MODULE_DESCRIPTION(
"Qualcomm Atheros(R) AR816x/AR817x PCI-E Ethernet Network Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 1d17c24..c6263cf 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -32,7 +32,7 @@ static const struct pci_device_id atl1c_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, atl1c_pci_tbl);
MODULE_AUTHOR("Jie Yang");
-MODULE_AUTHOR("Qualcomm Atheros Inc., <nic-devel@qualcomm.com>");
+MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Qualcomm Atheros 100/1000M Ethernet Network Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9c2f51f..d21f085 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1192,7 +1192,6 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
return 0;
}
- err = -EIO;
/* verify ari is enabled */
if (!pci_ari_enabled(bp->pdev->bus)) {
BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n");
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 7e4e831..ba47777 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1764,7 +1764,7 @@ bnad_dim_timeout(struct timer_list *t)
}
}
- /* Check for BNAD_CF_DIM_ENABLED, does not eleminate a race */
+ /* Check for BNAD_CF_DIM_ENABLED, does not eliminate a race */
if (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags))
mod_timer(&bnad->dim_timer,
jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 0e94db9..6bc7d41 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4852,7 +4852,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
{
struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
- struct macb_queue *queue = bp->queues;
+ struct macb_queue *queue;
unsigned long flags;
unsigned int q;
int err;
@@ -4939,7 +4939,7 @@ static int __maybe_unused macb_resume(struct device *dev)
{
struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
- struct macb_queue *queue = bp->queues;
+ struct macb_queue *queue;
unsigned long flags;
unsigned int q;
int err;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 256fae1..1e5f2ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2563,12 +2563,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
spin_lock_bh(&eosw_txq->lock);
if (tc != FW_SCHED_CLS_NONE) {
if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
} else {
if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
}
@@ -2604,17 +2604,19 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
eosw_txq_flush_pending_skbs(eosw_txq);
ret = eosw_txq_enqueue(eosw_txq, skb);
- if (ret) {
- dev_consume_skb_any(skb);
- goto out_unlock;
- }
+ if (ret)
+ goto out_free_skb;
eosw_txq->state = next_state;
eosw_txq->flowc_idx = eosw_txq->pidx;
eosw_txq_advance(eosw_txq, 1);
ethofld_xmit(dev, eosw_txq);
-out_unlock:
+ spin_unlock_bh(&eosw_txq->lock);
+ return 0;
+
+out_free_skb:
+ dev_consume_skb_any(skb);
spin_unlock_bh(&eosw_txq->lock);
return ret;
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f48957a..d0a8f71 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -768,7 +768,7 @@ static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
return err;
}
-static inline void enic_queue_wq_skb(struct enic *enic,
+static inline int enic_queue_wq_skb(struct enic *enic,
struct vnic_wq *wq, struct sk_buff *skb)
{
unsigned int mss = skb_shinfo(skb)->gso_size;
@@ -814,6 +814,7 @@ static inline void enic_queue_wq_skb(struct enic *enic,
wq->to_use = buf->next;
dev_kfree_skb(skb);
}
+ return err;
}
/* netif_tx_lock held, process context with BHs disabled, or BH */
@@ -857,7 +858,8 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
- enic_queue_wq_skb(enic, wq, skb);
+ if (enic_queue_wq_skb(enic, wq, skb))
+ goto error;
if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
netif_tx_stop_queue(txq);
@@ -865,6 +867,7 @@ static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
if (!netdev_xmit_more() || netif_xmit_stopped(txq))
vnic_wq_doorbell(wq);
+error:
spin_unlock(&enic->wq_lock[txq_map]);
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c21dd11..783fdaf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -575,8 +575,8 @@ static int hns3_nic_net_stop(struct net_device *netdev)
if (h->ae_algo->ops->set_timer_task)
h->ae_algo->ops->set_timer_task(priv->ae_handle, false);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
hns3_nic_net_down(netdev);
@@ -824,7 +824,7 @@ static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
* and it is udp packet, which has a dest port as the IANA assigned.
* the hardware is expected to do the checksum offload, but the
* hardware will not do the checksum offload when udp dest port is
- * 4789 or 6081.
+ * 4789, 4790 or 6081.
*/
static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
{
@@ -842,7 +842,8 @@ static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
if (!(!skb->encapsulation &&
(l4.udp->dest == htons(IANA_VXLAN_UDP_PORT) ||
- l4.udp->dest == htons(GENEVE_UDP_PORT))))
+ l4.udp->dest == htons(GENEVE_UDP_PORT) ||
+ l4.udp->dest == htons(4790))))
return false;
skb_checksum_help(skb);
@@ -4616,6 +4617,11 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+ netdev_err(kinfo->netdev, "device is not initialized yet\n");
+ return -EFAULT;
+ }
+
clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
if (netif_running(kinfo->netdev)) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index d252919..8223d69 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -753,8 +753,9 @@ static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
/* configure IGU,EGU error interrupts */
hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
+ desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
if (en)
- desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
+ desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 608fe26..d647f3c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -32,7 +32,8 @@
#define HCLGE_TQP_ECC_ERR_INT_EN_MASK 0x0FFF
#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK 0x0F000000
#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN 0x0F000000
-#define HCLGE_IGU_ERR_INT_EN 0x0000066F
+#define HCLGE_IGU_ERR_INT_EN 0x0000000F
+#define HCLGE_IGU_ERR_INT_TYPE 0x00000660
#define HCLGE_IGU_ERR_INT_EN_MASK 0x000F
#define HCLGE_IGU_TNL_ERR_INT_EN 0x0002AABF
#define HCLGE_IGU_TNL_ERR_INT_EN_MASK 0x003F
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c296ab6..6304aed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3978,6 +3978,12 @@ static void hclge_update_reset_level(struct hclge_dev *hdev)
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
enum hnae3_reset_type reset_level;
+ /* reset request will not be set during reset, so clear
+ * pending reset request to avoid unnecessary reset
+ * caused by the same reason.
+ */
+ hclge_get_reset_level(ae_dev, &hdev->reset_request);
+
/* if default_reset_request has a higher level reset request,
* it should be handled as soon as possible. since some errors
* need this kind of reset to fix.
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 5512ffe..8e5f9dc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -533,7 +533,7 @@ static void hclge_get_link_mode(struct hclge_vport *vport,
unsigned long advertising;
unsigned long supported;
unsigned long send_data;
- u8 msg_data[10];
+ u8 msg_data[10] = {};
u8 dest_vfid;
advertising = hdev->hw.mac.advertising[0];
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 08e88d9..1231c34 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -255,6 +255,8 @@ void hclge_mac_start_phy(struct hclge_dev *hdev)
if (!phydev)
return;
+ phy_loopback(phydev, false);
+
phy_start(phydev);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 9067cd3..85d3dd3a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1144,7 +1144,6 @@ static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP);
}
-void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable);
#ifdef CONFIG_I40E_DCB
void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
struct i40e_dcbx_config *old_cfg,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index ce626ea..140b677 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1566,8 +1566,10 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_25GBASE_LR = 0x22,
I40E_PHY_TYPE_25GBASE_AOC = 0x23,
I40E_PHY_TYPE_25GBASE_ACC = 0x24,
- I40E_PHY_TYPE_2_5GBASE_T = 0x30,
- I40E_PHY_TYPE_5GBASE_T = 0x31,
+ I40E_PHY_TYPE_2_5GBASE_T = 0x26,
+ I40E_PHY_TYPE_5GBASE_T = 0x27,
+ I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS = 0x30,
+ I40E_PHY_TYPE_5GBASE_T_LINK_STATUS = 0x31,
I40E_PHY_TYPE_MAX,
I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD,
I40E_PHY_TYPE_EMPTY = 0xFE,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index a2dba32..32f3fac 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -375,6 +375,7 @@ void i40e_client_subtask(struct i40e_pf *pf)
clear_bit(__I40E_CLIENT_INSTANCE_OPENED,
&cdev->state);
i40e_client_del_instance(pf);
+ return;
}
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 41b813f..67cb0b4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1154,8 +1154,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
break;
case I40E_PHY_TYPE_100BASE_TX:
case I40E_PHY_TYPE_1000BASE_T:
- case I40E_PHY_TYPE_2_5GBASE_T:
- case I40E_PHY_TYPE_5GBASE_T:
+ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
+ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
case I40E_PHY_TYPE_10GBASE_T:
media = I40E_MEDIA_TYPE_BASET;
break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 040a014..ccd5b94 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -841,8 +841,8 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
10000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_T:
- case I40E_PHY_TYPE_5GBASE_T:
- case I40E_PHY_TYPE_2_5GBASE_T:
+ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
+ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
case I40E_PHY_TYPE_1000BASE_T:
case I40E_PHY_TYPE_100BASE_TX:
ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
@@ -1409,7 +1409,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
memset(&config, 0, sizeof(config));
config.phy_type = abilities.phy_type;
- config.abilities = abilities.abilities;
+ config.abilities = abilities.abilities |
+ I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
config.phy_type_ext = abilities.phy_type_ext;
config.link_speed = abilities.link_speed;
config.eee_capability = abilities.eee_capability;
@@ -5281,7 +5282,6 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
i40e_aq_cfg_lldp_mib_change_event(&pf->hw, false, NULL);
i40e_aq_stop_lldp(&pf->hw, true, false, NULL);
} else {
- i40e_set_lldp_forwarding(pf, false);
status = i40e_aq_start_lldp(&pf->hw, false, NULL);
if (status) {
adq_err = pf->hw.aq.asq_last_status;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index c2d145a..704e4748 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6880,40 +6880,6 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
#endif /* CONFIG_I40E_DCB */
/**
- * i40e_set_lldp_forwarding - set forwarding of lldp frames
- * @pf: PF being configured
- * @enable: if forwarding to OS shall be enabled
- *
- * Toggle forwarding of lldp frames behavior,
- * When passing DCB control from firmware to software
- * lldp frames must be forwarded to the software based
- * lldp agent.
- */
-void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable)
-{
- if (pf->lan_vsi == I40E_NO_VSI)
- return;
-
- if (!pf->vsi[pf->lan_vsi])
- return;
-
- /* No need to check the outcome, commands may fail
- * if desired value is already set
- */
- i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP,
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX |
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC,
- pf->vsi[pf->lan_vsi]->seid, 0,
- enable, NULL, NULL);
-
- i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP,
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX |
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC,
- pf->vsi[pf->lan_vsi]->seid, 0,
- enable, NULL, NULL);
-}
-
-/**
* i40e_print_link_message - print link up or down
* @vsi: the VSI for which link needs a message
* @isup: true of link is up, false otherwise
@@ -10736,10 +10702,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
-#ifdef CONFIG_I40E_DCB
- if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)
- i40e_set_lldp_forwarding(pf, true);
-#endif /* CONFIG_I40E_DCB */
/* restart the VSIs that were rebuilt and running before the reset */
i40e_pf_unquiesce_all_vsi(pf);
@@ -15772,10 +15734,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
-#ifdef CONFIG_I40E_DCB
- if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)
- i40e_set_lldp_forwarding(pf, true);
-#endif /* CONFIG_I40E_DCB */
if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 121cd99..de70c16 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1961,10 +1961,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
union i40e_rx_desc *rx_desc)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
@@ -2534,7 +2530,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -2547,7 +2543,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
if (i40e_is_non_eop(rx_ring, rx_desc))
continue;
- if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
+ if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 5c10faac..c81109a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -239,11 +239,8 @@ struct i40e_phy_info {
#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
I40E_PHY_TYPE_OFFSET)
/* Offset for 2.5G/5G PHY Types value to bit number conversion */
-#define I40E_PHY_TYPE_OFFSET2 (-10)
-#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T + \
- I40E_PHY_TYPE_OFFSET2)
-#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T + \
- I40E_PHY_TYPE_OFFSET2)
+#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T)
+#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T)
#define I40E_HW_CAP_MAX_GPIO 30
/* Capabilities of a PF or a VF or the whole device */
struct i40e_hw_capabilities {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 7846a21..1f6bc0c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -535,6 +535,16 @@ mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
u16 erif_index = 0;
int err;
+ /* Add the eRIF */
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_erif_add(mlxsw_sp,
+ rve->mr_route->route_priv,
+ erif_index);
+ if (err)
+ return err;
+ }
+
/* Update the route action, as the new eVIF can be a tunnel or a pimreg
* device which will require updating the action.
*/
@@ -544,17 +554,7 @@ mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
rve->mr_route->route_priv,
route_action);
if (err)
- return err;
- }
-
- /* Add the eRIF */
- if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
- erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
- err = mr->mr_ops->route_erif_add(mlxsw_sp,
- rve->mr_route->route_priv,
- erif_index);
- if (err)
- goto err_route_erif_add;
+ goto err_route_action_update;
}
/* Update the minimum MTU */
@@ -572,14 +572,14 @@ mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
return 0;
err_route_min_mtu_update:
- if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
- mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
- erif_index);
-err_route_erif_add:
if (route_action != rve->mr_route->route_action)
mr->mr_ops->route_action_update(mlxsw_sp,
rve->mr_route->route_priv,
rve->mr_route->route_action);
+err_route_action_update:
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
+ erif_index);
return err;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 95864f0..f35c03c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -642,6 +642,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
value &= ~GMAC_PACKET_FILTER_PCF;
value &= ~GMAC_PACKET_FILTER_PM;
value &= ~GMAC_PACKET_FILTER_PR;
+ value &= ~GMAC_PACKET_FILTER_RA;
if (dev->flags & IFF_PROMISC) {
/* VLAN Tag Filter Fail Packets Queuing */
if (hw->vlan_fail_q_en) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index a602d16..5be8e6a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -232,7 +232,7 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
u32 channel, int fifosz, u8 qmode)
{
unsigned int rqs = fifosz / 256 - 1;
- u32 mtl_rx_op, mtl_rx_int;
+ u32 mtl_rx_op;
mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
@@ -293,11 +293,6 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
}
writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
- /* Enable MTL RX overflow */
- mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
- writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
- ioaddr + MTL_CHAN_INT_CTRL(channel));
}
static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 2cc9175..6d5e0f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -564,7 +564,6 @@ struct stmmac_mode_ops {
#define stmmac_clean_desc3(__priv, __args...) \
stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
-struct stmmac_priv;
struct tc_cls_u32_offload;
struct tc_cbs_qopt_offload;
struct flow_cls_offload;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a9a984c..345b4c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3180,6 +3180,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
char *name;
clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+ clear_bit(__FPE_REMOVING, &priv->fpe_task_state);
name = priv->wq_name;
sprintf(name, "%s-fpe", priv->dev->name);
@@ -5586,7 +5587,6 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
/* To handle GMAC own interrupts */
if ((priv->plat->has_gmac) || xmac) {
int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
- int mtl_status;
if (unlikely(status)) {
/* For LPI we need to save the tx status */
@@ -5597,17 +5597,8 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
}
for (queue = 0; queue < queues_count; queue++) {
- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-
- mtl_status = stmmac_host_mtl_irq_status(priv, priv->hw,
- queue);
- if (mtl_status != -EINVAL)
- status |= mtl_status;
-
- if (status & CORE_IRQ_MTL_RX_OVERFLOW)
- stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
- rx_q->rx_tail_addr,
- queue);
+ status = stmmac_host_mtl_irq_status(priv, priv->hw,
+ queue);
}
/* PCS link status */
diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 9f06663..e374079 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -211,8 +211,8 @@ static void gsi_irq_setup(struct gsi *gsi)
iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
/* The inter-EE registers are in the non-adjusted address range */
- iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_CH_IRQ_OFFSET);
- iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_EV_CH_IRQ_OFFSET);
+ iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET);
iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
}
diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index b4ac025..cb42c5a 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -53,15 +53,15 @@
#define GSI_EE_REG_ADJUST 0x0000d000 /* IPA v4.5+ */
/* The two inter-EE IRQ register offsets are relative to gsi->virt_raw */
-#define GSI_INTER_EE_SRC_CH_IRQ_OFFSET \
- GSI_INTER_EE_N_SRC_CH_IRQ_OFFSET(GSI_EE_AP)
-#define GSI_INTER_EE_N_SRC_CH_IRQ_OFFSET(ee) \
- (0x0000c018 + 0x1000 * (ee))
+#define GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET \
+ GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(GSI_EE_AP)
+#define GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(ee) \
+ (0x0000c020 + 0x1000 * (ee))
-#define GSI_INTER_EE_SRC_EV_CH_IRQ_OFFSET \
- GSI_INTER_EE_N_SRC_EV_CH_IRQ_OFFSET(GSI_EE_AP)
-#define GSI_INTER_EE_N_SRC_EV_CH_IRQ_OFFSET(ee) \
- (0x0000c01c + 0x1000 * (ee))
+#define GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET \
+ GSI_INTER_EE_N_SRC_EV_CH_IRQ_MSK_OFFSET(GSI_EE_AP)
+#define GSI_INTER_EE_N_SRC_EV_CH_IRQ_MSK_OFFSET(ee) \
+ (0x0000c024 + 0x1000 * (ee))
/* All other register offsets are relative to gsi->virt */
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 0b2cccb..e6721c1 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1088,6 +1088,38 @@ static int m88e1011_set_tunable(struct phy_device *phydev,
}
}
+static int m88e1112_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return m88e1111_config_init(phydev);
+}
+
+static int m88e1111gbe_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1111_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return m88e1111_config_init(phydev);
+}
+
+static int marvell_1011gbe_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return marvell_config_init(phydev);
+}
static int m88e1116r_config_init(struct phy_device *phydev)
{
int err;
@@ -1168,6 +1200,9 @@ static int m88e1510_config_init(struct phy_device *phydev)
if (err < 0)
return err;
}
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
return m88e1318_config_init(phydev);
}
@@ -1320,6 +1355,9 @@ static int m88e1145_config_init(struct phy_device *phydev)
if (err < 0)
return err;
}
+ err = m88e1111_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
err = marvell_of_reg_init(phydev);
if (err < 0)
@@ -2698,7 +2736,7 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1112",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1112_config_init,
.config_aneg = marvell_config_aneg,
.config_intr = marvell_config_intr,
.handle_interrupt = marvell_handle_interrupt,
@@ -2718,7 +2756,7 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1111",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -2739,7 +2777,7 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1111 (Finisar)",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -2779,7 +2817,7 @@ static struct phy_driver marvell_drivers[] = {
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1121_hwmon_ops),
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1121_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -2859,7 +2897,7 @@ static struct phy_driver marvell_drivers[] = {
.name = "Marvell 88E1240",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1112_config_init,
.config_aneg = marvell_config_aneg,
.config_intr = marvell_config_intr,
.handle_interrupt = marvell_handle_interrupt,
@@ -2929,7 +2967,7 @@ static struct phy_driver marvell_drivers[] = {
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -2955,7 +2993,7 @@ static struct phy_driver marvell_drivers[] = {
.probe = marvell_probe,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -3000,7 +3038,7 @@ static struct phy_driver marvell_drivers[] = {
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e6390_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -3026,7 +3064,7 @@ static struct phy_driver marvell_drivers[] = {
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e6390_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -3052,7 +3090,7 @@ static struct phy_driver marvell_drivers[] = {
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -3077,7 +3115,7 @@ static struct phy_driver marvell_drivers[] = {
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
.probe = marvell_probe,
/* PHY_GBIT_FEATURES */
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
@@ -3099,7 +3137,7 @@ static struct phy_driver marvell_drivers[] = {
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
.probe = marvell_probe,
.features = PHY_GBIT_FIBRE_FEATURES,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 4d9dc7d..0720f5f 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -415,7 +415,7 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev)
if (pad > 0) { /* Pad the frame with zeros */
if (__skb_pad(skb, pad, false))
- goto out;
+ goto drop;
skb_put(skb, pad);
}
}
@@ -448,9 +448,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
drop:
- kfree_skb(skb);
-out:
dev->stats.tx_dropped++;
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b6f7815..522c9b2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -576,6 +576,11 @@ static void nvme_free_ns(struct kref *kref)
kfree(ns);
}
+static inline bool nvme_get_ns(struct nvme_ns *ns)
+{
+ return kref_get_unless_zero(&ns->kref);
+}
+
void nvme_put_ns(struct nvme_ns *ns)
{
kref_put(&ns->kref, nvme_free_ns);
@@ -584,9 +589,6 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
static inline void nvme_clear_nvme_request(struct request *req)
{
- struct nvme_command *cmd = nvme_req(req)->cmd;
-
- memset(cmd, 0, sizeof(*cmd));
nvme_req(req)->retries = 0;
nvme_req(req)->flags = 0;
req->rq_flags |= RQF_DONTPREP;
@@ -637,6 +639,66 @@ static struct request *nvme_alloc_request_qid(struct request_queue *q,
return req;
}
+/*
+ * For something we're not in a state to send to the device the default action
+ * is to busy it and retry it after the controller state is recovered. However,
+ * if the controller is deleting or if anything is marked for failfast or
+ * nvme multipath it is immediately failed.
+ *
+ * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
+ */
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *rq)
+{
+ if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
+ ctrl->state != NVME_CTRL_DEAD &&
+ !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
+ !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+ return BLK_STS_RESOURCE;
+ return nvme_host_path_error(rq);
+}
+EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
+
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live)
+{
+ struct nvme_request *req = nvme_req(rq);
+
+ /*
+ * Currently we have a problem sending passthru commands
+ * on the admin_q if the controller is not LIVE, because we can't
+ * make sure that they go out after the admin connect,
+ * controller enable and/or other commands in the initialization
+ * sequence. Until the controller is LIVE, fail with
+ * BLK_STS_RESOURCE so that they will be rescheduled.
+ */
+ if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
+ return false;
+
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
+ /*
+ * Only allow commands on a live queue, except for the connect
+ * command, which is required to set the queue live in the
+ * appropriate states.
+ */
+ switch (ctrl->state) {
+ case NVME_CTRL_CONNECTING:
+ if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
+ req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+ return true;
+ break;
+ default:
+ break;
+ case NVME_CTRL_DEAD:
+ return false;
+ }
+ }
+
+ return queue_live;
+}
+EXPORT_SYMBOL_GPL(__nvme_check_ready);
+
static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
{
struct nvme_command c;
@@ -898,8 +960,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
struct nvme_command *cmd = nvme_req(req)->cmd;
blk_status_t ret = BLK_STS_OK;
- if (!(req->rq_flags & RQF_DONTPREP))
+ if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_clear_nvme_request(req);
+ memset(cmd, 0, sizeof(*cmd));
+ }
switch (req_op(req)) {
case REQ_OP_DRV_IN:
@@ -1494,7 +1558,7 @@ static int nvme_ns_open(struct nvme_ns *ns)
/* should never be called due to GENHD_FL_HIDDEN */
if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head)))
goto fail;
- if (!kref_get_unless_zero(&ns->kref))
+ if (!nvme_get_ns(ns))
goto fail;
if (!try_module_get(ns->ctrl->ops->module))
goto fail_put_ns;
@@ -1999,28 +2063,6 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops,
};
-#ifdef CONFIG_NVME_MULTIPATH
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys)
-{
- struct nvme_ctrl *ctrl;
- int ret;
-
- ret = mutex_lock_killable(&nvme_subsystems_lock);
- if (ret)
- return ERR_PTR(ret);
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
- if (ctrl->state == NVME_CTRL_LIVE)
- goto found;
- }
- mutex_unlock(&nvme_subsystems_lock);
- return ERR_PTR(-EWOULDBLOCK);
-found:
- nvme_get_ctrl(ctrl);
- mutex_unlock(&nvme_subsystems_lock);
- return ctrl;
-}
-#endif /* CONFIG_NVME_MULTIPATH */
-
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
unsigned long timeout =
@@ -3604,7 +3646,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->head->ns_id == nsid) {
- if (!kref_get_unless_zero(&ns->kref))
+ if (!nvme_get_ns(ns))
continue;
ret = ns;
break;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 13c2747..a2bb7fc 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -533,63 +533,6 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
return NULL;
}
-/*
- * For something we're not in a state to send to the device the default action
- * is to busy it and retry it after the controller state is recovered. However,
- * if the controller is deleting or if anything is marked for failfast or
- * nvme multipath it is immediately failed.
- *
- * Note: commands used to initialize the controller will be marked for failfast.
- * Note: nvme cli/ioctl commands are marked for failfast.
- */
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
- struct request *rq)
-{
- if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
- ctrl->state != NVME_CTRL_DEAD &&
- !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
- !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
- return BLK_STS_RESOURCE;
- return nvme_host_path_error(rq);
-}
-EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
-
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live)
-{
- struct nvme_request *req = nvme_req(rq);
-
- /*
- * currently we have a problem sending passthru commands
- * on the admin_q if the controller is not LIVE because we can't
- * make sure that they are going out after the admin connect,
- * controller enable and/or other commands in the initialization
- * sequence. until the controller will be LIVE, fail with
- * BLK_STS_RESOURCE so that they will be rescheduled.
- */
- if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
- return false;
-
- /*
- * Only allow commands on a live queue, except for the connect command,
- * which is require to set the queue live in the appropinquate states.
- */
- switch (ctrl->state) {
- case NVME_CTRL_CONNECTING:
- if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
- req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
- return true;
- break;
- default:
- break;
- case NVME_CTRL_DEAD:
- return false;
- }
-
- return queue_live;
-}
-EXPORT_SYMBOL_GPL(__nvmf_check_ready);
-
static const match_table_t opt_tokens = {
{ NVMF_OPT_TRANSPORT, "transport=%s" },
{ NVMF_OPT_TRADDR, "traddr=%s" },
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 888b108..d7f7974 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -184,20 +184,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
- struct request *rq);
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
-static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live)
-{
- if (likely(ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_DELETING))
- return true;
- return __nvmf_check_ready(ctrl, rq, queue_live);
-}
-
#endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9b9b7be..d9ab9e7 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2766,8 +2766,8 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_status_t ret;
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
- !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_setup_cmd(ns, rq);
if (ret)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 502f8e4a..9557ead 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -370,41 +370,45 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
#ifdef CONFIG_NVME_MULTIPATH
-static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
- unsigned int cmd, void __user *argp)
+static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx)
{
- struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys);
+ struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
- ret = nvme_ctrl_ioctl(ctrl, cmd, argp);
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+
nvme_put_ctrl(ctrl);
return ret;
}
-static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
- unsigned int cmd, void __user *argp)
-{
- int srcu_idx = srcu_read_lock(&head->srcu);
- struct nvme_ns *ns = nvme_find_path(head);
- int ret = -EWOULDBLOCK;
-
- if (ns)
- ret = nvme_ns_ioctl(ns, cmd, argp);
- srcu_read_unlock(&head->srcu, srcu_idx);
- return ret;
-}
-
int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ struct nvme_ns_head *head = NULL;
void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+ ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+ if (unlikely(!ns))
+ return -EWOULDBLOCK;
+
+ /*
+ * Handle ioctls that apply to the controller instead of the namespace
+ * separately and drop the ns SRCU reference early. This avoids a
+ * deadlock when deleting namespaces using the passthrough interface.
+ */
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
- return nvme_ns_head_ns_ioctl(head, cmd, argp);
+ ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ else {
+ ret = nvme_ns_ioctl(ns, cmd, argp);
+ nvme_put_ns_from_disk(head, srcu_idx);
+ }
+
+ return ret;
}
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
@@ -414,10 +418,23 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
struct nvme_ns_head *head =
container_of(cdev, struct nvme_ns_head, cdev);
void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (!ns) {
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return -EWOULDBLOCK;
+ }
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
- return nvme_ns_head_ns_ioctl(head, cmd, argp);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+
+ ret = nvme_ns_ioctl(ns, cmd, argp);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0d0de34..0551796 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -70,6 +70,7 @@ void nvme_failover_req(struct request *req)
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status & 0x7ff;
unsigned long flags;
+ struct bio *bio;
nvme_mpath_clear_current_path(ns);
@@ -84,6 +85,8 @@ void nvme_failover_req(struct request *req)
}
spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ for (bio = req->bio; bio; bio = bio->bi_next)
+ bio_set_dev(bio, ns->head->disk->part0);
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 773dde5b..05f31a2 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -638,6 +638,21 @@ struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, blk_mq_req_flags_t flags);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *req);
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live);
+
+static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live)
+{
+ if (likely(ctrl->state == NVME_CTRL_LIVE))
+ return true;
+ if (ctrl->ops->flags & NVME_F_FABRICS &&
+ ctrl->state == NVME_CTRL_DELETING)
+ return true;
+ return __nvme_check_ready(ctrl, rq, queue_live);
+}
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
@@ -664,7 +679,6 @@ struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
bool nvme_tryget_ns_head(struct nvme_ns_head *head);
void nvme_put_ns_head(struct nvme_ns_head *head);
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
const struct file_operations *fops, struct module *owner);
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 09d4c5f..a29b170 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -933,6 +933,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
return BLK_STS_IOERR;
+ if (!nvme_check_ready(&dev->ctrl, req, true))
+ return nvme_fail_nonready_command(&dev->ctrl, req);
+
ret = nvme_setup_cmd(ns, req);
if (ret)
return ret;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 660c774..37943dc 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2050,8 +2050,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
WARN_ON_ONCE(rq->tag < 0);
- if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
dev = queue->device->dev;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 75435cd..0222e23 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2338,8 +2338,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
blk_status_t ret;
- if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_tcp_setup_cmd_pdu(ns, rq);
if (unlikely(ret))
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index d2a26ff..e7a367c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -307,7 +307,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
case NVME_LOG_ANA:
return nvmet_execute_get_log_page_ana(req);
}
- pr_err("unhandled lid %d on qid %d\n",
+ pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
@@ -659,7 +659,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
return nvmet_execute_identify_desclist(req);
}
- pr_err("unhandled identify cns %d on qid %d\n",
+ pr_debug("unhandled identify cns %d on qid %d\n",
req->cmd->identify.cns, req->sq->qid);
req->error_loc = offsetof(struct nvme_identify, cns);
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
@@ -977,7 +977,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
return 0;
}
- pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+ pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 6665da3..74b3b15 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -138,8 +138,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
blk_status_t ret;
- if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);
ret = nvme_setup_cmd(ns, req);
if (ret)
diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c
index 651a36b..983ba98 100644
--- a/drivers/pinctrl/pinctrl-ingenic.c
+++ b/drivers/pinctrl/pinctrl-ingenic.c
@@ -11,6 +11,7 @@
#include <linux/gpio/driver.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
@@ -3854,6 +3855,8 @@ static int __init ingenic_pinctrl_probe(struct platform_device *pdev)
return 0;
}
+#define IF_ENABLED(cfg, ptr) PTR_IF(IS_ENABLED(cfg), (ptr))
+
static const struct of_device_id ingenic_pinctrl_of_match[] = {
{
.compatible = "ingenic,jz4730-pinctrl",
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c
index 9035b17..bbc2884 100644
--- a/drivers/platform/chrome/cros_ec_lpc_mec.c
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c
@@ -14,7 +14,7 @@
* This mutex must be held while accessing the EMI unit. We can't rely on the
* EC mutex because memmap data may be accessed without it being held.
*/
-static struct mutex io_mutex;
+static DEFINE_MUTEX(io_mutex);
static u16 mec_emi_base, mec_emi_end;
/**
@@ -142,7 +142,6 @@ EXPORT_SYMBOL(cros_ec_lpc_io_bytes_mec);
void cros_ec_lpc_mec_init(unsigned int base, unsigned int end)
{
- mutex_init(&io_mutex);
mec_emi_base = base;
mec_emi_end = end;
}
diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index 0811562..27c068c 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -58,6 +58,7 @@ struct cros_typec_port {
/* Variables keeping track of switch state. */
struct typec_mux_state state;
uint8_t mux_flags;
+ uint8_t role;
/* Port alt modes. */
struct typec_altmode p_altmode[CROS_EC_ALTMODE_MAX];
@@ -220,6 +221,9 @@ static void cros_typec_remove_partner(struct cros_typec_data *typec,
{
struct cros_typec_port *port = typec->ports[port_num];
+ if (!port->partner)
+ return;
+
cros_typec_unregister_altmodes(typec, port_num, true);
cros_typec_usb_disconnect_state(port);
@@ -235,6 +239,9 @@ static void cros_typec_remove_cable(struct cros_typec_data *typec,
{
struct cros_typec_port *port = typec->ports[port_num];
+ if (!port->cable)
+ return;
+
cros_typec_unregister_altmodes(typec, port_num, false);
typec_unregister_plug(port->plug);
@@ -253,11 +260,8 @@ static void cros_unregister_ports(struct cros_typec_data *typec)
if (!typec->ports[i])
continue;
- if (typec->ports[i]->partner)
- cros_typec_remove_partner(typec, i);
-
- if (typec->ports[i]->cable)
- cros_typec_remove_cable(typec, i);
+ cros_typec_remove_partner(typec, i);
+ cros_typec_remove_cable(typec, i);
usb_role_switch_put(typec->ports[i]->role_sw);
typec_switch_put(typec->ports[i]->ori_sw);
@@ -483,6 +487,11 @@ static int cros_typec_enable_dp(struct cros_typec_data *typec,
return -ENOTSUPP;
}
+ if (!pd_ctrl->dp_mode) {
+ dev_err(typec->dev, "No valid DP mode provided.\n");
+ return -EINVAL;
+ }
+
/* Status VDO. */
dp_data.status = DP_STATUS_ENABLED;
if (port->mux_flags & USB_PD_MUX_HPD_IRQ)
@@ -647,11 +656,8 @@ static void cros_typec_set_port_params_v1(struct cros_typec_data *typec,
"Failed to register partner on port: %d\n",
port_num);
} else {
- if (typec->ports[port_num]->partner)
- cros_typec_remove_partner(typec, port_num);
-
- if (typec->ports[port_num]->cable)
- cros_typec_remove_cable(typec, port_num);
+ cros_typec_remove_partner(typec, port_num);
+ cros_typec_remove_cable(typec, port_num);
}
}
@@ -905,6 +911,19 @@ static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num
return;
}
+ /* If we got a hard reset, unregister everything and return. */
+ if (resp.events & PD_STATUS_EVENT_HARD_RESET) {
+ cros_typec_remove_partner(typec, port_num);
+ cros_typec_remove_cable(typec, port_num);
+
+ ret = cros_typec_send_clear_event(typec, port_num,
+ PD_STATUS_EVENT_HARD_RESET);
+ if (ret < 0)
+ dev_warn(typec->dev,
+ "Failed hard reset event clear, port: %d\n", port_num);
+ return;
+ }
+
/* Handle any events appropriately. */
if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE && !typec->ports[port_num]->sop_disc_done) {
u16 sop_revision;
@@ -995,10 +1014,12 @@ static int cros_typec_port_update(struct cros_typec_data *typec, int port_num)
}
/* No change needs to be made, let's exit early. */
- if (typec->ports[port_num]->mux_flags == mux_resp.flags)
+ if (typec->ports[port_num]->mux_flags == mux_resp.flags &&
+ typec->ports[port_num]->role == resp.role)
return 0;
typec->ports[port_num]->mux_flags = mux_resp.flags;
+ typec->ports[port_num]->role = resp.role;
ret = cros_typec_configure_mux(typec, port_num, mux_resp.flags, &resp);
if (ret)
dev_warn(typec->dev, "Configure muxes failed, err = %d\n", ret);
@@ -1027,8 +1048,8 @@ static int cros_typec_get_cmd_version(struct cros_typec_data *typec)
else
typec->pd_ctrl_ver = 0;
- dev_dbg(typec->dev, "PD Control has version mask 0x%hhx\n",
- typec->pd_ctrl_ver);
+ dev_dbg(typec->dev, "PD Control has version mask 0x%02x\n",
+ typec->pd_ctrl_ver & 0xff);
return 0;
}
diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
index 7f36142..48a6617 100644
--- a/drivers/platform/chrome/cros_usbpd_notify.c
+++ b/drivers/platform/chrome/cros_usbpd_notify.c
@@ -220,7 +220,8 @@ static int cros_usbpd_notify_plat(struct notifier_block *nb,
if (!host_event)
return NOTIFY_DONE;
- if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) {
+ if (host_event & (EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU) |
+ EC_HOST_EVENT_MASK(EC_HOST_EVENT_USB_MUX))) {
cros_usbpd_get_event_and_notify(pdnotify->dev, ec_dev);
return NOTIFY_OK;
}
diff --git a/drivers/platform/chrome/wilco_ec/telemetry.c b/drivers/platform/chrome/wilco_ec/telemetry.c
index e06d96f..60da7a2 100644
--- a/drivers/platform/chrome/wilco_ec/telemetry.c
+++ b/drivers/platform/chrome/wilco_ec/telemetry.c
@@ -256,7 +256,7 @@ static int telem_open(struct inode *inode, struct file *filp)
sess_data->dev_data = dev_data;
sess_data->has_msg = false;
- nonseekable_open(inode, filp);
+ stream_open(inode, filp);
filp->private_data = sess_data;
return 0;
diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h
index ca24a78..7365121 100644
--- a/drivers/s390/block/dasd_eckd.h
+++ b/drivers/s390/block/dasd_eckd.h
@@ -52,7 +52,7 @@
#define DASD_ECKD_CCW_RCD 0xFA
#define DASD_ECKD_CCW_DSO 0xF7
-/* Define Subssystem Function / Orders */
+/* Define Subsystem Function / Orders */
#define DSO_ORDER_RAS 0x81
/*
@@ -110,7 +110,7 @@
#define DASD_ECKD_PG_GROUPED 0x10
/*
- * Size that is reportet for large volumes in the old 16-bit no_cyl field
+ * Size that is reported for large volumes in the old 16-bit no_cyl field
*/
#define LV_COMPAT_CYL 0xFFFE
@@ -555,7 +555,7 @@ struct dasd_dso_ras_ext_range {
} __packed;
/*
- * Define Subsytem Operation - Release Allocated Space
+ * Define Subsystem Operation - Release Allocated Space
*/
struct dasd_dso_ras_data {
__u8 order;
@@ -676,7 +676,7 @@ struct dasd_eckd_private {
struct dasd_ext_pool_sum eps;
u32 real_cyl;
- /* alias managemnet */
+ /* alias management */
struct dasd_uid uid;
struct alias_pav_group *pavgroup;
struct alias_lcu *lcu;
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index e619a82..762cc8b 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -102,7 +102,7 @@ static const char *fnic_fcpio_status_to_str(unsigned int status)
return fcpio_status_str[status];
}
-static void fnic_cleanup_io(struct fnic *fnic, int exclude_id);
+static void fnic_cleanup_io(struct fnic *fnic);
static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
struct scsi_cmnd *sc)
@@ -638,7 +638,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic,
atomic64_inc(&reset_stats->fw_reset_completions);
/* Clean up all outstanding io requests */
- fnic_cleanup_io(fnic, SCSI_NO_TAG);
+ fnic_cleanup_io(fnic);
atomic64_set(&fnic->fnic_stats.fw_stats.active_fw_reqs, 0);
atomic64_set(&fnic->fnic_stats.io_stats.active_ios, 0);
@@ -1361,93 +1361,90 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
return wq_work_done;
}
-static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
+static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
{
- int i;
+ struct fnic *fnic = data;
struct fnic_io_req *io_req;
unsigned long flags = 0;
- struct scsi_cmnd *sc;
spinlock_t *io_lock;
unsigned long start_time = 0;
struct fnic_stats *fnic_stats = &fnic->fnic_stats;
- for (i = 0; i < fnic->fnic_max_tag_id; i++) {
- if (i == exclude_id)
- continue;
+ io_lock = fnic_io_lock_tag(fnic, sc->request->tag);
+ spin_lock_irqsave(io_lock, flags);
- io_lock = fnic_io_lock_tag(fnic, i);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, i);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
- /*
- * We will be here only when FW completes reset
- * without sending completions for outstanding ios.
- */
- CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
- if (io_req && io_req->dr_done)
- complete(io_req->dr_done);
- else if (io_req && io_req->abts_done)
- complete(io_req->abts_done);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- CMD_SP(sc) = NULL;
-
- spin_unlock_irqrestore(io_lock, flags);
-
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
/*
- * If there is a scsi_cmnd associated with this io_req, then
- * free the corresponding state
+ * We will be here only when FW completes reset
+ * without sending completions for outstanding ios.
*/
- start_time = io_req->start_time;
- fnic_release_ioreq_buf(fnic, io_req, sc);
- mempool_free(io_req, fnic->io_req_pool);
-
- sc->result = DID_TRANSPORT_DISRUPTED << 16;
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "%s: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
- __func__, sc->request->tag, sc,
- (jiffies - start_time));
-
- if (atomic64_read(&fnic->io_cmpl_skip))
- atomic64_dec(&fnic->io_cmpl_skip);
- else
- atomic64_inc(&fnic_stats->io_stats.io_completions);
-
- /* Complete the command to SCSI */
- if (sc->scsi_done) {
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
- shost_printk(KERN_ERR, fnic->lport->host,
- "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
- sc->request->tag, sc);
-
- FNIC_TRACE(fnic_cleanup_io,
- sc->device->host->host_no, i, sc,
- jiffies_to_msecs(jiffies - start_time),
- 0, ((u64)sc->cmnd[0] << 32 |
- (u64)sc->cmnd[2] << 24 |
- (u64)sc->cmnd[3] << 16 |
- (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
- (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
-
- sc->scsi_done(sc);
- }
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
+ if (io_req && io_req->dr_done)
+ complete(io_req->dr_done);
+ else if (io_req && io_req->abts_done)
+ complete(io_req->abts_done);
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
}
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ goto cleanup_scsi_cmd;
+ }
+
+ CMD_SP(sc) = NULL;
+
+ spin_unlock_irqrestore(io_lock, flags);
+
+ /*
+ * If there is a scsi_cmnd associated with this io_req, then
+ * free the corresponding state
+ */
+ start_time = io_req->start_time;
+ fnic_release_ioreq_buf(fnic, io_req, sc);
+ mempool_free(io_req, fnic->io_req_pool);
+
+cleanup_scsi_cmd:
+ sc->result = DID_TRANSPORT_DISRUPTED << 16;
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_cleanup_io: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
+ sc->request->tag, sc, (jiffies - start_time));
+
+ if (atomic64_read(&fnic->io_cmpl_skip))
+ atomic64_dec(&fnic->io_cmpl_skip);
+ else
+ atomic64_inc(&fnic_stats->io_stats.io_completions);
+
+ /* Complete the command to SCSI */
+ if (sc->scsi_done) {
+ if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
+ sc->request->tag, sc);
+
+ FNIC_TRACE(fnic_cleanup_io,
+ sc->device->host->host_no, sc->request->tag, sc,
+ jiffies_to_msecs(jiffies - start_time),
+ 0, ((u64)sc->cmnd[0] << 32 |
+ (u64)sc->cmnd[2] << 24 |
+ (u64)sc->cmnd[3] << 16 |
+ (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+ (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
+ sc->scsi_done(sc);
+ }
+ return true;
+}
+
+static void fnic_cleanup_io(struct fnic *fnic)
+{
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_cleanup_io_iter, fnic);
}
void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
@@ -1558,20 +1555,121 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
return 0;
}
-static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+struct fnic_rport_abort_io_iter_data {
+ struct fnic *fnic;
+ u32 port_id;
+ int term_cnt;
+};
+
+static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
{
- int tag;
- int abt_tag;
- int term_cnt = 0;
+ struct fnic_rport_abort_io_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ int abt_tag = sc->request->tag;
struct fnic_io_req *io_req;
spinlock_t *io_lock;
unsigned long flags;
- struct scsi_cmnd *sc;
struct reset_stats *reset_stats = &fnic->fnic_stats.reset_stats;
struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
struct scsi_lun fc_lun;
enum fnic_ioreq_state old_ioreq_state;
+ io_lock = fnic_io_lock_tag(fnic, abt_tag);
+ spin_lock_irqsave(io_lock, flags);
+
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+
+ if (!io_req || io_req->port_id != iter_data->port_id) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
+ sc);
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to rport that went away
+ */
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+ if (io_req->abts_done) {
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "fnic_rport_exch_reset: io_req->abts_done is set "
+ "state is %s\n",
+ fnic_ioreq_state_to_str(CMD_STATE(sc)));
+ }
+
+ if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "rport_exch_reset "
+ "IO not yet issued %p tag 0x%x flags "
+ "%x state %d\n",
+ sc, abt_tag, CMD_FLAGS(sc), CMD_STATE(sc));
+ }
+ old_ioreq_state = CMD_STATE(sc);
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+ CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ atomic64_inc(&reset_stats->device_reset_terminates);
+ abt_tag |= FNIC_TAG_DEV_RST;
+ }
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_rport_exch_reset dev rst sc 0x%p\n", sc);
+ BUG_ON(io_req->abts_done);
+
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_rport_reset_exch: Issuing abts\n");
+
+ spin_unlock_irqrestore(io_lock, flags);
+
+ /* Now queue the abort command to firmware */
+ int_to_scsilun(sc->device->lun, &fc_lun);
+
+ if (fnic_queue_abort_io_req(fnic, abt_tag,
+ FCPIO_ITMF_ABT_TASK_TERM,
+ fc_lun.scsi_lun, io_req)) {
+ /*
+ * Revert the cmd state back to old state, if
+ * it hasn't changed in between. This cmd will get
+ * aborted later by scsi_eh, or cleaned up during
+ * lun reset
+ */
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+ CMD_STATE(sc) = old_ioreq_state;
+ spin_unlock_irqrestore(io_lock, flags);
+ } else {
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+ else
+ CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+ spin_unlock_irqrestore(io_lock, flags);
+ atomic64_inc(&term_stats->terminates);
+ iter_data->term_cnt++;
+ }
+ return true;
+}
+
+static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+{
+ struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
+ struct fnic_rport_abort_io_iter_data iter_data = {
+ .fnic = fnic,
+ .port_id = port_id,
+ .term_cnt = 0,
+ };
+
FNIC_SCSI_DBG(KERN_DEBUG,
fnic->lport->host,
"fnic_rport_exch_reset called portid 0x%06x\n",
@@ -1580,121 +1678,18 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
if (fnic->in_remove)
return;
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- abt_tag = tag;
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
-
- if (!io_req || io_req->port_id != port_id) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
- sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- /*
- * Found IO that is still pending with firmware and
- * belongs to rport that went away
- */
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (io_req->abts_done) {
- shost_printk(KERN_ERR, fnic->lport->host,
- "fnic_rport_exch_reset: io_req->abts_done is set "
- "state is %s\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
- }
-
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
- shost_printk(KERN_ERR, fnic->lport->host,
- "rport_exch_reset "
- "IO not yet issued %p tag 0x%x flags "
- "%x state %d\n",
- sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
- }
- old_ioreq_state = CMD_STATE(sc);
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- atomic64_inc(&reset_stats->device_reset_terminates);
- abt_tag = (tag | FNIC_TAG_DEV_RST);
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_rport_exch_reset dev rst sc 0x%p\n",
- sc);
- }
-
- BUG_ON(io_req->abts_done);
-
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_rport_reset_exch: Issuing abts\n");
-
- spin_unlock_irqrestore(io_lock, flags);
-
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
-
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- /*
- * Revert the cmd state back to old state, if
- * it hasn't changed in between. This cmd will get
- * aborted later by scsi_eh, or cleaned up during
- * lun reset
- */
- spin_lock_irqsave(io_lock, flags);
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- else
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- atomic64_inc(&term_stats->terminates);
- term_cnt++;
- }
- }
- if (term_cnt > atomic64_read(&term_stats->max_terminates))
- atomic64_set(&term_stats->max_terminates, term_cnt);
+ scsi_host_busy_iter(fnic->lport->host, fnic_rport_abort_io_iter,
+ &iter_data);
+ if (iter_data.term_cnt > atomic64_read(&term_stats->max_terminates))
+ atomic64_set(&term_stats->max_terminates, iter_data.term_cnt);
}
void fnic_terminate_rport_io(struct fc_rport *rport)
{
- int tag;
- int abt_tag;
- int term_cnt = 0;
- struct fnic_io_req *io_req;
- spinlock_t *io_lock;
- unsigned long flags;
- struct scsi_cmnd *sc;
- struct scsi_lun fc_lun;
struct fc_rport_libfc_priv *rdata;
struct fc_lport *lport;
struct fnic *fnic;
- struct fc_rport *cmd_rport;
- struct reset_stats *reset_stats;
- struct terminate_stats *term_stats;
- enum fnic_ioreq_state old_ioreq_state;
if (!rport) {
printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n");
@@ -1722,108 +1717,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
if (fnic->in_remove)
return;
- reset_stats = &fnic->fnic_stats.reset_stats;
- term_stats = &fnic->fnic_stats.term_stats;
-
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- abt_tag = tag;
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- cmd_rport = starget_to_rport(scsi_target(sc->device));
- if (rport != cmd_rport) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_terminate_rport_io dev rst not pending sc 0x%p\n",
- sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- /*
- * Found IO that is still pending with firmware and
- * belongs to rport that went away
- */
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (io_req->abts_done) {
- shost_printk(KERN_ERR, fnic->lport->host,
- "fnic_terminate_rport_io: io_req->abts_done is set "
- "state is %s\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
- }
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "fnic_terminate_rport_io "
- "IO not yet issued %p tag 0x%x flags "
- "%x state %d\n",
- sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
- }
- old_ioreq_state = CMD_STATE(sc);
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- atomic64_inc(&reset_stats->device_reset_terminates);
- abt_tag = (tag | FNIC_TAG_DEV_RST);
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_terminate_rport_io dev rst sc 0x%p\n", sc);
- }
-
- BUG_ON(io_req->abts_done);
-
- FNIC_SCSI_DBG(KERN_DEBUG,
- fnic->lport->host,
- "fnic_terminate_rport_io: Issuing abts\n");
-
- spin_unlock_irqrestore(io_lock, flags);
-
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
-
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- /*
- * Revert the cmd state back to old state, if
- * it hasn't changed in between. This cmd will get
- * aborted later by scsi_eh, or cleaned up during
- * lun reset
- */
- spin_lock_irqsave(io_lock, flags);
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- else
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- atomic64_inc(&term_stats->terminates);
- term_cnt++;
- }
- }
- if (term_cnt > atomic64_read(&term_stats->max_terminates))
- atomic64_set(&term_stats->max_terminates, term_cnt);
-
+ fnic_rport_exch_reset(fnic, rport->port_id);
}
/*
@@ -2118,6 +2012,156 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
return ret;
}
+struct fnic_pending_aborts_iter_data {
+ struct fnic *fnic;
+ struct scsi_cmnd *lr_sc;
+ struct scsi_device *lun_dev;
+ int ret;
+};
+
+static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc,
+ void *data, bool reserved)
+{
+ struct fnic_pending_aborts_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ struct scsi_device *lun_dev = iter_data->lun_dev;
+ int abt_tag = sc->request->tag;
+ struct fnic_io_req *io_req;
+ spinlock_t *io_lock;
+ unsigned long flags;
+ struct scsi_lun fc_lun;
+ DECLARE_COMPLETION_ONSTACK(tm_done);
+ enum fnic_ioreq_state old_ioreq_state;
+
+ if (sc == iter_data->lr_sc || sc->device != lun_dev)
+ return true;
+ if (reserved)
+ return true;
+
+ io_lock = fnic_io_lock_tag(fnic, abt_tag);
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to the LUN that we are resetting
+ */
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "Found IO in %s on lun\n",
+ fnic_ioreq_state_to_str(CMD_STATE(sc)));
+
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "%s dev rst not pending sc 0x%p\n", __func__,
+ sc);
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+
+ if (io_req->abts_done)
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "%s: io_req->abts_done is set state is %s\n",
+ __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
+ old_ioreq_state = CMD_STATE(sc);
+ /*
+ * Any pending IO issued prior to reset is expected to be
+ * in abts pending state, if not we need to set
+ * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
+ * When IO is completed, the IO will be handed over and
+ * handled in this function.
+ */
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+
+ BUG_ON(io_req->abts_done);
+
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ abt_tag |= FNIC_TAG_DEV_RST;
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "%s: dev rst sc 0x%p\n", __func__, sc);
+ }
+
+ CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+ io_req->abts_done = &tm_done;
+ spin_unlock_irqrestore(io_lock, flags);
+
+ /* Now queue the abort command to firmware */
+ int_to_scsilun(sc->device->lun, &fc_lun);
+
+ if (fnic_queue_abort_io_req(fnic, abt_tag,
+ FCPIO_ITMF_ABT_TASK_TERM,
+ fc_lun.scsi_lun, io_req)) {
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (io_req)
+ io_req->abts_done = NULL;
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+ CMD_STATE(sc) = old_ioreq_state;
+ spin_unlock_irqrestore(io_lock, flags);
+ iter_data->ret = FAILED;
+ return false;
+ } else {
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+ spin_unlock_irqrestore(io_lock, flags);
+ }
+ CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+
+ wait_for_completion_timeout(&tm_done, msecs_to_jiffies
+ (fnic->config.ed_tov));
+
+ /* Recheck cmd state to check if it is now aborted */
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
+ return true;
+ }
+
+ io_req->abts_done = NULL;
+
+ /* if abort is still pending with fw, fail */
+ if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
+ spin_unlock_irqrestore(io_lock, flags);
+ CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
+ iter_data->ret = FAILED;
+ return false;
+ }
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+
+ /* original sc used for lr is handled by dev reset code */
+ if (sc != iter_data->lr_sc)
+ CMD_SP(sc) = NULL;
+ spin_unlock_irqrestore(io_lock, flags);
+
+ /* original sc used for lr is handled by dev reset code */
+ if (sc != iter_data->lr_sc) {
+ fnic_release_ioreq_buf(fnic, io_req, sc);
+ mempool_free(io_req, fnic->io_req_pool);
+ }
+
+ /*
+ * Any IO returned during reset must call scsi_done
+ * to hand the scsi_cmnd back to the upper layer.
+ */
+ if (sc->scsi_done) {
+ /* Set result to let upper SCSI layer retry */
+ sc->result = DID_RESET << 16;
+ sc->scsi_done(sc);
+ }
+ return true;
+}
+
/*
* Clean up any pending aborts on the lun
* For each outstanding IO on this lun, whose abort is not completed by fw,
@@ -2126,157 +2170,25 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
*/
static int fnic_clean_pending_aborts(struct fnic *fnic,
struct scsi_cmnd *lr_sc,
- bool new_sc)
+ bool new_sc)
{
- int tag, abt_tag;
- struct fnic_io_req *io_req;
- spinlock_t *io_lock;
- unsigned long flags;
- int ret = 0;
- struct scsi_cmnd *sc;
- struct scsi_lun fc_lun;
- struct scsi_device *lun_dev = lr_sc->device;
- DECLARE_COMPLETION_ONSTACK(tm_done);
- enum fnic_ioreq_state old_ioreq_state;
+ int ret = SUCCESS;
+ struct fnic_pending_aborts_iter_data iter_data = {
+ .fnic = fnic,
+ .lun_dev = lr_sc->device,
+ .ret = SUCCESS,
+ };
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- /*
- * ignore this lun reset cmd if issued using new SC
- * or cmds that do not belong to this lun
- */
- if (!sc || ((sc == lr_sc) && new_sc) || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ if (new_sc)
+ iter_data.lr_sc = lr_sc;
- io_req = (struct fnic_io_req *)CMD_SP(sc);
-
- if (!io_req || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- /*
- * Found IO that is still pending with firmware and
- * belongs to the LUN that we are resetting
- */
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "Found IO in %s on lun\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
-
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "%s dev rst not pending sc 0x%p\n", __func__,
- sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- if (io_req->abts_done)
- shost_printk(KERN_ERR, fnic->lport->host,
- "%s: io_req->abts_done is set state is %s\n",
- __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
- old_ioreq_state = CMD_STATE(sc);
- /*
- * Any pending IO issued prior to reset is expected to be
- * in abts pending state, if not we need to set
- * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
- * When IO is completed, the IO will be handed over and
- * handled in this function.
- */
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
-
- BUG_ON(io_req->abts_done);
-
- abt_tag = tag;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- abt_tag |= FNIC_TAG_DEV_RST;
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "%s: dev rst sc 0x%p\n", __func__, sc);
- }
-
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- io_req->abts_done = &tm_done;
- spin_unlock_irqrestore(io_lock, flags);
-
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
-
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- spin_lock_irqsave(io_lock, flags);
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (io_req)
- io_req->abts_done = NULL;
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- ret = 1;
- goto clean_pending_aborts_end;
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- }
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
-
- wait_for_completion_timeout(&tm_done,
- msecs_to_jiffies
- (fnic->config.ed_tov));
-
- /* Recheck cmd state to check if it is now aborted */
- spin_lock_irqsave(io_lock, flags);
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
- continue;
- }
-
- io_req->abts_done = NULL;
-
- /* if abort is still pending with fw, fail */
- if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
- spin_unlock_irqrestore(io_lock, flags);
- CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
- ret = 1;
- goto clean_pending_aborts_end;
- }
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
-
- /* original sc used for lr is handled by dev reset code */
- if (sc != lr_sc)
- CMD_SP(sc) = NULL;
- spin_unlock_irqrestore(io_lock, flags);
-
- /* original sc used for lr is handled by dev reset code */
- if (sc != lr_sc) {
- fnic_release_ioreq_buf(fnic, io_req, sc);
- mempool_free(io_req, fnic->io_req_pool);
- }
-
- /*
- * Any IO is returned during reset, it needs to call scsi_done
- * to return the scsi_cmnd to upper layer.
- */
- if (sc->scsi_done) {
- /* Set result to let upper SCSI layer retry */
- sc->result = DID_RESET << 16;
- sc->scsi_done(sc);
- }
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_pending_aborts_iter, &iter_data);
+ if (iter_data.ret == FAILED) {
+ ret = iter_data.ret;
+ goto clean_pending_aborts_end;
}
-
schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
/* walk again to check, if IOs are still pending in fw */
@@ -2775,6 +2687,49 @@ void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
}
+static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
+{
+ struct fnic_pending_aborts_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ int cmd_state;
+ struct fnic_io_req *io_req;
+ spinlock_t *io_lock;
+ unsigned long flags;
+
+ /*
+ * ignore this lun reset cmd or cmds that do not belong to
+ * this lun
+ */
+ if (iter_data->lr_sc && sc == iter_data->lr_sc)
+ return true;
+ if (iter_data->lun_dev && sc->device != iter_data->lun_dev)
+ return true;
+
+ io_lock = fnic_io_lock_hash(fnic, sc);
+ spin_lock_irqsave(io_lock, flags);
+
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to the LUN that we are resetting
+ */
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "Found IO in %s on lun\n",
+ fnic_ioreq_state_to_str(CMD_STATE(sc)));
+ cmd_state = CMD_STATE(sc);
+ spin_unlock_irqrestore(io_lock, flags);
+ if (cmd_state == FNIC_IOREQ_ABTS_PENDING)
+ iter_data->ret = 1;
+
+ return iter_data->ret ? false : true;
+}
+
/*
* fnic_is_abts_pending() is a helper function that
* walks through tag map to check if there is any IOs pending,if there is one,
@@ -2784,49 +2739,20 @@ void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
*/
int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
{
- int tag;
- struct fnic_io_req *io_req;
- spinlock_t *io_lock;
- unsigned long flags;
- int ret = 0;
- struct scsi_cmnd *sc;
- struct scsi_device *lun_dev = NULL;
+ struct fnic_pending_aborts_iter_data iter_data = {
+ .fnic = fnic,
+ .lun_dev = NULL,
+ .ret = 0,
+ };
- if (lr_sc)
- lun_dev = lr_sc->device;
-
- /* walk again to check, if IOs are still pending in fw */
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- /*
- * ignore this lun reset cmd or cmds that do not belong to
- * this lun
- */
- if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
- continue;
-
- io_lock = fnic_io_lock_hash(fnic, sc);
- spin_lock_irqsave(io_lock, flags);
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
-
- if (!io_req || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- /*
- * Found IO that is still pending with firmware and
- * belongs to the LUN that we are resetting
- */
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "Found IO in %s on lun\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
-
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- ret = 1;
- spin_unlock_irqrestore(io_lock, flags);
+ if (lr_sc) {
+ iter_data.lun_dev = lr_sc->device;
+ iter_data.lr_sc = lr_sc;
}
- return ret;
+ /* walk again to check, if IOs are still pending in fw */
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_abts_pending_iter, &iter_data);
+
+ return iter_data.ret;
}
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index c2776b8..38cfe1b 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -934,7 +934,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
INIT_LIST_HEAD(&head);
list_add_tail(&head, &piocbq->list);
- ct_req = (struct lpfc_sli_ct_request *)bdeBuf1;
+ ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt;
evt_req_id = ct_req->FsType;
cmd = ct_req->CommandResponse.bits.CmdRsp;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 1e4c792..5f018d0 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -254,13 +254,13 @@ lpfc_config_port_prep(struct lpfc_hba *phba)
if (mb->un.varDmp.word_cnt == 0)
break;
- i = mb->un.varDmp.word_cnt * sizeof(uint32_t);
- if (offset + i > DMP_VPD_SIZE)
- i = DMP_VPD_SIZE - offset;
+ if (mb->un.varDmp.word_cnt > DMP_VPD_SIZE - offset)
+ mb->un.varDmp.word_cnt = DMP_VPD_SIZE - offset;
lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
- lpfc_vpd_data + offset, i);
- offset += i;
- } while (offset < DMP_VPD_SIZE);
+ lpfc_vpd_data + offset,
+ mb->un.varDmp.word_cnt);
+ offset += mb->un.varDmp.word_cnt;
+ } while (mb->un.varDmp.word_cnt && offset < DMP_VPD_SIZE);
lpfc_parse_vpd(phba, lpfc_vpd_data, offset);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 06ccc01..573c859 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -11804,13 +11804,20 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
lpfc_ctx_cmd ctx_cmd)
{
struct lpfc_io_buf *lpfc_cmd;
+ IOCB_t *icmd = NULL;
int rc = 1;
if (!iocbq || iocbq->vport != vport)
return rc;
- if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
- !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
+ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ) ||
+ iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+ return rc;
+
+ icmd = &iocbq->iocb;
+ if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
+ icmd->ulpCommand == CMD_CLOSE_XRI_CN)
return rc;
lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
@@ -19770,7 +19777,7 @@ lpfc_sli_get_config_region23(struct lpfc_hba *phba, char *rgn23_data)
LPFC_MBOXQ_t *pmb = NULL;
MAILBOX_t *mb;
uint32_t offset = 0;
- int i, rc;
+ int rc;
if (!rgn23_data)
return 0;
@@ -19801,13 +19808,14 @@ lpfc_sli_get_config_region23(struct lpfc_hba *phba, char *rgn23_data)
if (mb->un.varDmp.word_cnt == 0)
break;
- i = mb->un.varDmp.word_cnt * sizeof(uint32_t);
- if (offset + i > DMP_RGN23_SIZE)
- i = DMP_RGN23_SIZE - offset;
+ if (mb->un.varDmp.word_cnt > DMP_RGN23_SIZE - offset)
+ mb->un.varDmp.word_cnt = DMP_RGN23_SIZE - offset;
+
lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
- rgn23_data + offset, i);
- offset += i;
- } while (offset < DMP_RGN23_SIZE);
+ rgn23_data + offset,
+ mb->un.varDmp.word_cnt);
+ offset += mb->un.varDmp.word_cnt;
+ } while (mb->un.varDmp.word_cnt && offset < DMP_RGN23_SIZE);
mempool_free(pmb, phba->mbox_mem_pool);
return offset;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9c5782e..0de2505 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1195,6 +1195,9 @@ static int qla24xx_post_prli_work(struct scsi_qla_host *vha, fc_port_t *fcport)
{
struct qla_work_evt *e;
+ if (vha->host->active_mode == MODE_TARGET)
+ return QLA_FUNCTION_FAILED;
+
e = qla2x00_alloc_work(vha, QLA_EVT_PRLI);
if (!e)
return QLA_FUNCTION_FAILED;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index d74c32f..4eab564 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -7707,6 +7707,7 @@ struct scsi_host_template qla2xxx_driver_template = {
.eh_timed_out = fc_eh_timed_out,
.eh_abort_handler = qla2xxx_eh_abort,
+ .eh_should_retry_cmd = fc_eh_should_retry_cmd,
.eh_device_reset_handler = qla2xxx_eh_device_reset,
.eh_target_reset_handler = qla2xxx_eh_target_reset,
.eh_bus_reset_handler = qla2xxx_eh_bus_reset,
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 70165be1..a5d1633b 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -218,7 +218,7 @@ static const char *sdebug_version_date = "20200710";
*/
#define SDEBUG_CANQUEUE_WORDS 3 /* a WORD is bits in a long */
#define SDEBUG_CANQUEUE (SDEBUG_CANQUEUE_WORDS * BITS_PER_LONG)
-#define DEF_CMD_PER_LUN 255
+#define DEF_CMD_PER_LUN SDEBUG_CANQUEUE
/* UA - Unit Attention; SA - Service Action; SSU - Start Stop Unit */
#define F_D_IN 1 /* Data-in command (e.g. READ) */
@@ -5695,8 +5695,8 @@ MODULE_PARM_DESC(lbpu, "enable LBP, support UNMAP command (def=0)");
MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)");
-MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method");
+MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))");
MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error");
MODULE_PARM_DESC(medium_error_start, "starting sector number to return MEDIUM error");
@@ -5710,7 +5710,7 @@ MODULE_PARM_DESC(opt_xferlen_exp, "optimal transfer length granularity exponent
MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... (def=0)");
MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get new store (def=0)");
MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");
-MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to max(submit_queues - 1)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to max(submit_queues - 1))");
MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
MODULE_PARM_DESC(random, "If set, uniformly randomize command duration between 0 and delay_in_ns");
MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");
@@ -7165,12 +7165,15 @@ static int sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
}
num_in_q = atomic_read(&devip->num_in_q);
+ if (qdepth > SDEBUG_CANQUEUE) {
+ pr_warn("%s: requested qdepth [%d] exceeds canqueue [%d], trim\n", __func__,
+ qdepth, SDEBUG_CANQUEUE);
+ qdepth = SDEBUG_CANQUEUE; /* clamp only after logging the requested value */
+ }
if (qdepth < 1)
qdepth = 1;
- /* allow to exceed max host qc_arr elements for testing */
- if (qdepth > SDEBUG_CANQUEUE + 10)
- qdepth = SDEBUG_CANQUEUE + 10;
- scsi_change_queue_depth(sdev, qdepth);
+ if (qdepth != sdev->queue_depth)
+ scsi_change_queue_depth(sdev, qdepth);
if (SDEBUG_OPT_Q_NOISE & sdebug_opts) {
sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
@@ -7558,6 +7561,7 @@ static int sdebug_driver_probe(struct device *dev)
sdbg_host = to_sdebug_host(dev);
sdebug_driver_template.can_queue = sdebug_max_queue;
+ sdebug_driver_template.cmd_per_lun = sdebug_max_queue;
if (!sdebug_clustering)
sdebug_driver_template.dma_boundary = PAGE_SIZE - 1;
@@ -7593,7 +7597,11 @@ static int sdebug_driver_probe(struct device *dev)
* If condition not met, trim poll_queues to 1 (just for simplicity).
*/
if (poll_queues >= submit_queues) {
- pr_warn("%s: trim poll_queues to 1\n", my_name);
+ if (submit_queues < 3)
+ pr_warn("%s: trim poll_queues to 1\n", my_name);
+ else
+ pr_warn("%s: trim poll_queues to 1. Perhaps try poll_queues=%d\n",
+ my_name, submit_queues - 1);
poll_queues = 1;
}
if (poll_queues)
diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c
index d7c3cff..5d0e98a 100644
--- a/drivers/scsi/ufs/ufs-sysfs.c
+++ b/drivers/scsi/ufs/ufs-sysfs.c
@@ -9,7 +9,7 @@
#include "ufs.h"
#include "ufs-sysfs.h"
-static const char *ufschd_uic_link_state_to_string(
+static const char *ufshcd_uic_link_state_to_string(
enum uic_link_state state)
{
switch (state) {
@@ -21,7 +21,7 @@ static const char *ufschd_uic_link_state_to_string(
}
}
-static const char *ufschd_ufs_dev_pwr_mode_to_string(
+static const char *ufshcd_ufs_dev_pwr_mode_to_string(
enum ufs_dev_pwr_mode state)
{
switch (state) {
@@ -81,7 +81,7 @@ static ssize_t rpm_target_dev_state_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string(
ufs_pm_lvl_states[hba->rpm_lvl].dev_state));
}
@@ -90,7 +90,7 @@ static ssize_t rpm_target_link_state_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string(
ufs_pm_lvl_states[hba->rpm_lvl].link_state));
}
@@ -113,7 +113,7 @@ static ssize_t spm_target_dev_state_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string(
ufs_pm_lvl_states[hba->spm_lvl].dev_state));
}
@@ -122,7 +122,7 @@ static ssize_t spm_target_link_state_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string(
ufs_pm_lvl_states[hba->spm_lvl].link_state));
}
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 0625da7..3eb5493 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -8593,7 +8593,7 @@ static void ufshcd_vreg_set_lpm(struct ufs_hba *hba)
} else if (!ufshcd_is_ufs_dev_active(hba)) {
ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false);
vcc_off = true;
- if (!ufshcd_is_link_active(hba)) {
+ if (ufshcd_is_link_hibern8(hba) || ufshcd_is_link_off(hba)) {
ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq);
ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq2);
}
@@ -8615,7 +8615,7 @@ static int ufshcd_vreg_set_hpm(struct ufs_hba *hba)
!hba->dev_info.is_lu_power_on_wp) {
ret = ufshcd_setup_vreg(hba, true);
} else if (!ufshcd_is_ufs_dev_active(hba)) {
- if (!ret && !ufshcd_is_link_active(hba)) {
+ if (!ufshcd_is_link_active(hba)) {
ret = ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
if (ret)
goto vcc_disable;
@@ -8975,10 +8975,13 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
if (!hba->is_powered)
return 0;
+ cancel_delayed_work_sync(&hba->rpm_dev_flush_recheck_work);
+
if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) ==
hba->curr_dev_pwr_mode) &&
(ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) ==
hba->uic_link_state) &&
+ pm_runtime_suspended(hba->dev) &&
!hba->dev_info.b_rpm_dev_flush_capable)
goto out;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index eec2fd5..198d25ae 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1413,7 +1413,7 @@ static int tcmu_run_tmr_queue(struct tcmu_dev *udev)
return 1;
}
-static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
+static bool tcmu_handle_completions(struct tcmu_dev *udev)
{
struct tcmu_mailbox *mb;
struct tcmu_cmd *cmd;
@@ -1456,7 +1456,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
pr_err("cmd_id %u not found, ring is broken\n",
entry->hdr.cmd_id);
set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
- break;
+ return false;
}
tcmu_handle_completion(cmd, entry);
diff --git a/drivers/tty/vt/.gitignore b/drivers/tty/vt/.gitignore
index 3ecf422..0221709 100644
--- a/drivers/tty/vt/.gitignore
+++ b/drivers/tty/vt/.gitignore
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-conmakehash
-consolemap_deftbl.c
-defkeymap.c
+/conmakehash
+/consolemap_deftbl.c
+/defkeymap.c
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 372b52a..072780b 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -733,7 +733,7 @@ static int fb_seq_show(struct seq_file *m, void *v)
return 0;
}
-static const struct seq_operations proc_fb_seq_ops = {
+static const struct seq_operations __maybe_unused proc_fb_seq_ops = {
.start = fb_seq_start,
.next = fb_seq_next,
.stop = fb_seq_stop,
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 39def02..cdb9950 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -583,7 +583,7 @@ static struct attribute *v9fs_attrs[] = {
NULL,
};
-static struct attribute_group v9fs_attr_group = {
+static const struct attribute_group v9fs_attr_group = {
.attrs = v9fs_attrs,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 649f04f..59c32c9 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -86,8 +86,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
* to work.
*/
writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
+ if (IS_ERR(writeback_fid)) {
+ err = PTR_ERR(writeback_fid);
mutex_unlock(&v9inode->v_mutex);
goto out_error;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index 89a750d2..141a856 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -338,8 +338,8 @@
default y
config NFS_V4_2_SSC_HELPER
- tristate
- default y if NFS_V4=y || NFS_FS=y
+ bool
+ default y if NFS_V4_2
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9114e0a..b8abccd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1677,6 +1677,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
+ size_t shorted = 0;
ssize_t ret;
if (bdev_read_only(I_BDEV(bd_inode)))
@@ -1694,12 +1695,17 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
return -EOPNOTSUPP;
- iov_iter_truncate(from, size - iocb->ki_pos);
+ size -= iocb->ki_pos;
+ if (iov_iter_count(from) > size) {
+ shorted = iov_iter_count(from) - size;
+ iov_iter_truncate(from, size);
+ }
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
blk_finish_plug(&plug);
return ret;
}
@@ -1711,13 +1717,21 @@ ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+ ssize_t ret;
if (pos >= size)
return 0;
size -= pos;
- iov_iter_truncate(to, size);
- return generic_file_read_iter(iocb, to);
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
+
+ ret = generic_file_read_iter(iocb, to);
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+ return ret;
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 3bcf881..5d21cd9 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1021,6 +1021,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
ctx->max_channels = result.uint_32;
+ /* If more than one channel is requested, the user wants multichannel */
+ if (result.uint_32 > 1)
+ ctx->multichannel = true;
break;
case Opt_handletimeout:
ctx->handle_timeout = result.uint_32;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 63d517b..a92a1fb 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -97,6 +97,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
return 0;
}
+ if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
+ ses->chan_max = 1;
+ return 0;
+ }
+
/*
* Make a copy of the iface list at the time and use that
* instead so as to not hold the iface spinlock for opening
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e36c2a8..a8bf431 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -841,6 +841,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
req->SecurityMode = 0;
req->Capabilities = cpu_to_le32(server->vals->req_capabilities);
+ if (ses->chan_max > 1)
+ req->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
/* ClientGUID must be zero for SMB2.02 dialect */
if (server->vals->protocol_id == SMB20_PROT_ID)
@@ -1032,6 +1034,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->Capabilities =
cpu_to_le32(server->vals->req_capabilities);
+ if (tcon->ses->chan_max > 1)
+ pneg_inbuf->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
+
memcpy(pneg_inbuf->Guid, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 360f813..f46acbb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -251,7 +251,7 @@ struct io_rsrc_data {
struct io_buffer {
struct list_head list;
__u64 addr;
- __s32 len;
+ __u32 len;
__u16 bid;
};
@@ -456,6 +456,7 @@ struct io_ring_ctx {
spinlock_t rsrc_ref_lock;
struct io_rsrc_node *rsrc_node;
struct io_rsrc_node *rsrc_backup_node;
+ struct io_mapped_ubuf *dummy_ubuf;
struct io_restriction restrictions;
@@ -702,7 +703,8 @@ enum {
REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
- REQ_F_FAIL_LINK_BIT,
+ /* first byte is taken by user flags, shift it to not overlap */
+ REQ_F_FAIL_LINK_BIT = 8,
REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT,
REQ_F_NOWAIT_BIT,
@@ -1157,6 +1159,12 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
goto err;
__hash_init(ctx->cancel_hash, 1U << hash_bits);
+ ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
+ if (!ctx->dummy_ubuf)
+ goto err;
+ /* set an invalid range, so io_import_fixed() fails if it encounters this buffer */
+ ctx->dummy_ubuf->ubuf = -1UL;
+
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
goto err;
@@ -1184,6 +1192,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
return ctx;
err:
+ kfree(ctx->dummy_ubuf);
kfree(ctx->cancel_hash);
kfree(ctx);
return NULL;
@@ -3977,7 +3986,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
break;
buf->addr = addr;
- buf->len = pbuf->len;
+ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
buf->bid = bid;
addr += pbuf->len;
bid++;
@@ -6503,14 +6512,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->work.creds = NULL;
/* enforce forwards compatibility on users */
- if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
- req->flags = 0;
+ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
return -EINVAL;
- }
-
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
-
if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
return -EACCES;
@@ -7539,6 +7544,7 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
io_ring_submit_lock(ctx, lock_ring);
spin_lock_irqsave(&ctx->completion_lock, flags);
io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+ ctx->cq_extra++;
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
@@ -8111,11 +8117,13 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
struct io_mapped_ubuf *imu = *slot;
unsigned int i;
- for (i = 0; i < imu->nr_bvecs; i++)
- unpin_user_page(imu->bvec[i].bv_page);
- if (imu->acct_pages)
- io_unaccount_mem(ctx, imu->acct_pages);
- kvfree(imu);
+ if (imu != ctx->dummy_ubuf) {
+ for (i = 0; i < imu->nr_bvecs; i++)
+ unpin_user_page(imu->bvec[i].bv_page);
+ if (imu->acct_pages)
+ io_unaccount_mem(ctx, imu->acct_pages);
+ kvfree(imu);
+ }
*slot = NULL;
}
@@ -8132,7 +8140,7 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
for (i = 0; i < ctx->nr_user_bufs; i++)
io_buffer_unmap(ctx, &ctx->user_bufs[i]);
kfree(ctx->user_bufs);
- kfree(ctx->buf_data);
+ io_rsrc_data_free(ctx->buf_data);
ctx->user_bufs = NULL;
ctx->buf_data = NULL;
ctx->nr_user_bufs = 0;
@@ -8255,6 +8263,11 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
size_t size;
int ret, pret, nr_pages, i;
+ if (!iov->iov_base) {
+ *pimu = ctx->dummy_ubuf;
+ return 0;
+ }
+
ubuf = (unsigned long) iov->iov_base;
end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = ubuf >> PAGE_SHIFT;
@@ -8352,7 +8365,9 @@ static int io_buffer_validate(struct iovec *iov)
* constraints here, we'll -EINVAL later when IO is
* submitted if they are wrong.
*/
- if (!iov->iov_base || !iov->iov_len)
+ if (!iov->iov_base)
+ return iov->iov_len ? -EFAULT : 0;
+ if (!iov->iov_len)
return -EFAULT;
/* arbitrary limit, but we need something */
@@ -8385,7 +8400,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
return -ENOMEM;
ret = io_buffers_map_alloc(ctx, nr_args);
if (ret) {
- kfree(data);
+ io_rsrc_data_free(data);
return ret;
}
@@ -8402,6 +8417,10 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
ret = io_buffer_validate(&iov);
if (ret)
break;
+ if (!iov.iov_base && tag) {
+ ret = -EINVAL;
+ break;
+ }
ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
&last_hpage);
@@ -8451,12 +8470,16 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
err = io_buffer_validate(&iov);
if (err)
break;
+ if (!iov.iov_base && tag) {
+ err = -EINVAL;
+ break;
+ }
err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
if (err)
break;
i = array_index_nospec(offset, ctx->nr_user_bufs);
- if (ctx->user_bufs[i]) {
+ if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
err = io_queue_rsrc_removal(ctx->buf_data, offset,
ctx->rsrc_node, ctx->user_bufs[i]);
if (unlikely(err)) {
@@ -8604,6 +8627,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_hash);
+ kfree(ctx->dummy_ubuf);
kfree(ctx);
}
@@ -9607,7 +9631,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
if (ret)
goto err;
/* always set a rsrc node */
- io_rsrc_node_switch_start(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
+ if (ret)
+ goto err;
io_rsrc_node_switch(ctx, NULL);
memset(&p->sq_off, 0, sizeof(p->sq_off));
@@ -10136,6 +10162,13 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+ BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+ sizeof(struct io_uring_rsrc_update));
+ BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
+ sizeof(struct io_uring_rsrc_update2));
+ /* should fit into one byte */
+ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
+
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f7786e0..ed9d580 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -137,12 +137,12 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
if (!pnfs_layout_is_valid(lo))
continue;
- if (stateid != NULL &&
- !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+ if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
@@ -176,9 +176,10 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp,
continue;
if (nfsi->layout != lo)
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ff5c4d0..cfeaadf 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -476,7 +476,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
-#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -487,7 +486,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_maxval = NFS_MAX_UDP_TIMEOUT;
to->to_exponential = 1;
break;
-#endif
default:
BUG();
}
@@ -698,9 +696,18 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+ server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+
+ switch (clp->rpc_ops->version) {
+ case 2:
+ server->fattr_valid = NFS_ATTR_FATTR_V2;
+ break;
+ case 3:
+ server->fattr_valid = NFS_ATTR_FATTR_V3;
+ break;
+ default:
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
+ }
if (ctx->rsize)
server->rsize = nfs_block_size(ctx->rsize, NULL);
@@ -794,6 +801,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->maxfilesize = fsinfo->maxfilesize;
server->time_delta = fsinfo->time_delta;
+ server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne Set socket buffersize */
@@ -935,6 +943,8 @@ struct nfs_server *nfs_alloc_server(void)
return NULL;
}
+ server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 04bf806..e6ec6f0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -114,7 +114,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
return ret;
}
/**
- * nfs_have_delegation - check if inode has a delegation, mark it
+ * nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
@@ -481,6 +481,22 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (freeme == NULL)
goto out;
add_new:
+ /*
+ * If we didn't revalidate the change attribute before setting
+ * the delegation, then pre-emptively ask for a full attribute
+ * cache revalidation.
+ */
+ spin_lock(&inode->i_lock);
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
+ nfs_set_cache_invalid(inode,
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+ NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
+ NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR);
+ spin_unlock(&inode->i_lock);
+
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
@@ -488,11 +504,6 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
-
- spin_lock(&inode->i_lock);
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
- NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
- spin_unlock(&inode->i_lock);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
@@ -674,7 +685,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
}
/**
- * nfs_inode_return_delegation - synchronously return a delegation
+ * nfs4_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
* This routine will always flush any dirty data to disk on the
@@ -697,7 +708,7 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
- * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
* This routine is called on file close in order to determine if the
@@ -811,7 +822,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
}
/**
- * nfs_super_return_all_delegations - return delegations for one superblock
+ * nfs_server_return_all_delegations - return delegations for one superblock
* @server: pointer to nfs_server to process
*
*/
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9b00a0b..c19b4fd 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -84,8 +84,7 @@ int nfs4_inode_make_writeable(struct inode *inode);
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
- !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
}
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3d8e369..1a6d286 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -866,6 +866,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
break;
}
+ verf_arg = verf_res;
+
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
} while (!status && nfs_readdir_page_needs_filling(page));
@@ -927,7 +929,12 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
}
return res;
}
- memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
+ /*
+ * Set the cookie verifier if the page cache was empty
+ */
+ if (desc->page_index == 0)
+ memcpy(nfsi->cookieverf, verf,
+ sizeof(nfsi->cookieverf));
}
res = nfs_readdir_search_array(desc);
if (res == 0) {
@@ -974,10 +981,10 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
-static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
+ const __be32 *verf)
{
struct file *file = desc->file;
- struct nfs_inode *nfsi = NFS_I(file_inode(file));
struct nfs_cache_array *array;
unsigned int i = 0;
@@ -991,7 +998,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
desc->eof = true;
break;
}
- memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
+ memcpy(desc->verf, verf, sizeof(desc->verf));
if (i < (array->size-1))
desc->dir_cookie = array->array[i+1].cookie;
else
@@ -1048,7 +1055,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
desc->page = arrays[i];
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, verf);
}
desc->page = NULL;
@@ -1069,6 +1076,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
+ struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_dir_context *dir_ctx = file->private_data;
struct nfs_readdir_descriptor *desc;
int res;
@@ -1122,7 +1130,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
}
if (res == -ETOOSMALL && desc->plus) {
- clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+ clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
nfs_zap_caches(inode);
desc->page_index = 0;
desc->plus = false;
@@ -1132,7 +1140,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
if (res < 0)
break;
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
} while (!desc->eof);
@@ -1703,7 +1711,7 @@ static void nfs_drop_nlink(struct inode *inode)
NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
nfs_set_cache_invalid(
inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
- NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
+ NFS_INO_INVALID_NLINK);
spin_unlock(&inode->i_lock);
}
@@ -2940,7 +2948,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
if (S_ISDIR(inode->i_mode))
return 0;
- if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
@@ -2998,7 +3006,8 @@ int nfs_permission(struct user_namespace *mnt_userns,
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
- res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER);
if (res == 0)
res = generic_permission(&init_user_ns, inode, mask);
goto out;
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index f2b34cf..37a1a88 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -169,19 +169,8 @@ nfs_get_parent(struct dentry *dentry)
static u64 nfs_fetch_iversion(struct inode *inode)
{
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* Is this the right call?: */
- nfs_revalidate_inode(server, inode);
- /*
- * Also, note we're ignoring any returned error. That seems to be
- * the practice for cache consistency information elsewhere in
- * the server, but I'm not sure why.
- */
- if (server->nfs_client->rpc_ops->version >= 4)
- return inode_peek_iversion_raw(inode);
- else
- return time_to_chattr(&inode->i_ctime);
+ nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
+ return inode_peek_iversion_raw(inode);
}
const struct export_operations nfs_export_ops = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 16ad505..1fef107 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -105,7 +105,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
if (filp->f_flags & O_DIRECT)
goto force_reval;
- if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
goto force_reval;
return 0;
force_reval:
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 872112b..d383de0 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -106,7 +106,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index a06d213..d95c9a3 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -283,20 +283,40 @@ static int nfs_verify_server_address(struct sockaddr *addr)
return 0;
}
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ return true;
+}
+#else
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ if (ctx->version == 4)
+ return true;
+ return false;
+}
+#endif
+
/*
* Sanity check the NFS transport protocol.
- *
*/
-static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+static int nfs_validate_transport_protocol(struct fs_context *fc,
+ struct nfs_fs_context *ctx)
{
switch (ctx->nfs_server.protocol) {
case XPRT_TRANSPORT_UDP:
+ if (nfs_server_transport_udp_invalid(ctx))
+ goto out_invalid_transport_udp;
+ break;
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
break;
default:
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
}
+ return 0;
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
/*
@@ -305,8 +325,6 @@ static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
*/
static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
{
- nfs_validate_transport_protocol(ctx);
-
if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
return;
@@ -932,6 +950,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
struct nfs_fh *mntfh = ctx->mntfh;
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+ int ret;
if (data == NULL)
goto out_no_data;
@@ -977,6 +996,15 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
sizeof(mntfh->data) - mntfh->size);
/*
+ * for proto == XPRT_TRANSPORT_UDP, which is what uses
+ * to_exponential, implying shift: limit the shift value
+ * to BITS_PER_LONG (majortimeo is unsigned long)
+ */
+ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+ if (data->retrans >= 64) /* shift value is too large */
+ goto out_invalid_data;
+
+ /*
* Translate to nfs_fs_context, which nfs_fill_super
* can deal with.
*/
@@ -1048,6 +1076,10 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
goto generic;
}
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
ctx->skip_reconfig_option_check = true;
return 0;
@@ -1076,6 +1108,9 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
out_invalid_fh:
return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+ return nfs_invalf(fc, "NFS: invalid binary mount data");
}
#if IS_ENABLED(CONFIG_NFS_V4)
@@ -1146,6 +1181,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int ret;
char *c;
if (!data) {
@@ -1218,9 +1254,9 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
ctx->acdirmin = data->acdirmin;
ctx->acdirmax = data->acdirmax;
ctx->nfs_server.protocol = data->proto;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
done:
ctx->skip_reconfig_option_check = true;
return 0;
@@ -1231,9 +1267,6 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
out_no_address:
return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
-
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
#endif
@@ -1298,6 +1331,10 @@ static int nfs_fs_context_validate(struct fs_context *fc)
if (!nfs_verify_server_address(sap))
goto out_no_address;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
if (ctx->version == 4) {
if (IS_ENABLED(CONFIG_NFS_V4)) {
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
@@ -1306,9 +1343,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
port = NFS_PORT;
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
NFS_MOUNT_LOCAL_FCNTL);
@@ -1317,10 +1351,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
}
} else {
nfs_set_mount_transport_protocol(ctx);
-#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
-#endif
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
port = NFS_RDMA_PORT;
}
@@ -1354,8 +1384,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
out_v4_not_compiled:
nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
return -EPROTONOSUPPORT;
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
out_no_address:
return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
out_mountproto_mismatch:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5a8854d..529c409 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -164,34 +164,19 @@ static int nfs_attribute_timeout(struct inode *inode)
return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
}
-static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+static bool nfs_check_cache_flags_invalid(struct inode *inode,
+ unsigned long flags)
{
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- /* Special case for the pagecache or access cache */
- if (flags == NFS_INO_REVAL_PAGECACHE &&
- !(cache_validity & NFS_INO_REVAL_FORCED))
- return false;
return (cache_validity & flags) != 0;
}
-static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
-{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-
- if ((cache_validity & flags) != 0)
- return true;
- if (nfs_attribute_timeout(inode))
- return true;
- return false;
-}
-
bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
{
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
- return nfs_check_cache_invalid_delegated(inode, flags);
-
- return nfs_check_cache_invalid_not_delegated(inode, flags);
+ if (nfs_check_cache_flags_invalid(inode, flags))
+ return true;
+ return nfs_attribute_cache_expired(inode);
}
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
@@ -214,20 +199,21 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
if (have_delegation) {
if (!(flags & NFS_INO_REVAL_FORCED))
- flags &= ~NFS_INO_INVALID_OTHER;
- flags &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_INVALID_XATTR);
- }
+ flags &= ~(NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_XATTR);
+ flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
+ } else if (flags & NFS_INO_REVAL_PAGECACHE)
+ flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
if (!nfs_has_xattr_cache(nfsi))
flags &= ~NFS_INO_INVALID_XATTR;
- if (inode->i_mapping->nrpages == 0)
- flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
- nfsi->cache_validity |= flags;
if (flags & NFS_INO_INVALID_DATA)
nfs_fscache_invalidate(inode);
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+ flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |= flags;
}
EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
@@ -452,6 +438,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
.fattr = fattr
};
struct inode *inode = ERR_PTR(-ENOENT);
+ u64 fattr_supported = NFS_SB(sb)->fattr_valid;
unsigned long hash;
nfs_attr_check_mountpoint(sb, fattr);
@@ -484,8 +471,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_mode = fattr->mode;
nfsi->cache_validity = 0;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
- && nfs_server_capable(inode, NFS_CAP_MODE))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ && (fattr_supported & NFS_ATTR_FATTR_MODE))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
@@ -530,15 +517,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
- else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
- else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode_set_iversion_raw(inode, fattr->change_attr);
@@ -550,29 +537,31 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
- else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+ else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+ else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (nfs_server_capable(inode, NFS_CAP_XATTR))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
+ else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- }
-
- if (nfsi->cache_validity != 0)
- nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_setsecurity(inode, fattr, label);
@@ -634,8 +623,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
/* Optimization: if the end result is no change, don't RPC */
- attr->ia_valid &= NFS_VALID_ATTRS;
- if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
+ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
trace_nfs_setattr_enter(inode);
@@ -710,12 +698,20 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
+ if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+ inode->i_mode & S_ISUID)
+ inode->i_mode &= ~S_ISUID;
+ if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+ (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+ (S_ISGID | S_IXGRP))
+ inode->i_mode &= ~S_ISGID;
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
@@ -793,14 +789,28 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
dput(parent);
}
-static bool nfs_need_revalidate_inode(struct inode *inode)
+static u32 nfs_get_valid_attrmask(struct inode *inode)
{
- if (NFS_I(inode)->cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
- return true;
- if (nfs_attribute_cache_expired(inode))
- return true;
- return false;
+ unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ u32 reply_mask = STATX_INO | STATX_TYPE;
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ reply_mask |= STATX_ATIME;
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ reply_mask |= STATX_CTIME;
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ reply_mask |= STATX_MTIME;
+ if (!(cache_validity & NFS_INO_INVALID_SIZE))
+ reply_mask |= STATX_SIZE;
+ if (!(cache_validity & NFS_INO_INVALID_NLINK))
+ reply_mask |= STATX_NLINK;
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ reply_mask |= STATX_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ reply_mask |= STATX_UID | STATX_GID;
+ if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
+ reply_mask |= STATX_BLOCKS;
+ return reply_mask;
}
int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
@@ -815,9 +825,13 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
trace_nfs_getattr_enter(inode);
+ request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
+ STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
+ STATX_INO | STATX_SIZE | STATX_BLOCKS;
+
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
nfs_readdirplus_parent_cache_hit(path->dentry);
- goto out_no_update;
+ goto out_no_revalidate;
}
/* Flush out writes to the server in order to update c/mtime. */
@@ -850,14 +864,24 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
/* Check whether the cached attributes are stale */
do_update |= force_sync || nfs_attribute_cache_expired(inode);
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- do_update |= cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+ do_update |= cache_validity & NFS_INO_INVALID_CHANGE;
if (request_mask & STATX_ATIME)
do_update |= cache_validity & NFS_INO_INVALID_ATIME;
- if (request_mask & (STATX_CTIME|STATX_MTIME))
- do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+ if (request_mask & STATX_CTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_CTIME;
+ if (request_mask & STATX_MTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_MTIME;
+ if (request_mask & STATX_SIZE)
+ do_update |= cache_validity & NFS_INO_INVALID_SIZE;
+ if (request_mask & STATX_NLINK)
+ do_update |= cache_validity & NFS_INO_INVALID_NLINK;
+ if (request_mask & STATX_MODE)
+ do_update |= cache_validity & NFS_INO_INVALID_MODE;
+ if (request_mask & (STATX_UID | STATX_GID))
+ do_update |= cache_validity & NFS_INO_INVALID_OTHER;
if (request_mask & STATX_BLOCKS)
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+
if (do_update) {
/* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC))
@@ -871,8 +895,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
nfs_readdirplus_parent_cache_hit(path->dentry);
out_no_revalidate:
/* Only return attributes that were revalidated. */
- stat->result_mask &= request_mask;
-out_no_update:
+ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
+
generic_fillattr(&init_user_ns, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
@@ -963,7 +987,6 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
{
struct nfs_inode *nfsi;
struct inode *inode;
- struct nfs_server *server;
if (!(ctx->mode & FMODE_WRITE))
return;
@@ -979,10 +1002,10 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
return;
if (!list_empty(&nfsi->open_files))
return;
- server = NFS_SERVER(inode);
- if (server->flags & NFS_MOUNT_NOCTO)
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)
return;
- nfs_revalidate_inode(server, inode);
+ nfs_revalidate_inode(inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
}
EXPORT_SYMBOL_GPL(nfs_close_context);
@@ -1237,16 +1260,16 @@ int nfs_attribute_cache_expired(struct inode *inode)
/**
* nfs_revalidate_inode - Revalidate the inode attributes
- * @server: pointer to nfs_server struct
* @inode: pointer to inode struct
+ * @flags: cache flags to check
*
* Updates inode attribute information by retrieving the data from the server.
*/
-int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+int nfs_revalidate_inode(struct inode *inode, unsigned long flags)
{
- if (!nfs_need_revalidate_inode(inode))
+ if (!nfs_check_cache_invalid(inode, flags))
return NFS_STALE(inode) ? -ESTALE : 0;
- return __nfs_revalidate_inode(server, inode);
+ return __nfs_revalidate_inode(NFS_SERVER(inode), inode);
}
EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
@@ -1332,7 +1355,7 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
bool nfs_mapping_need_revalidate_inode(struct inode *inode)
{
- return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+ return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) ||
NFS_STALE(inode);
}
@@ -1468,8 +1491,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
- invalid |= NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_CHANGE;
ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
@@ -1483,28 +1505,21 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize)
- invalid |= NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_SIZE;
}
}
/* Have any file permissions changed? */
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_MODE;
if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
/* Has the link count changed? */
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
- invalid |= NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_NLINK;
ts = inode->i_atime;
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
@@ -1642,41 +1657,142 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
#endif
/**
- * nfs_inode_attrs_need_update - check if the inode attributes need updating
- * @inode: pointer to inode
+ * nfs_inode_attrs_cmp_generic - compare attributes
* @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ * Note also the check for wraparound of 'attr_gencount'
+ *
+ * The function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. Otherwise it returns
+ * the value '0'.
+ */
+static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
+
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
+}
+
+/**
+ * nfs_inode_attrs_cmp_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
*
* Attempt to divine whether or not an RPC call reply carrying stale
* attributes got scheduled after another call carrying updated ones.
*
- * To do so, the function first assumes that a more recent ctime means
- * that the attributes in fattr are newer, however it also attempt to
- * catch the case where ctime either didn't change, or went backwards
- * (if someone reset the clock on the server) by looking at whether
- * or not this RPC call was started after the inode was last updated.
- * Note also the check for wraparound of 'attr_gencount'
- *
- * The function returns 'true' if it thinks the attributes in 'fattr' are
- * more recent than the ones cached in the inode.
- *
+ * We assume that the server observes monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '0' indicates no measurable change
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent.
*/
-static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
-
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+ s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode);
+ if (diff > 0)
+ return 1;
+ return diff == 0 ? 0 : -1;
}
-static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+/**
+ * nfs_inode_attrs_cmp_strict_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ *
+ * We assume that the server observes strictly monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent or unchanged.
+ */
+static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
{
- int ret;
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1;
+}
+
+/**
+ * nfs_inode_attrs_cmp - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * This function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. It returns '-1' if
+ * the attributes in @inode are more recent than the ones in @fattr,
+ * and it returns 0 if not sure.
+ */
+static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0)
+ return 1;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode);
+ default:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode);
+ }
+ return 0;
+}
+
+/**
+ * nfs_inode_finish_partial_attr_update - complete a previous inode update
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' if the last attribute update left the inode cached
+ * attributes in a partially unrevalidated state, and @fattr
+ * matches the change attribute of that partial update.
+ * Otherwise returns '0'.
+ */
+static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ const unsigned long check_valid =
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK;
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE) &&
+ (cache_validity & check_valid) != 0 &&
+ (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+ nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0)
+ return 1;
+ return 0;
+}
+
+static int nfs_refresh_inode_locked(struct inode *inode,
+ struct nfs_fattr *fattr)
+{
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
+ int ret = 0;
trace_nfs_refresh_inode_enter(inode);
- if (nfs_inode_attrs_need_update(inode, fattr))
+ if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
ret = nfs_update_inode(inode, fattr);
- else
+ else if (attr_cmp == 0)
ret = nfs_check_inode_attributes(inode, fattr);
trace_nfs_refresh_inode_exit(inode, ret);
@@ -1761,11 +1877,13 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
*/
int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
{
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
int status;
/* Don't do a WCC update if these attributes are already stale */
- if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
- !nfs_inode_attrs_need_update(inode, fattr)) {
+ if (attr_cmp < 0)
+ return 0;
+ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
@@ -1839,9 +1957,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
*/
static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_server *server;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_isize, new_isize;
+ u64 fattr_supported = server->fattr_valid;
unsigned long invalid = 0;
unsigned long now = jiffies;
unsigned long save_cache_validity;
@@ -1885,7 +2004,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
goto out_err;
}
- server = NFS_SERVER(inode);
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
@@ -1904,14 +2022,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED
- | NFS_INO_REVAL_PAGECACHE
| NFS_INO_INVALID_BLOCKS);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
if (pnfs_layoutcommit_outstanding(inode)) {
- nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
+ nfsi->cache_validity |=
+ save_cache_validity &
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS);
cache_revalidated = false;
}
@@ -1928,6 +2049,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
save_cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_SIZE
+ | NFS_INO_INVALID_BLOCKS
+ | NFS_INO_INVALID_NLINK
+ | NFS_INO_INVALID_MODE
| NFS_INO_INVALID_OTHER;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
@@ -1940,28 +2064,24 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
attr_changed = true;
}
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CHANGE;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
inode->i_mtime = fattr->mtime;
- } else if (server->caps & NFS_CAP_MTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_MTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MTIME;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
inode->i_ctime = fattr->ctime;
- } else if (server->caps & NFS_CAP_CTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CTIME;
cache_revalidated = false;
}
@@ -1985,21 +2105,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)cur_isize,
(long long)new_isize);
}
+ if (new_isize == 0 &&
+ !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED |
+ NFS_ATTR_FATTR_BLOCKS_USED))) {
+ fattr->du.nfs3.used = 0;
+ fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
+ }
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_SIZE;
cache_revalidated = false;
}
-
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (server->caps & NFS_CAP_ATIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_ATIME
- | NFS_INO_REVAL_FORCED);
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_ATIME;
cache_revalidated = false;
}
@@ -2012,10 +2134,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
| NFS_INO_INVALID_ACL;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_MODE) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MODE;
cache_revalidated = false;
}
@@ -2026,10 +2147,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_uid = fattr->uid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
@@ -2040,10 +2160,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER_GROUP) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
@@ -2054,10 +2173,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_NLINK) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_NLINK;
cache_revalidated = false;
}
@@ -2066,18 +2184,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
+ cache_revalidated = false;
+ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
inode->i_blocks = fattr->du.nfs2.blocks;
- else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_BLOCKS
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
cache_revalidated = false;
}
/* Update attrtimeo value if we're out of the unstable period */
if (attr_changed) {
- invalid &= ~NFS_INO_INVALID_ATTR;
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
@@ -2094,7 +2216,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7395d09..a36af04 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -181,7 +181,7 @@ struct nfs_mount_request {
struct net *net;
};
-extern int nfs_mount(struct nfs_mount_request *info);
+extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 5088fda9..b5551ed 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -104,7 +104,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
}
/**
- * nfs_end_io_direct - declare the file is being used for direct i/o
+ * nfs_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index dda5c3e..c5e3b6b 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -136,14 +136,16 @@ struct mnt_fhstatus {
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
+ * @timeo: deciseconds the mount waits for a response before it retries
+ * @retrans: number of times the mount retries a request
*
- * Uses default timeout parameters specified by underlying transport. On
- * successful return, the auth_flavs list and auth_flav_len will be populated
- * with the list from the server or a faked-up list if the server didn't
- * provide one.
+ * Uses timeout parameters specified by caller. On successful return, the
+ * auth_flavs list and auth_flav_len will be populated with the list from the
+ * server or a faked-up list if the server didn't provide one.
*/
-int nfs_mount(struct nfs_mount_request *info)
+int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
{
+ struct rpc_timeout mnt_timeout;
struct mountres result = {
.fh = info->fh,
.auth_count = info->auth_flav_len,
@@ -158,6 +160,7 @@ int nfs_mount(struct nfs_mount_request *info)
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
+ .timeout = &mnt_timeout,
.servername = info->hostname,
.program = &mnt_program,
.version = info->version,
@@ -177,6 +180,7 @@ int nfs_mount(struct nfs_mount_request *info)
if (info->noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ nfs_init_timeout_values(&mnt_timeout, info->protocol, timeo, retrans);
mnt_clnt = rpc_create(&args);
if (IS_ERR(mnt_clnt))
goto out_clnt_err;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index bb386a6..9ec560a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -65,7 +65,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
return ERR_PTR(-EOPNOTSUPP);
- status = nfs_revalidate_inode(server, inode);
+ status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (status < 0)
return ERR_PTR(status);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index ed1c837..e6eca1d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -433,7 +433,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -EIO;
length = be32_to_cpup(p++);
- if (unlikely(length > NFS3_FHSIZE))
+ if (unlikely(length > NFS3_FHSIZE || length == 0))
goto out_toobig;
p = xdr_inline_decode(xdr, length);
if (unlikely(!p))
@@ -442,7 +442,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
memcpy(fh->data, p, length);
return 0;
out_toobig:
- dprintk("NFS: file handle size (%u) too big\n", length);
+ trace_nfs_xdr_bad_filehandle(xdr, NFSERR_BADHANDLE);
return -E2BIG;
}
@@ -2227,6 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
/* ignore properties */
result->lease_time = 0;
+ result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 094024b..a243495 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,11 +46,12 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
+ u32 bitmask[3];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
- .falloc_bitmask = nfs4_fattr_bitmap,
+ .falloc_bitmask = bitmask,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
@@ -68,6 +69,10 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
return status;
}
+ memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+ if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
return -ENOMEM;
@@ -75,7 +80,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
if (status == 0)
- status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+ status = nfs_post_op_update_inode_force_wcc(inode,
+ res.falloc_fattr);
kfree(res.falloc_fattr);
return status;
@@ -84,7 +90,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
loff_t offset, loff_t len)
{
- struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct inode *inode = file_inode(filep);
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
struct nfs_lock_context *lock;
int err;
@@ -93,9 +100,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
if (IS_ERR(lock))
return PTR_ERR(lock);
- exception.inode = file_inode(filep);
+ exception.inode = inode;
exception.state = lock->open_context->state;
+ err = nfs_sync_inode(inode);
+ if (err)
+ goto out;
+
do {
err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
if (err == -ENOTSUPP) {
@@ -104,7 +115,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
-
+out:
nfs_put_lock_context(lock);
return err;
}
@@ -142,16 +153,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return -EOPNOTSUPP;
inode_lock(inode);
- err = nfs_sync_inode(inode);
- if (err)
- goto out_unlock;
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
inode_unlock(inode);
return err;
}
@@ -261,6 +269,33 @@ static int process_copy_commit(struct file *dst, loff_t pos_dst,
return status;
}
+/**
+ * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
+ * @inode: pointer to destination inode
+ * @pos: destination offset
+ * @len: copy length
+ *
+ * Punch a hole in the inode page cache, so that the NFS client will
+ * know to retrieve new data.
+ * Update the file size if necessary, and then mark the inode as having
+ * invalid cached values for change attribute, ctime, mtime and space used.
+ */
+static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+{
+ loff_t newsize = pos + len;
+ loff_t end = newsize - 1;
+
+ truncate_pagecache_range(inode, pos, end);
+ spin_lock(&inode->i_lock);
+ if (newsize > i_size_read(inode))
+ i_size_write(inode, newsize);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+}
+
static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
struct file *dst,
@@ -354,19 +389,8 @@ static ssize_t _nfs42_proc_copy(struct file *src,
goto out;
}
- truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res->write_res.count);
- spin_lock(&dst_inode->i_lock);
- nfs_set_cache_invalid(
- dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
- NFS_INO_INVALID_DATA);
- spin_unlock(&dst_inode->i_lock);
- spin_lock(&src_inode->i_lock);
- nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
- NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_ATIME);
- spin_unlock(&src_inode->i_lock);
+ nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+ nfs_invalidate_atime(src_inode);
status = res->write_res.count;
out:
if (args->sync)
@@ -659,7 +683,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
@@ -1044,8 +1071,10 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+ }
kfree(res.dst_fattr);
return status;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 6c2ce79..1c4d2a0 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -168,7 +168,7 @@ nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
* make it easier to copy the value after an RPC, even if
* the value will not be passed up to application (e.g.
* for a 'query' getxattr with NULL buffer).
- * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @len: Length of the value. Can be 0 for zero-length attributes.
* @value and @pages will be NULL if @len is 0.
*/
static struct nfs4_xattr_entry *
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 441a2fa..57b3821 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -420,9 +420,7 @@ static const struct nfs4_ssc_client_ops nfs4_ssc_clnt_ops_tbl = {
*/
void nfs42_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
/**
@@ -433,9 +431,7 @@ void nfs42_ssc_register_ops(void)
*/
void nfs42_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 545010d6..87d04f2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,9 +108,10 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+ const __u32 *src, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -263,6 +264,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
+ | FATTR4_WORD2_CHANGE_ATTR_TYPE
| FATTR4_WORD2_XATTR_SUPPORT
};
@@ -283,7 +285,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
};
static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
- struct inode *inode)
+ struct inode *inode, unsigned long flags)
{
unsigned long cache_validity;
@@ -291,22 +293,20 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
return;
- cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- if (!(cache_validity & NFS_INO_REVAL_FORCED))
- cache_validity &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE);
+ cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
+ /* Remove the attributes over which we have full control */
+ dst[1] &= ~FATTR4_WORD1_RAWDEV;
if (!(cache_validity & NFS_INO_INVALID_SIZE))
dst[0] &= ~FATTR4_WORD0_SIZE;
if (!(cache_validity & NFS_INO_INVALID_CHANGE))
dst[0] &= ~FATTR4_WORD0_CHANGE;
-}
-static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
- const __u32 *src, struct inode *inode)
-{
- nfs4_bitmap_copy_adjust(dst, src, inode);
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ dst[1] &= ~FATTR4_WORD1_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -1169,14 +1169,26 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
static void
nfs4_inc_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
inc_nlink(inode);
}
static void
+nfs4_inc_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs4_inc_nlink_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+
+static void
nfs4_dec_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
drop_nlink(inode);
}
@@ -1186,11 +1198,23 @@ nfs4_update_changeattr_locked(struct inode *inode,
unsigned long timestamp, unsigned long cache_validity)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ u64 change_attr = inode_peek_iversion_raw(inode);
cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
- if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
- nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if ((s64)(change_attr - cinfo->after) > 0)
+ goto out;
+ break;
+ default:
+ if ((s64)(change_attr - cinfo->after) >= 0)
+ goto out;
+ }
+
+ if (cinfo->atomic && cinfo->before == change_attr) {
nfsi->attrtimeo_timestamp = jiffies;
} else {
if (S_ISDIR(inode->i_mode)) {
@@ -1202,7 +1226,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cinfo->before != inode_peek_iversion_raw(inode))
+ if (cinfo->before != change_attr)
cache_validity |= NFS_INO_INVALID_ACCESS |
NFS_INO_INVALID_ACL |
NFS_INO_INVALID_XATTR;
@@ -1210,8 +1234,9 @@ nfs4_update_changeattr_locked(struct inode *inode,
inode_set_iversion_raw(inode, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
- nfs_set_cache_invalid(inode, cache_validity);
nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
+out:
+ nfs_set_cache_invalid(inode, cache_validity);
}
void
@@ -3344,12 +3369,17 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
.inode = inode,
.stateid = &arg.stateid,
};
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
int err;
+ if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
+ adjust_flags |= NFS_INO_INVALID_MODE;
+ if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
+ adjust_flags |= NFS_INO_INVALID_OTHER;
+
do {
- nfs4_bitmap_copy_adjust_setattr(bitmask,
- nfs4_bitmask(server, olabel),
- inode);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+ inode, adjust_flags);
err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
switch (err) {
@@ -3591,6 +3621,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
@@ -3647,8 +3678,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
if (!nfs4_have_delegation(inode, FMODE_READ)) {
- calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
- nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+ nfs4_bitmask_set(calldata->arg.bitmask_store,
+ server->cache_consistency_bitmask,
+ inode, server, NULL);
+ calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
}
@@ -3835,12 +3868,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
- server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
- NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
- NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
- NFS_CAP_CTIME|NFS_CAP_MTIME|
- NFS_CAP_SECURITY_LABEL);
+ server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
+ NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
server->caps |= NFS_CAP_ACLS;
@@ -3848,25 +3878,29 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
- if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
- server->caps |= NFS_CAP_FILEID;
- if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
- server->caps |= NFS_CAP_MODE;
- if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
- server->caps |= NFS_CAP_NLINK;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
- server->caps |= NFS_CAP_OWNER;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
- server->caps |= NFS_CAP_OWNER_GROUP;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
- server->caps |= NFS_CAP_ATIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
- server->caps |= NFS_CAP_CTIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
- server->caps |= NFS_CAP_MTIME;
+ if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MODE;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER |
+ NFS_ATTR_FATTR_OWNER_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP |
+ NFS_ATTR_FATTR_GROUP_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
- if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
- server->caps |= NFS_CAP_SECURITY_LABEL;
+ if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
#endif
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
@@ -4154,8 +4188,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
task_flags |= RPC_TASK_TIMEOUT;
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
nfs_fattr_init(fattr);
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
return nfs4_do_call_sync(server->client, server, &msg,
@@ -4582,11 +4615,11 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
- NFS_INO_INVALID_DATA);
/* Removing a directory decrements nlink in the parent */
if (ftype == NF4DIR && dir->i_nlink > 2)
nfs4_dec_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
}
return status;
@@ -4715,11 +4748,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
nfs4_update_changeattr(new_dir, &res->new_cinfo,
res->new_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
} else
nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -4761,12 +4794,13 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
}
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+ NFS_INO_INVALID_CHANGE);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
NFS_INO_INVALID_DATA);
+ nfs4_inc_nlink(inode);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
@@ -4844,12 +4878,12 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start,
- NFS_INO_INVALID_DATA);
/* Creating a directory bumps nlink in the parent */
if (data->arg.ftype == NF4DIR)
nfs4_inc_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
@@ -5416,37 +5450,39 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label)
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+ struct inode *inode, struct nfs_server *server,
+ struct nfs4_label *label)
{
-
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ unsigned int i;
- if ((cache_validity & NFS_INO_INVALID_DATA) ||
- (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
- (cache_validity & NFS_INO_REVAL_FORCED) ||
- (cache_validity & NFS_INO_INVALID_OTHER))
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+ memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
- if (cache_validity & NFS_INO_INVALID_ATIME)
- bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
- if (cache_validity & NFS_INO_INVALID_OTHER)
- bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
- FATTR4_WORD1_OWNER_GROUP |
- FATTR4_WORD1_NUMLINKS;
- if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
- bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
+ if (cache_validity & NFS_INO_INVALID_ATIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+ if (cache_validity & NFS_INO_INVALID_MODE)
+ bitmask[1] |= FATTR4_WORD1_MODE;
+ if (cache_validity & NFS_INO_INVALID_OTHER)
+ bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
+ if (cache_validity & NFS_INO_INVALID_NLINK)
+ bitmask[1] |= FATTR4_WORD1_NUMLINKS;
+ if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
+ bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
- if (cache_validity & NFS_INO_INVALID_SIZE)
- bitmask[0] |= FATTR4_WORD0_SIZE;
if (cache_validity & NFS_INO_INVALID_BLOCKS)
bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
+ if (cache_validity & NFS_INO_INVALID_SIZE)
+ bitmask[0] |= FATTR4_WORD0_SIZE;
+
+ for (i = 0; i < NFS4_BITMASK_SZ; i++)
+ bitmask[i] &= server->attr_bitmask[i];
}
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
@@ -5459,8 +5495,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
hdr->args.bitmask = NULL;
hdr->res.fattr = NULL;
} else {
- hdr->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+ nfs4_bitmask_set(hdr->args.bitmask_store,
+ server->cache_consistency_bitmask,
+ hdr->inode, server, NULL);
+ hdr->args.bitmask = hdr->args.bitmask_store;
}
if (!hdr->pgio_done_cb)
@@ -5858,7 +5896,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
- ret = nfs_revalidate_inode(server, inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret < 0)
return ret;
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
@@ -6502,8 +6540,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
+ nfs4_bitmask_set(data->args.bitmask_store,
+ server->cache_consistency_bitmask, inode, server,
+ NULL);
+ data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
@@ -7250,22 +7290,22 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
#ifdef CONFIG_NFS_V4_1
struct nfs4_lock_waiter {
- struct task_struct *task;
struct inode *inode;
- struct nfs_lowner *owner;
+ struct nfs_lowner owner;
+ wait_queue_entry_t wait;
};
static int
nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
{
- int ret;
- struct nfs4_lock_waiter *waiter = wait->private;
+ struct nfs4_lock_waiter *waiter =
+ container_of(wait, struct nfs4_lock_waiter, wait);
/* NULL key means to wake up everyone */
if (key) {
struct cb_notify_lock_args *cbnl = key;
struct nfs_lowner *lowner = &cbnl->cbnl_owner,
- *wowner = waiter->owner;
+ *wowner = &waiter->owner;
/* Only wake if the callback was for the same owner. */
if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
@@ -7276,53 +7316,45 @@ nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, vo
return 0;
}
- /* override "private" so we can use default_wake_function */
- wait->private = waiter->task;
- ret = woken_wake_function(wait, mode, flags, key);
- if (ret)
- list_del_init(&wait->entry);
- wait->private = waiter;
- return ret;
+ return woken_wake_function(wait, mode, flags, key);
}
static int
nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- int status = -ERESTARTSYS;
struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs_client *clp = server->nfs_client;
wait_queue_head_t *q = &clp->cl_lock_waitq;
- struct nfs_lowner owner = { .clientid = clp->cl_clientid,
- .id = lsp->ls_seqid.owner_id,
- .s_dev = server->s_dev };
- struct nfs4_lock_waiter waiter = { .task = current,
- .inode = state->inode,
- .owner = &owner};
- wait_queue_entry_t wait;
+ struct nfs4_lock_waiter waiter = {
+ .inode = state->inode,
+ .owner = { .clientid = clp->cl_clientid,
+ .id = lsp->ls_seqid.owner_id,
+ .s_dev = server->s_dev },
+ };
+ int status;
/* Don't bother with waitqueue if we don't expect a callback */
if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
return nfs4_retry_setlk_simple(state, cmd, request);
- init_wait(&wait);
- wait.private = &waiter;
- wait.func = nfs4_wake_lock_waiter;
+ init_wait(&waiter.wait);
+ waiter.wait.func = nfs4_wake_lock_waiter;
+ add_wait_queue(q, &waiter.wait);
- while(!signalled()) {
- add_wait_queue(q, &wait);
+ do {
status = nfs4_proc_setlk(state, cmd, request);
- if ((status != -EAGAIN) || IS_SETLK(cmd)) {
- finish_wait(q, &wait);
+ if (status != -EAGAIN || IS_SETLK(cmd))
break;
- }
status = -ERESTARTSYS;
freezer_do_not_count();
- wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
+ wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+ NFS4_LOCK_MAXTIMEOUT);
freezer_count();
- finish_wait(q, &wait);
- }
+ } while (!signalled());
+
+ remove_wait_queue(q, &waiter.wait);
return status;
}
@@ -7615,7 +7647,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
return -EACCES;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
@@ -7646,7 +7678,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
return 0;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2eec5bb..f22818a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -645,7 +645,7 @@ void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
}
/**
- * nfs4_purge_state_owners - Release all cached state owners
+ * nfs4_free_state_owners - Release all cached state owners
* @head: resulting list of state owners
*
* Frees a list of state owners that was generated by
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 48d761e..2ef75ca 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -666,7 +666,42 @@ TRACE_EVENT(nfs4_state_mgr_failed,
)
)
-TRACE_EVENT(nfs4_xdr_status,
+TRACE_EVENT(nfs4_xdr_bad_operation,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ u32 op,
+ u32 expected
+ ),
+
+ TP_ARGS(xdr, op, expected),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, op)
+ __field(u32, expected)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_rqst *rqstp = xdr->rqst;
+ const struct rpc_task *task = rqstp->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->op = op;
+ __entry->expected = expected;
+ ),
+
+ TP_printk(
+ "task:%u@%d xid=0x%08x operation=%u, expected=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->op, __entry->expected
+ )
+);
+
+DECLARE_EVENT_CLASS(nfs4_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
u32 op,
@@ -701,6 +736,16 @@ TRACE_EVENT(nfs4_xdr_status,
__entry->op
)
);
+#define DEFINE_NFS4_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs4_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ u32 op, \
+ u32 error \
+ ), \
+ TP_ARGS(xdr, op, error))
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_status);
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_bad_filehandle);
DECLARE_EVENT_CLASS(nfs4_cb_error_class,
TP_PROTO(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d4fd3be..a8cff19 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -144,7 +144,17 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
* layout types will be returned.
*/
#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 4 + 8 + 5)
+ nfs4_fattr_bitmap_maxsz + 1 + \
+ 1 /* lease time */ + \
+ 2 /* max filesize */ + \
+ 2 /* max read */ + \
+ 2 /* max write */ + \
+ nfstime4_maxsz /* time delta */ + \
+ 5 /* fs layout types */ + \
+ 1 /* layout blksize */ + \
+ 1 /* clone blksize */ + \
+ 1 /* change attr type */ + \
+ 1 /* xattr support */)
#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
#define decode_renew_maxsz (op_decode_hdr_maxsz)
#define encode_setclientid_maxsz \
@@ -3200,9 +3210,7 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
*nfs_retval = nfs4_stat_to_errno(nfserr);
return true;
out_bad_operation:
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
+ trace_nfs4_xdr_bad_operation(xdr, opnum, expected);
*nfs_retval = -EREMOTEIO;
return false;
out_overflow:
@@ -3487,8 +3495,11 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_READDIR,
+ NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
return -EIO;
@@ -4837,6 +4848,32 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
return 0;
}
+static int decode_attr_change_attr_type(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ enum nfs4_change_attr_type *res)
+{
+ u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
+ dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+ if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+ if (xdr_stream_decode_u32(xdr, &tmp))
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE;
+ }
+
+ switch(tmp) {
+ case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS:
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ *res = tmp;
+ break;
+ default:
+ *res = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ }
+ return 0;
+}
+
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
unsigned int savep;
@@ -4885,6 +4922,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
if (status)
goto xdr_error;
+ status = decode_attr_change_attr_type(xdr, bitmap,
+ &fsinfo->change_attr_type);
+ if (status)
+ goto xdr_error;
+
status = decode_attr_xattrsupport(xdr, bitmap,
&fsinfo->xattr_support);
if (status)
@@ -4913,8 +4955,10 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_GETFH, NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
fh->size = len;
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c
index a90b363..5d1bfcc 100644
--- a/fs/nfs/nfstrace.c
+++ b/fs/nfs/nfstrace.c
@@ -12,3 +12,4 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_bad_filehandle);
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 5a59dcd..eb1ef34 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -45,6 +45,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
#define nfs_show_cache_validity(v) \
__print_flags(v, "|", \
@@ -60,7 +65,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
{ NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
{ NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
{ NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
- { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
+ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \
+ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
+ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
+ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" })
TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
TRACE_DEFINE_ENUM(NFS_INO_STALE);
@@ -1392,7 +1401,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
{ NFSERR_BADTYPE, "BADTYPE" }, \
{ NFSERR_JUKEBOX, "JUKEBOX" })
-TRACE_EVENT(nfs_xdr_status,
+DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
int error
@@ -1434,6 +1443,15 @@ TRACE_EVENT(nfs_xdr_status,
nfs_show_status(__entry->error)
)
);
+#define DEFINE_NFS_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ int error \
+ ), \
+ TP_ARGS(xdr, error))
+DEFINE_NFS_XDR_EVENT(nfs_xdr_status);
+DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle);
#endif /* _TRACE_NFS_H */
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 78c9c4b..6c20b28 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -577,7 +577,7 @@ static void nfs_clear_request(struct nfs_page *req)
}
/**
- * nfs_release_request - Release the count on an NFS read/write request
+ * nfs_free_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
@@ -1152,7 +1152,7 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
}
/**
- * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
+ * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
* @desc: destination io descriptor
* @req: request
*
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 102b66e..03e0b34 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1344,7 +1344,7 @@ _pnfs_return_layout(struct inode *ino)
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
struct pnfs_layout_range range = {
@@ -2410,9 +2410,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
.iomode = IOMODE_ANY,
.length = NFS4_MAX_UINT64,
};
- pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
- pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
goto out_forget;
} else {
/* We have a completely new layout */
@@ -2468,6 +2466,9 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
assert_spin_locked(&lo->plh_inode->i_lock);
+ if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ tmp_list = &lo->plh_return_segs;
+
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
dprintk("%s: marking lseg %p iomode %d "
@@ -2475,6 +2476,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
+ if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ tmp_list = &lo->plh_return_segs;
if (mark_lseg_invalid(lseg, tmp_list))
continue;
remaining++;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 73ab7c5..ea19dbf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -91,6 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
+ info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4aaa1f5..19a212f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -116,16 +116,12 @@ static void unregister_nfs4_fs(void)
#ifdef CONFIG_NFS_V4_2
static void nfs_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
-#endif
}
static void nfs_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
@@ -867,7 +863,7 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request);
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f05a903..3bf8217 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -764,9 +764,6 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
* with invalidate/truncate.
*/
spin_lock(&mapping->private_lock);
- if (!nfs_have_writebacks(inode) &&
- NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
- inode_inc_iversion_raw(inode);
if (likely(!PageSwapCache(req->wb_page))) {
set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
@@ -1293,7 +1290,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
if (nfs_have_delegated_attributes(inode))
goto out;
if (nfsi->cache_validity &
- (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
return false;
smp_rmb();
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
@@ -1604,7 +1601,7 @@ static int nfs_writeback_done(struct rpc_task *task,
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
spin_unlock(&inode->i_lock);
}
return 0;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 5fa38ad..f229172 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -138,7 +138,7 @@
config NFSD_V4_2_INTER_SSC
bool "NFSv4.2 inter server to server COPY"
- depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+ depends on NFSD_V4 && NFS_V4_2
help
This option enables support for NFSv4.2 inter server to
server copy where the destination server calls the NFSv4.2
diff --git a/fs/unicode/.gitignore b/fs/unicode/.gitignore
index 9b2467e..3612945 100644
--- a/fs/unicode/.gitignore
+++ b/fs/unicode/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-mkutf8data
-utf8data.h
+/mkutf8data
+/utf8data.h
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3bd3ee6..3594869 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -313,12 +313,12 @@ struct blk_mq_ops {
*/
void (*put_budget)(struct request_queue *, int);
- /*
- * @set_rq_budget_toekn: store rq's budget token
+ /**
+ * @set_rq_budget_token: store rq's budget token
*/
void (*set_rq_budget_token)(struct request *, int);
- /*
- * @get_rq_budget_toekn: retrieve rq's budget token
+ /**
+ * @get_rq_budget_token: retrieve rq's budget token
*/
int (*get_rq_budget_token)(struct request *);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6023a13..0684151 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -302,10 +302,11 @@ struct bpf_verifier_state_list {
};
/* Possible states for alu_state member. */
-#define BPF_ALU_SANITIZE_SRC 1U
-#define BPF_ALU_SANITIZE_DST 2U
+#define BPF_ALU_SANITIZE_SRC (1U << 0)
+#define BPF_ALU_SANITIZE_DST (1U << 1)
#define BPF_ALU_NEG_VALUE (1U << 2)
#define BPF_ALU_NON_POINTER (1U << 3)
+#define BPF_ALU_IMMEDIATE (1U << 4)
#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
BPF_ALU_SANITIZE_DST)
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index bceb064..4d7fced 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -71,6 +71,19 @@ static inline void exception_exit(enum ctx_state prev_ctx)
}
}
+static __always_inline bool context_tracking_guest_enter(void)
+{
+ if (context_tracking_enabled())
+ __context_tracking_enter(CONTEXT_GUEST);
+
+ return context_tracking_enabled_this_cpu();
+}
+
+static __always_inline void context_tracking_guest_exit(void)
+{
+ if (context_tracking_enabled())
+ __context_tracking_exit(CONTEXT_GUEST);
+}
/**
* ct_state() - return the current context tracking state if known
@@ -92,6 +105,9 @@ static inline void user_exit_irqoff(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
+static inline bool context_tracking_guest_enter(void) { return false; }
+static inline void context_tracking_guest_exit(void) { }
+
#endif /* !CONFIG_CONTEXT_TRACKING */
#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
@@ -102,80 +118,4 @@ extern void context_tracking_init(void);
static inline void context_tracking_init(void) { }
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-/* must be called with irqs disabled */
-static __always_inline void guest_enter_irqoff(void)
-{
- instrumentation_begin();
- if (vtime_accounting_enabled_this_cpu())
- vtime_guest_enter(current);
- else
- current->flags |= PF_VCPU;
- instrumentation_end();
-
- if (context_tracking_enabled())
- __context_tracking_enter(CONTEXT_GUEST);
-
- /* KVM does not hold any references to rcu protected data when it
- * switches CPU into a guest mode. In fact switching to a guest mode
- * is very similar to exiting to userspace from rcu point of view. In
- * addition CPU may stay in a guest mode for quite a long time (up to
- * one time slice). Lets treat guest mode as quiescent state, just like
- * we do with user-mode execution.
- */
- if (!context_tracking_enabled_this_cpu()) {
- instrumentation_begin();
- rcu_virt_note_context_switch(smp_processor_id());
- instrumentation_end();
- }
-}
-
-static __always_inline void guest_exit_irqoff(void)
-{
- if (context_tracking_enabled())
- __context_tracking_exit(CONTEXT_GUEST);
-
- instrumentation_begin();
- if (vtime_accounting_enabled_this_cpu())
- vtime_guest_exit(current);
- else
- current->flags &= ~PF_VCPU;
- instrumentation_end();
-}
-
-#else
-static __always_inline void guest_enter_irqoff(void)
-{
- /*
- * This is running in ioctl context so its safe
- * to assume that it's the stime pending cputime
- * to flush.
- */
- instrumentation_begin();
- vtime_account_kernel(current);
- current->flags |= PF_VCPU;
- rcu_virt_note_context_switch(smp_processor_id());
- instrumentation_end();
-}
-
-static __always_inline void guest_exit_irqoff(void)
-{
- instrumentation_begin();
- /* Flush the guest cputime we spent on the guest */
- vtime_account_kernel(current);
- current->flags &= ~PF_VCPU;
- instrumentation_end();
-}
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-
-static inline void guest_exit(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- guest_exit_irqoff();
- local_irq_restore(flags);
-}
-
#endif
diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h
index 24a59cb..cc8fa10 100644
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -70,10 +70,4 @@
*/
#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
-/*
- * IF_ENABLED(CONFIG_FOO, ptr) evaluates to (ptr) if CONFIG_FOO is set to 'y'
- * or 'm', NULL otherwise.
- */
-#define IF_ENABLED(option, ptr) (IS_ENABLED(option) ? (ptr) : NULL)
-
#endif /* __LINUX_KCONFIG_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 09035ac..15d8bad 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -41,6 +41,8 @@
*/
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
+
#define u64_to_user_ptr(x) ( \
{ \
typecheck(u64, (x)); \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8895b95..2f34487 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -338,6 +338,51 @@ struct kvm_vcpu {
struct kvm_dirty_ring dirty_ring;
};
+/* must be called with irqs disabled */
+static __always_inline void guest_enter_irqoff(void)
+{
+ /*
+ * This is running in ioctl context so its safe to assume that it's the
+ * stime pending cputime to flush.
+ */
+ instrumentation_begin();
+ vtime_account_guest_enter();
+ instrumentation_end();
+
+ /*
+ * KVM does not hold any references to rcu protected data when it
+ * switches CPU into a guest mode. In fact switching to a guest mode
+ * is very similar to exiting to userspace from rcu point of view. In
+ * addition CPU may stay in a guest mode for quite a long time (up to
+ * one time slice). Lets treat guest mode as quiescent state, just like
+ * we do with user-mode execution.
+ */
+ if (!context_tracking_guest_enter()) {
+ instrumentation_begin();
+ rcu_virt_note_context_switch(smp_processor_id());
+ instrumentation_end();
+ }
+}
+
+static __always_inline void guest_exit_irqoff(void)
+{
+ context_tracking_guest_exit();
+
+ instrumentation_begin();
+ /* Flush the guest cputime we spent on the guest */
+ vtime_account_guest_exit();
+ instrumentation_end();
+}
+
+static inline void guest_exit(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ guest_exit_irqoff();
+ local_irq_restore(flags);
+}
+
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
/*
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 2aab961..4f9a4b3 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -53,8 +53,7 @@ int arpt_register_table(struct net *net, const struct xt_table *table,
const struct arpt_replace *repl,
const struct nf_hook_ops *ops);
void arpt_unregister_table(struct net *net, const char *name);
-void arpt_unregister_table_pre_exit(struct net *net, const char *name,
- const struct nf_hook_ops *ops);
+void arpt_unregister_table_pre_exit(struct net *net, const char *name);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 5b4c67c9..15004c46 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -452,6 +452,7 @@ enum lock_type4 {
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
@@ -709,6 +710,14 @@ struct nl4_server {
} u;
};
+enum nfs4_change_attr_type {
+ NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+ NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+ NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
+};
+
/*
* Options for setxattr. These match the flags for setxattr(2).
*/
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index eadaabd..ffba254 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -246,11 +246,15 @@ struct nfs4_copy_state {
BIT(13) /* Deferred cache invalidation */
#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */
#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */
+#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlink is invalid */
+#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */
#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \
| NFS_INO_INVALID_CTIME \
| NFS_INO_INVALID_MTIME \
| NFS_INO_INVALID_SIZE \
+ | NFS_INO_INVALID_NLINK \
+ | NFS_INO_INVALID_MODE \
| NFS_INO_INVALID_OTHER) /* inode metadata is invalid */
/*
@@ -386,7 +390,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
extern int nfs_permission(struct user_namespace *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_attribute_cache_expired(struct inode *inode);
-extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_clear_invalid_mapping(struct address_space *mapping);
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a28d71b..d71a0e9 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -156,6 +156,7 @@ struct nfs_server {
#define NFS_MOUNT_WRITE_EAGER 0x01000000
#define NFS_MOUNT_WRITE_WAIT 0x02000000
+ unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
unsigned int rpages; /* read size (in pages) */
@@ -180,6 +181,9 @@ struct nfs_server {
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
+ enum nfs4_change_attr_type
+ change_attr_type;/* Description of change attribute */
+
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
struct timespec64 time_delta; /* smallest time granularity */
@@ -265,16 +269,7 @@ struct nfs_server {
#define NFS_CAP_SYMLINKS (1U << 2)
#define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
-/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */
#define NFS_CAP_LGOPEN (1U << 5)
-#define NFS_CAP_FILEID (1U << 6)
-#define NFS_CAP_MODE (1U << 7)
-#define NFS_CAP_NLINK (1U << 8)
-#define NFS_CAP_OWNER (1U << 9)
-#define NFS_CAP_OWNER_GROUP (1U << 10)
-#define NFS_CAP_ATIME (1U << 11)
-#define NFS_CAP_CTIME (1U << 12)
-#define NFS_CAP_MTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3327239..717ecc8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -15,6 +15,8 @@
#define NFS_DEF_FILE_IO_SIZE (4096U)
#define NFS_MIN_FILE_IO_SIZE (1024U)
+#define NFS_BITMASK_SZ 3
+
struct nfs4_string {
unsigned int len;
char *data;
@@ -150,6 +152,8 @@ struct nfs_fsinfo {
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
+ enum nfs4_change_attr_type
+ change_attr_type; /* Info about change attr */
__u32 xattr_support; /* User xattrs supported */
};
@@ -525,7 +529,8 @@ struct nfs_closeargs {
struct nfs_seqid * seqid;
fmode_t fmode;
u32 share_access;
- u32 * bitmask;
+ const u32 * bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
@@ -608,7 +613,8 @@ struct nfs4_delegreturnargs {
struct nfs4_sequence_args seq_args;
const struct nfs_fh *fhandle;
const nfs4_stateid *stateid;
- u32 * bitmask;
+ const u32 *bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
@@ -648,7 +654,8 @@ struct nfs_pgio_args {
union {
unsigned int replen; /* used by read */
struct {
- u32 * bitmask; /* used by write */
+ const u32 * bitmask; /* used by write */
+ u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */
enum nfs3_stable_how stable; /* used by write */
};
};
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index 6035d9a..45f53af 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -5679,6 +5679,7 @@ enum tcpc_cc_polarity {
#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0)
#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1)
+#define PD_STATUS_EVENT_HARD_RESET BIT(2)
struct ec_params_typec_status {
uint8_t port;
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 669e35c..510519e 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,7 +53,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask);
-int smp_call_function_single_async(int cpu, call_single_data_t *csd);
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
/*
* Cpus stopping functions in panic. All have default weak definitions.
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index d2e97ee..d81fe8b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -247,6 +247,7 @@ struct rpc_xprt {
struct rpc_task * snd_task; /* Task blocked in send */
struct list_head xmit_queue; /* Send queue */
+ atomic_long_t xmit_queuelen;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 041d652..3684487 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -3,12 +3,46 @@
#define _LINUX_KERNEL_VTIME_H
#include <linux/context_tracking_state.h>
+#include <linux/sched.h>
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm/vtime.h>
#endif
+/*
+ * Common vtime APIs
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_account_kernel(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-struct task_struct;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_user_enter(struct task_struct *tsk);
+extern void vtime_user_exit(struct task_struct *tsk);
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk, int cpu);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
+extern void vtime_account_softirq(struct task_struct *tsk);
+extern void vtime_account_hardirq(struct task_struct *tsk);
+extern void vtime_flush(struct task_struct *tsk);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
+static inline void vtime_account_softirq(struct task_struct *tsk) { }
+static inline void vtime_account_hardirq(struct task_struct *tsk) { }
+static inline void vtime_flush(struct task_struct *tsk) { }
+#endif
/*
* vtime_accounting_enabled_this_cpu() definitions/declarations
@@ -18,6 +52,18 @@ struct task_struct;
static inline bool vtime_accounting_enabled_this_cpu(void) { return true; }
extern void vtime_task_switch(struct task_struct *prev);
+static __always_inline void vtime_account_guest_enter(void)
+{
+ vtime_account_kernel(current);
+ current->flags |= PF_VCPU;
+}
+
+static __always_inline void vtime_account_guest_exit(void)
+{
+ vtime_account_kernel(current);
+ current->flags &= ~PF_VCPU;
+}
+
#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
/*
@@ -49,49 +95,37 @@ static inline void vtime_task_switch(struct task_struct *prev)
vtime_task_switch_generic(prev);
}
+static __always_inline void vtime_account_guest_enter(void)
+{
+ if (vtime_accounting_enabled_this_cpu())
+ vtime_guest_enter(current);
+ else
+ current->flags |= PF_VCPU;
+}
+
+static __always_inline void vtime_account_guest_exit(void)
+{
+ if (vtime_accounting_enabled_this_cpu())
+ vtime_guest_exit(current);
+ else
+ current->flags &= ~PF_VCPU;
+}
+
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; }
static inline bool vtime_accounting_enabled_this_cpu(void) { return false; }
static inline void vtime_task_switch(struct task_struct *prev) { }
-#endif
+static __always_inline void vtime_account_guest_enter(void)
+{
+ current->flags |= PF_VCPU;
+}
-/*
- * Common vtime APIs
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_account_kernel(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-static inline void vtime_account_kernel(struct task_struct *tsk) { }
-#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static __always_inline void vtime_account_guest_exit(void)
+{
+ current->flags &= ~PF_VCPU;
+}
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_user_enter(struct task_struct *tsk);
-extern void vtime_user_exit(struct task_struct *tsk);
-extern void vtime_guest_enter(struct task_struct *tsk);
-extern void vtime_guest_exit(struct task_struct *tsk);
-extern void vtime_init_idle(struct task_struct *tsk, int cpu);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-static inline void vtime_user_enter(struct task_struct *tsk) { }
-static inline void vtime_user_exit(struct task_struct *tsk) { }
-static inline void vtime_guest_enter(struct task_struct *tsk) { }
-static inline void vtime_guest_exit(struct task_struct *tsk) { }
-static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
-#endif
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-extern void vtime_account_irq(struct task_struct *tsk, unsigned int offset);
-extern void vtime_account_softirq(struct task_struct *tsk);
-extern void vtime_account_hardirq(struct task_struct *tsk);
-extern void vtime_flush(struct task_struct *tsk);
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-static inline void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { }
-static inline void vtime_account_softirq(struct task_struct *tsk) { }
-static inline void vtime_account_hardirq(struct task_struct *tsk) { }
-static inline void vtime_flush(struct task_struct *tsk) { }
#endif
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index e8df72e..5e84888 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -68,7 +68,6 @@ enum sctp_verb {
SCTP_CMD_ASSOC_FAILED, /* Handle association failure. */
SCTP_CMD_DISCARD_PACKET, /* Discard the whole packet. */
SCTP_CMD_GEN_SHUTDOWN, /* Generate a SHUTDOWN chunk. */
- SCTP_CMD_UPDATE_ASSOC, /* Update association information. */
SCTP_CMD_PURGE_OUTQUEUE, /* Purge all data waiting to be sent. */
SCTP_CMD_SETUP_T2, /* Hi-level, setup T2-shutdown parms. */
SCTP_CMD_RTO_PENDING, /* Set transport's rto_pending. */
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index c838e7a..bd55908 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
), \
TP_ARGS(wc, cid))
+DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class,
+ TP_PROTO(
+ const struct ib_wc *wc,
+ const struct rpc_rdma_cid *cid
+ ),
+
+ TP_ARGS(wc, cid),
+
+ TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(unsigned long, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->status = wc->status;
+ if (wc->status)
+ __entry->vendor_err = wc->vendor_err;
+ else
+ __entry->vendor_err = 0;
+ ),
+
+ TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)",
+ __entry->cq_id, __entry->completion_id,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+#define DEFINE_MR_COMPLETION_EVENT(name) \
+ DEFINE_EVENT(rpcrdma_mr_completion_class, name, \
+ TP_PROTO( \
+ const struct ib_wc *wc, \
+ const struct rpc_rdma_cid *cid \
+ ), \
+ TP_ARGS(wc, cid))
+
DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
TP_PROTO(
const struct ib_wc *wc,
@@ -150,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt,
TP_ARGS(r_xprt),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p",
- __get_str(addr), __get_str(port), __entry->r_xprt
+ TP_printk("peer=[%s]:%s",
+ __get_str(addr), __get_str(port)
)
);
@@ -182,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
TP_ARGS(r_xprt, rc),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(int, rc)
__field(int, connect_status)
__string(addr, rpcrdma_addrstr(r_xprt))
@@ -190,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->rc = rc;
__entry->connect_status = r_xprt->rx_ep->re_connect_status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s rc=%d connection status=%d",
+ __get_str(addr), __get_str(port),
__entry->rc, __entry->connect_status
)
);
@@ -343,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class,
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
@@ -384,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class,
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
@@ -495,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect,
TP_ARGS(r_xprt, delay),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, delay)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->delay = delay;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->delay
+ TP_printk("peer=[%s]:%s delay=%lu",
+ __get_str(addr), __get_str(port), __entry->delay
)
);
@@ -525,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto,
TP_ARGS(r_xprt, connect, reconnect),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, connect)
__field(unsigned long, reconnect)
__string(addr, rpcrdma_addrstr(r_xprt))
@@ -533,51 +565,18 @@ TRACE_EVENT(xprtrdma_op_set_cto,
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->connect = connect;
__entry->reconnect = reconnect;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu",
+ __get_str(addr), __get_str(port),
__entry->connect / HZ, __entry->reconnect / HZ
)
);
-TRACE_EVENT(xprtrdma_qp_event,
- TP_PROTO(
- const struct rpcrdma_ep *ep,
- const struct ib_event *event
- ),
-
- TP_ARGS(ep, event),
-
- TP_STRUCT__entry(
- __field(unsigned long, event)
- __string(name, event->device->name)
- __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
- __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
- ),
-
- TP_fast_assign(
- const struct rdma_cm_id *id = ep->re_id;
-
- __entry->event = event->event;
- __assign_str(name, event->device->name);
- memcpy(__entry->srcaddr, &id->route.addr.src_addr,
- sizeof(struct sockaddr_in6));
- memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
- sizeof(struct sockaddr_in6));
- ),
-
- TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
- __entry->srcaddr, __entry->dstaddr, __get_str(name),
- rdma_show_ib_event(__entry->event), __entry->event
- )
-);
-
/**
** Call events
**/
@@ -591,22 +590,19 @@ TRACE_EVENT(xprtrdma_createmrs,
TP_ARGS(r_xprt, count),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
__field(unsigned int, count)
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->count = count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->count
+ TP_printk("peer=[%s]:%s created %u MRs",
+ __get_str(addr), __get_str(port), __entry->count
)
);
@@ -829,7 +825,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
TP_ARGS(r_xprt, count, status),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
+ __field(u32, cq_id)
__field(unsigned int, count)
__field(int, status)
__field(int, posted)
@@ -838,16 +834,18 @@ TRACE_EVENT(xprtrdma_post_recvs,
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
+ const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+ __entry->cq_id = ep->re_attr.recv_cq->res.id;
__entry->count = count;
__entry->status = status;
- __entry->posted = r_xprt->rx_ep->re_receive_count;
+ __entry->posted = ep->re_receive_count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+ __get_str(addr), __get_str(port), __entry->cq_id,
__entry->count, __entry->posted, __entry->status
)
);
@@ -886,10 +884,10 @@ TRACE_EVENT(xprtrdma_post_linv_err,
DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done);
TRACE_EVENT(xprtrdma_frwr_alloc,
TP_PROTO(
@@ -905,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc,
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->rc = rc;
),
@@ -933,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
@@ -966,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr,
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->nents = sg_nents;
@@ -996,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->num_mapped = num_mapped;
@@ -1010,11 +1008,12 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
)
);
+DEFINE_MR_EVENT(fastreg);
DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(reminv);
DEFINE_MR_EVENT(map);
DEFINE_ANON_MR_EVENT(unmap);
-DEFINE_ANON_MR_EVENT(recycle);
TRACE_EVENT(xprtrdma_dma_maperr,
TP_PROTO(
@@ -1248,22 +1247,19 @@ TRACE_EVENT(xprtrdma_cb_setup,
TP_ARGS(r_xprt, reqs),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned int, reqs)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->reqs = reqs;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
- __get_str(addr), __get_str(port),
- __entry->r_xprt, __entry->reqs
+ TP_printk("peer=[%s]:%s %u reqs",
+ __get_str(addr), __get_str(port), __entry->reqs
)
);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index bda16e9..d02e01a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit,
__entry->seqno, __entry->status)
);
+TRACE_EVENT(xprt_retransmit,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(int, ntrans)
+ __field(int, version)
+ __string(progname,
+ rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ struct rpc_task *task = rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->ntrans = rqst->rq_ntrans;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->ntrans)
+);
+
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),
@@ -1141,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);
-DEFINE_WRITELOCK_EVENT(transmit_queued);
DECLARE_EVENT_CLASS(xprt_cong_event,
TP_PROTO(
diff --git a/include/uapi/linux/netfilter/xt_SECMARK.h b/include/uapi/linux/netfilter/xt_SECMARK.h
index 1f2a7084..beb2cad 100644
--- a/include/uapi/linux/netfilter/xt_SECMARK.h
+++ b/include/uapi/linux/netfilter/xt_SECMARK.h
@@ -20,4 +20,10 @@ struct xt_secmark_target_info {
char secctx[SECMARK_SECCTX_MAX];
};
+struct xt_secmark_target_info_v1 {
+ __u8 mode;
+ char secctx[SECMARK_SECCTX_MAX];
+ __u32 secid;
+};
+
#endif /*_XT_SECMARK_H_target */
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index 3b39ef1d..5ae3ace8 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -27,6 +27,7 @@ enum {
SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
SEG6_LOCAL_VRFTABLE,
+ SEG6_LOCAL_COUNTERS,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@@ -78,4 +79,33 @@ enum {
#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+/* SRv6 Behavior counters are encoded as netlink attributes guaranteeing the
+ * correct alignment.
+ * Each counter is identified by a different attribute type (i.e.
+ * SEG6_LOCAL_CNT_PACKETS).
+ *
+ * - SEG6_LOCAL_CNT_PACKETS: identifies a counter that counts the number of
+ * packets that have been CORRECTLY processed by an SRv6 Behavior instance
+ * (i.e., packets that generate errors or are dropped are NOT counted).
+ *
+ * - SEG6_LOCAL_CNT_BYTES: identifies a counter that counts the total amount
+ * of traffic in bytes of all packets that have been CORRECTLY processed by
+ * an SRv6 Behavior instance (i.e., packets that generate errors or are
+ * dropped are NOT counted).
+ *
+ * - SEG6_LOCAL_CNT_ERRORS: identifies a counter that counts the number of
+ * packets that have NOT been properly processed by an SRv6 Behavior instance
+ * (i.e., packets that generate errors or are dropped).
+ */
+enum {
+ SEG6_LOCAL_CNT_UNSPEC,
+ SEG6_LOCAL_CNT_PAD, /* pad for 64-bit values */
+ SEG6_LOCAL_CNT_PACKETS,
+ SEG6_LOCAL_CNT_BYTES,
+ SEG6_LOCAL_CNT_ERRORS,
+ __SEG6_LOCAL_CNT_MAX,
+};
+
+#define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1)
+
#endif
diff --git a/kernel/.gitignore b/kernel/.gitignore
index 78701ea..c6b299a 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-kheaders.md5
-timeconst.h
-hz.bc
+/config_data
+/kheaders.md5
diff --git a/kernel/Makefile b/kernel/Makefile
index e8a6715f..4df609b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -142,10 +142,15 @@
$(obj)/configs.o: $(obj)/config_data.gz
-targets += config_data.gz
-$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
+targets += config_data config_data.gz
+$(obj)/config_data.gz: $(obj)/config_data FORCE
$(call if_changed,gzip)
+filechk_cat = cat $<
+
+$(obj)/config_data: $(KCONFIG_CONFIG) FORCE
+ $(call filechk,cat)
+
$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8fd552c..757476c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6496,6 +6496,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
{
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_verifier_state *vstate = env->cur_state;
+ bool off_is_imm = tnum_is_const(off_reg->var_off);
bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
@@ -6526,6 +6527,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
alu_limit = abs(tmp_aux->alu_limit - alu_limit);
} else {
alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
alu_state |= ptr_is_dst_reg ?
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
}
@@ -12371,7 +12373,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
struct bpf_insn *patch = &insn_buf[0];
- bool issrc, isneg;
+ bool issrc, isneg, isimm;
u32 off_reg;
aux = &env->insn_aux_data[i + delta];
@@ -12382,28 +12384,29 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
BPF_ALU_SANITIZE_SRC;
+ isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
off_reg = issrc ? insn->src_reg : insn->dst_reg;
- if (isneg)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
- *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
- *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
- if (issrc) {
- *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
- off_reg);
- insn->src_reg = BPF_REG_AX;
+ if (isimm) {
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
} else {
- *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
- BPF_REG_AX);
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
}
+ if (!issrc)
+ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
+ insn->src_reg = BPF_REG_AX;
if (isneg)
insn->code = insn->code == code_add ?
code_sub : code_add;
*patch++ = *insn;
- if (issrc && isneg)
+ if (issrc && isneg && !isimm)
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
cnt = patch - insn_buf;
diff --git a/kernel/futex.c b/kernel/futex.c
index c98b825..4938a00 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3710,8 +3710,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
if (op & FUTEX_CLOCK_REALTIME) {
flags |= FLAGS_CLOCKRT;
- if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
- cmd != FUTEX_WAIT_REQUEUE_PI)
+ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
}
@@ -3758,42 +3757,52 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
return -ENOSYS;
}
+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
+{
+ switch (cmd) {
+ case FUTEX_WAIT:
+ case FUTEX_LOCK_PI:
+ case FUTEX_WAIT_BITSET:
+ case FUTEX_WAIT_REQUEUE_PI:
+ return true;
+ }
+ return false;
+}
+
+static __always_inline int
+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
+{
+ if (!timespec64_valid(ts))
+ return -EINVAL;
+
+ *t = timespec64_to_ktime(*ts);
+ if (cmd == FUTEX_WAIT)
+ *t = ktime_add_safe(ktime_get(), *t);
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
+ *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
+ return 0;
+}
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
const struct __kernel_timespec __user *, utime,
u32 __user *, uaddr2, u32, val3)
{
- struct timespec64 ts;
+ int ret, cmd = op & FUTEX_CMD_MASK;
ktime_t t, *tp = NULL;
- u32 val2 = 0;
- int cmd = op & FUTEX_CMD_MASK;
+ struct timespec64 ts;
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ if (utime && futex_cmd_has_timeout(cmd)) {
if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
return -EFAULT;
if (get_timespec64(&ts, utime))
return -EFAULT;
- if (!timespec64_valid(&ts))
- return -EINVAL;
-
- t = timespec64_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
- t = ktime_add_safe(ktime_get(), t);
- else if (!(op & FUTEX_CLOCK_REALTIME))
- t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
+ ret = futex_init_timeout(cmd, op, &ts, &t);
+ if (ret)
+ return ret;
tp = &t;
}
- /*
- * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
- * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
- */
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
- val2 = (u32) (unsigned long) utime;
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#ifdef CONFIG_COMPAT
@@ -3959,31 +3968,20 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
u32, val3)
{
- struct timespec64 ts;
+ int ret, cmd = op & FUTEX_CMD_MASK;
ktime_t t, *tp = NULL;
- int val2 = 0;
- int cmd = op & FUTEX_CMD_MASK;
+ struct timespec64 ts;
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
- cmd == FUTEX_WAIT_REQUEUE_PI)) {
+ if (utime && futex_cmd_has_timeout(cmd)) {
if (get_old_timespec32(&ts, utime))
return -EFAULT;
- if (!timespec64_valid(&ts))
- return -EINVAL;
-
- t = timespec64_to_ktime(ts);
- if (cmd == FUTEX_WAIT)
- t = ktime_add_safe(ktime_get(), t);
- else if (!(op & FUTEX_CLOCK_REALTIME))
- t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
+ ret = futex_init_timeout(cmd, op, &ts, &t);
+ if (ret)
+ return ret;
tp = &t;
}
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
- cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
- val2 = (int) (unsigned long) utime;
- return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index b94f383..ec36b73 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -66,12 +66,12 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
arch_spin_lock(&lock->wait_lock);
/* Try to acquire the lock directly if no reader is present */
- if (!atomic_read(&lock->cnts) &&
- (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
+ if (!(cnts = atomic_read(&lock->cnts)) &&
+ atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED))
goto unlock;
/* Set the waiting flag to notify readers that a writer is pending */
- atomic_add(_QW_WAITING, &lock->cnts);
+ atomic_or(_QW_WAITING, &lock->cnts);
/* When no more readers or writers, set the locked flag */
do {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9143163..5226cc2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -938,7 +938,7 @@ DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
- return clamp_value / UCLAMP_BUCKET_DELTA;
+ return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
}
static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1d75af1..20aa234 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10878,16 +10878,22 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq;
+ list_add_leaf_cfs_rq(cfs_rq_of(se));
+
/* Start to propagate at parent */
se = se->parent;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- if (cfs_rq_throttled(cfs_rq))
- break;
+ if (!cfs_rq_throttled(cfs_rq)){
+ update_load_avg(cfs_rq, se, UPDATE_TG);
+ list_add_leaf_cfs_rq(cfs_rq);
+ continue;
+ }
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ if (list_add_leaf_cfs_rq(cfs_rq))
+ break;
}
}
#else
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index db27b69..cc25a3c 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -972,7 +972,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
*/
void cgroup_move_task(struct task_struct *task, struct css_set *to)
{
- unsigned int task_flags = 0;
+ unsigned int task_flags;
struct rq_flags rf;
struct rq *rq;
@@ -987,15 +987,31 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
rq = task_rq_lock(task, &rf);
- if (task_on_rq_queued(task)) {
- task_flags = TSK_RUNNING;
- if (task_current(rq, task))
- task_flags |= TSK_ONCPU;
- } else if (task->in_iowait)
- task_flags = TSK_IOWAIT;
-
- if (task->in_memstall)
- task_flags |= TSK_MEMSTALL;
+ /*
+ * We may race with schedule() dropping the rq lock between
+ * deactivating prev and switching to next. Because the psi
+ * updates from the deactivation are deferred to the switch
+ * callback to save cgroup tree updates, the task's scheduling
+ * state here is not coherent with its psi state:
+ *
+ * schedule() cgroup_move_task()
+ * rq_lock()
+ * deactivate_task()
+ * p->on_rq = 0
+ * psi_dequeue() // defers TSK_RUNNING & TSK_IOWAIT updates
+ * pick_next_task()
+ * rq_unlock()
+ * rq_lock()
+ * psi_task_change() // old cgroup
+ * task->cgroups = to
+ * psi_task_change() // new cgroup
+ * rq_unlock()
+ * rq_lock()
+ * psi_sched_switch() // does deferred updates in new cgroup
+ *
+ * Don't rely on the scheduling state. Use psi_flags instead.
+ */
+ task_flags = task->psi_flags;
if (task_flags)
psi_task_change(task, task_flags, 0);
diff --git a/kernel/smp.c b/kernel/smp.c
index e210749..52bf159 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -211,7 +211,7 @@ static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type)
} while (0)
/* Record current CSD work for current CPU, NULL to erase. */
-static void __csd_lock_record(call_single_data_t *csd)
+static void __csd_lock_record(struct __call_single_data *csd)
{
if (!csd) {
smp_mb(); /* NULL cur_csd after unlock. */
@@ -226,13 +226,13 @@ static void __csd_lock_record(call_single_data_t *csd)
/* Or before unlock, as the case may be. */
}
-static __always_inline void csd_lock_record(call_single_data_t *csd)
+static __always_inline void csd_lock_record(struct __call_single_data *csd)
{
if (static_branch_unlikely(&csdlock_debug_enabled))
__csd_lock_record(csd);
}
-static int csd_lock_wait_getcpu(call_single_data_t *csd)
+static int csd_lock_wait_getcpu(struct __call_single_data *csd)
{
unsigned int csd_type;
@@ -282,7 +282,7 @@ static const char *csd_lock_get_type(unsigned int type)
return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type];
}
-static void csd_lock_print_extended(call_single_data_t *csd, int cpu)
+static void csd_lock_print_extended(struct __call_single_data *csd, int cpu)
{
struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu);
unsigned int srccpu = csd->node.src;
@@ -321,7 +321,7 @@ static void csd_lock_print_extended(call_single_data_t *csd, int cpu)
* the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
* so waiting on other types gets much less information.
*/
-static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
{
int cpu = -1;
int cpux;
@@ -387,7 +387,7 @@ static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, in
* previous function call. For multi-cpu calls its even more interesting
* as we'll have to ensure no other cpu is observing our csd.
*/
-static void __csd_lock_wait(call_single_data_t *csd)
+static void __csd_lock_wait(struct __call_single_data *csd)
{
int bug_id = 0;
u64 ts0, ts1;
@@ -401,7 +401,7 @@ static void __csd_lock_wait(call_single_data_t *csd)
smp_acquire__after_ctrl_dep();
}
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static __always_inline void csd_lock_wait(struct __call_single_data *csd)
{
if (static_branch_unlikely(&csdlock_debug_enabled)) {
__csd_lock_wait(csd);
@@ -431,17 +431,17 @@ static void __smp_call_single_queue_debug(int cpu, struct llist_node *node)
#else
#define cfd_seq_store(var, src, dst, type)
-static void csd_lock_record(call_single_data_t *csd)
+static void csd_lock_record(struct __call_single_data *csd)
{
}
-static __always_inline void csd_lock_wait(call_single_data_t *csd)
+static __always_inline void csd_lock_wait(struct __call_single_data *csd)
{
smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
#endif
-static __always_inline void csd_lock(call_single_data_t *csd)
+static __always_inline void csd_lock(struct __call_single_data *csd)
{
csd_lock_wait(csd);
csd->node.u_flags |= CSD_FLAG_LOCK;
@@ -454,7 +454,7 @@ static __always_inline void csd_lock(call_single_data_t *csd)
smp_wmb();
}
-static __always_inline void csd_unlock(call_single_data_t *csd)
+static __always_inline void csd_unlock(struct __call_single_data *csd)
{
WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));
@@ -501,7 +501,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
* for execution on the given CPU. data must already have
* ->func, ->info, and ->flags set.
*/
-static int generic_exec_single(int cpu, call_single_data_t *csd)
+static int generic_exec_single(int cpu, struct __call_single_data *csd)
{
if (cpu == smp_processor_id()) {
smp_call_func_t func = csd->func;
@@ -784,7 +784,7 @@ EXPORT_SYMBOL(smp_call_function_single);
* NOTE: Be careful, there is unfortunately no current debugging facility to
* validate the correctness of this serialization.
*/
-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
int err = 0;
diff --git a/kernel/up.c b/kernel/up.c
index df50828..a38b8b0 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -25,7 +25,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
}
EXPORT_SYMBOL(smp_call_function_single);
-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
+int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
unsigned long flags;
diff --git a/lib/.gitignore b/lib/.gitignore
index 327cb2c..5e7fa54 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-gen_crc32table
-gen_crc64table
-crc32table.h
-crc64table.h
-oid_registry_data.c
+/crc32table.h
+/crc64table.h
+/gen_crc32table
+/gen_crc64table
+/oid_registry_data.c
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 5b6116e..1d051ef 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -828,7 +828,7 @@ int nla_strcmp(const struct nlattr *nla, const char *str)
int attrlen = nla_len(nla);
int d;
- if (attrlen > 0 && buf[attrlen - 1] == '\0')
+ while (attrlen > 0 && buf[attrlen - 1] == '\0')
attrlen--;
d = attrlen - len;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 0456593..e4e6e99 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -103,8 +103,9 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev,
rcu_read_lock();
if (netif_is_bridge_port(dev)) {
- p = br_port_get_rcu(dev);
- vg = nbp_vlan_group_rcu(p);
+ p = br_port_get_check_rcu(dev);
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
} else if (dev->priv_flags & IFF_EBRIDGE) {
br = netdev_priv(dev);
vg = br_vlan_group_rcu(br);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 290012d..88d8a02 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -387,7 +387,8 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
int ret;
ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &ethtool_genl_family, 0, ctx->ops->reply_cmd);
+ &ethtool_genl_family, NLM_F_MULTI,
+ ctx->ops->reply_cmd);
if (!ehdr)
return -EMSGSIZE;
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index b218e45..6852e9b 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -520,6 +520,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct ethhdr *ethhdr;
__be16 proto;
+ /* Check if skb contains hsr_ethhdr */
+ if (skb->mac_len < sizeof(struct hsr_ethhdr))
+ return -EINVAL;
+
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index cf20316..c53f14b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1556,13 +1556,12 @@ int arpt_register_table(struct net *net,
return ret;
}
-void arpt_unregister_table_pre_exit(struct net *net, const char *name,
- const struct nf_hook_ops *ops)
+void arpt_unregister_table_pre_exit(struct net *net, const char *name)
{
struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
if (table)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index b8f45e9..6922612 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -54,7 +54,7 @@ static int __net_init arptable_filter_table_init(struct net *net)
static void __net_exit arptable_filter_net_pre_exit(struct net *net)
{
- arpt_unregister_table_pre_exit(net, "filter", arpfilter_ops);
+ arpt_unregister_table_pre_exit(net, "filter");
}
static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e14fd0c..f1c1f9e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2039,6 +2039,7 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
(__kernel_size_t)zc->msg_controllen;
cmsg_dummy.msg_flags = in_compat_syscall()
? MSG_CMSG_COMPAT : 0;
+ cmsg_dummy.msg_control_is_user = true;
zc->msg_flags = 0;
if (zc->msg_control == msg_control_addr &&
zc->msg_controllen == cmsg_dummy.msg_controllen) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 563d016..db5831e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -230,6 +230,10 @@ int tcp_set_default_congestion_control(struct net *net, const char *name)
ret = -ENOENT;
} else if (!bpf_try_module_get(ca, ca->owner)) {
ret = -EBUSY;
+ } else if (!net_eq(net, &init_net) &&
+ !(ca->flags & TCP_CONG_NON_RESTRICTED)) {
+ /* Only init netns can set default to a restricted algorithm */
+ ret = -EPERM;
} else {
prev = xchg(&net->ipv4.tcp_congestion_control, ca);
if (prev)
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index d2f8138..e412817 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -122,9 +122,6 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
hinfo = seg6_hmac_info_lookup(net, hmackeyid);
if (!slen) {
- if (!hinfo)
- err = -ENOENT;
-
err = seg6_hmac_info_del(net, hmackeyid);
goto out_unlock;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index bd71408..4ff38cb 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -93,6 +93,35 @@ struct seg6_end_dt_info {
int hdrlen;
};
+struct pcpu_seg6_local_counters {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+ u64_stats_t errors;
+
+ struct u64_stats_sync syncp;
+};
+
+/* This struct groups all the SRv6 Behavior counters supported so far.
+ *
+ * put_nla_counters() makes use of this data structure to collect all counter
+ * values after the per-CPU counter evaluation has been performed.
+ * Finally, each counter value (in seg6_local_counters) is stored in the
+ * corresponding netlink attribute and sent to user space.
+ *
+ * NB: we don't want to expose this structure to user space!
+ */
+struct seg6_local_counters {
+ __u64 packets;
+ __u64 bytes;
+ __u64 errors;
+};
+
+#define seg6_local_alloc_pcpu_counters(__gfp) \
+ __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
+ ((__gfp) | __GFP_ZERO))
+
+#define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@@ -105,6 +134,7 @@ struct seg6_local_lwt {
#ifdef CONFIG_NET_L3_MASTER_DEV
struct seg6_end_dt_info dt_info;
#endif
+ struct pcpu_seg6_local_counters __percpu *pcpu_counters;
int headroom;
struct seg6_action_desc *desc;
@@ -878,36 +908,43 @@ static struct seg6_action_desc seg6_action_table[] = {
{
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end,
},
{
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_x,
},
{
.action = SEG6_LOCAL_ACTION_END_T,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_t,
},
{
.action = SEG6_LOCAL_ACTION_END_DX2,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx2,
},
{
.action = SEG6_LOCAL_ACTION_END_DX6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx6,
},
{
.action = SEG6_LOCAL_ACTION_END_DX4,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx4,
},
{
.action = SEG6_LOCAL_ACTION_END_DT4,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
.input = input_action_end_dt4,
.slwt_ops = {
@@ -919,30 +956,35 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
.attrs = 0,
- .optattrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
.slwt_ops = {
.build_state = seg6_end_dt6_build,
},
#else
.attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
#endif
.input = input_action_end_dt6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_b6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr),
},
{
.action = SEG6_LOCAL_ACTION_END_BPF,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_bpf,
},
@@ -963,11 +1005,36 @@ static struct seg6_action_desc *__get_action_desc(int action)
return NULL;
}
+static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
+{
+ return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
+}
+
+static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
+ unsigned int len, int err)
+{
+ struct pcpu_seg6_local_counters *pcounters;
+
+ pcounters = this_cpu_ptr(slwt->pcpu_counters);
+ u64_stats_update_begin(&pcounters->syncp);
+
+ if (likely(!err)) {
+ u64_stats_inc(&pcounters->packets);
+ u64_stats_add(&pcounters->bytes, len);
+ } else {
+ u64_stats_inc(&pcounters->errors);
+ }
+
+ u64_stats_update_end(&pcounters->syncp);
+}
+
static int seg6_local_input(struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
struct seg6_local_lwt *slwt;
+ unsigned int len = skb->len;
+ int rc;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
@@ -977,7 +1044,14 @@ static int seg6_local_input(struct sk_buff *skb)
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
- return desc->input(skb, slwt);
+ rc = desc->input(skb, slwt);
+
+ if (!seg6_lwtunnel_counters_enabled(slwt))
+ return rc;
+
+ seg6_local_update_counters(slwt, len, rc);
+
+ return rc;
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
@@ -992,6 +1066,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
+ [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED },
};
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@@ -1296,6 +1371,112 @@ static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
bpf_prog_put(slwt->bpf.prog);
}
+static const struct
+nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
+ [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 },
+ [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 },
+ [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 },
+};
+
+static int parse_nla_counters(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct pcpu_seg6_local_counters __percpu *pcounters;
+ struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
+ attrs[SEG6_LOCAL_COUNTERS],
+ seg6_local_counters_policy, NULL);
+ if (ret < 0)
+ return ret;
+
+ /* basic support for SRv6 Behavior counters requires at least:
+ * packets, bytes and errors.
+ */
+ if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
+ !tb[SEG6_LOCAL_CNT_ERRORS])
+ return -EINVAL;
+
+ /* counters are always zero initialized */
+ pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
+ if (!pcounters)
+ return -ENOMEM;
+
+ slwt->pcpu_counters = pcounters;
+
+ return 0;
+}
+
+static int seg6_local_fill_nla_counters(struct sk_buff *skb,
+ struct seg6_local_counters *counters)
+{
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct seg6_local_counters counters = { 0, 0, 0 };
+ struct nlattr *nest;
+ int rc, i;
+
+ nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ for_each_possible_cpu(i) {
+ struct pcpu_seg6_local_counters *pcounters;
+ u64 packets, bytes, errors;
+ unsigned int start;
+
+ pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&pcounters->syncp);
+
+ packets = u64_stats_read(&pcounters->packets);
+ bytes = u64_stats_read(&pcounters->bytes);
+ errors = u64_stats_read(&pcounters->errors);
+
+ } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start));
+
+ counters.packets += packets;
+ counters.bytes += bytes;
+ counters.errors += errors;
+ }
+
+ rc = seg6_local_fill_nla_counters(skb, &counters);
+ if (rc < 0) {
+ nla_nest_cancel(skb, nest);
+ return rc;
+ }
+
+ return nla_nest_end(skb, nest);
+}
+
+static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ /* a and b are equal if both have pcpu_counters set or not */
+ return (!!((unsigned long)a->pcpu_counters)) ^
+ (!!((unsigned long)b->pcpu_counters));
+}
+
+static void destroy_attr_counters(struct seg6_local_lwt *slwt)
+{
+ free_percpu(slwt->pcpu_counters);
+}
+
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
@@ -1343,6 +1524,10 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
.put = put_nla_vrftable,
.cmp = cmp_nla_vrftable },
+ [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters,
+ .put = put_nla_counters,
+ .cmp = cmp_nla_counters,
+ .destroy = destroy_attr_counters },
};
/* call the destroy() callback (if available) for each set attribute in
@@ -1645,6 +1830,15 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
nlsize += nla_total_size(4);
+ if (attrs & SEG6_F_LOCAL_COUNTERS)
+ nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
+ /* SEG6_LOCAL_CNT_PACKETS */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* SEG6_LOCAL_CNT_BYTES */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* SEG6_LOCAL_CNT_ERRORS */
+ nla_total_size_64bit(sizeof(__u64));
+
return nlsize;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 82e91b0..a5ede35 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -546,8 +546,7 @@ static void mptcp_sock_destruct(struct sock *sk)
* ESTABLISHED state and will not have the SOCK_DEAD flag.
* Both result in warnings from inet_sock_destruct.
*/
-
- if (sk->sk_state == TCP_ESTABLISHED) {
+ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
sk->sk_state = TCP_CLOSE;
WARN_ON_ONCE(sk->sk_socket);
sock_orphan(sk);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index b22801f..a414274 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -413,7 +413,10 @@ static int help(struct sk_buff *skb,
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
- BUG_ON(fb_ptr == NULL);
+ if (!fb_ptr) {
+ spin_unlock_bh(&nf_ftp_lock);
+ return NF_ACCEPT;
+ }
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 8ba037b..aafaff0 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -146,7 +146,8 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
/* Get first TPKT pointer */
tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
h323_buffer);
- BUG_ON(tpkt == NULL);
+ if (!tpkt)
+ goto clear_out;
/* Validate TPKT identifier */
if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index e40988a..08ee4e7 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -143,7 +143,10 @@ static int help(struct sk_buff *skb, unsigned int protoff,
spin_lock_bh(&irc_buffer_lock);
ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
irc_buffer);
- BUG_ON(ib_ptr == NULL);
+ if (!ib_ptr) {
+ spin_unlock_bh(&irc_buffer_lock);
+ return NF_ACCEPT;
+ }
data = ib_ptr;
data_limit = ib_ptr + skb->len - dataoff;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5105d42..7d5708b 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -544,7 +544,9 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
nexthdr_off = protoff;
tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
+ if (!tcph)
+ return NF_ACCEPT;
+
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 318b8f7..34e2241 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -338,7 +338,8 @@ static void tcp_options(const struct sk_buff *skb,
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
- BUG_ON(ptr == NULL);
+ if (!ptr)
+ return;
state->td_scale =
state->flags = 0;
@@ -394,7 +395,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
- BUG_ON(ptr == NULL);
+ if (!ptr)
+ return;
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 1aebd65..fcb33b1 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -95,7 +95,10 @@ static int help(struct sk_buff *skb,
spin_lock_bh(&nf_sane_lock);
sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
- BUG_ON(sb_ptr == NULL);
+ if (!sb_ptr) {
+ spin_unlock_bh(&nf_sane_lock);
+ return NF_ACCEPT;
+ }
if (dir == IP_CT_DIR_ORIGINAL) {
if (datalen != sizeof(struct sane_request))
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0b7fe0a9..d63d2d8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4184,6 +4184,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
unsigned char *udata;
struct nft_set *set;
struct nft_ctx ctx;
+ size_t alloc_size;
u64 timeout;
char *name;
int err, i;
@@ -4329,8 +4330,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
size = 0;
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
-
- set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
+ alloc_size = sizeof(*set) + size + udlen;
+ if (alloc_size < size)
+ return -ENOMEM;
+ set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
return -ENOMEM;
@@ -6615,9 +6618,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&obj->list);
return err;
err_trans:
- kfree(obj->key.name);
-err_userdata:
kfree(obj->udata);
+err_userdata:
+ kfree(obj->key.name);
err_strdup:
if (obj->ops->destroy)
obj->ops->destroy(&ctx, obj);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index d7a9628..e8dbd83 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -295,6 +295,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
nfnl_unlock(subsys_id);
break;
default:
+ rcu_read_unlock();
err = -EINVAL;
break;
}
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index e8f8875..0fa2e20 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -186,6 +186,8 @@ static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
sizeof(struct tcphdr), ctx->optsize, opts);
+ if (!ctx->optp)
+ return NULL;
}
return tcp;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 58f576a..7b3d0a7 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -412,9 +412,17 @@ static void nft_rhash_destroy(const struct nft_set *set)
(void *)set);
}
+/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+#define NFT_MAX_BUCKETS (1U << 31)
+
static u32 nft_hash_buckets(u32 size)
{
- return roundup_pow_of_two(size * 4 / 3);
+ u64 val = div_u64((u64)size * 4, 3);
+
+ if (val >= NFT_MAX_BUCKETS)
+ return NFT_MAX_BUCKETS;
+
+ return roundup_pow_of_two(val);
}
static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -615,7 +623,7 @@ static u64 nft_hash_privsize(const struct nlattr * const nla[],
const struct nft_set_desc *desc)
{
return sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
}
static int nft_hash_init(const struct nft_set *set,
@@ -655,8 +663,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
return false;
est->size = sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
- desc->size * sizeof(struct nft_hash_elem);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
@@ -673,8 +681,8 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
return false;
est->size = sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
- desc->size * sizeof(struct nft_hash_elem);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 75625d1..498a0bf 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -24,10 +24,9 @@ MODULE_ALIAS("ip6t_SECMARK");
static u8 mode;
static unsigned int
-secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info)
{
u32 secmark = 0;
- const struct xt_secmark_target_info *info = par->targinfo;
switch (mode) {
case SECMARK_MODE_SEL:
@@ -41,7 +40,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static int checkentry_lsm(struct xt_secmark_target_info *info)
+static int checkentry_lsm(struct xt_secmark_target_info_v1 *info)
{
int err;
@@ -73,15 +72,15 @@ static int checkentry_lsm(struct xt_secmark_target_info *info)
return 0;
}
-static int secmark_tg_check(const struct xt_tgchk_param *par)
+static int
+secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info)
{
- struct xt_secmark_target_info *info = par->targinfo;
int err;
- if (strcmp(par->table, "mangle") != 0 &&
- strcmp(par->table, "security") != 0) {
+ if (strcmp(table, "mangle") != 0 &&
+ strcmp(table, "security") != 0) {
pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
- par->table);
+ table);
return -EINVAL;
}
@@ -116,25 +115,76 @@ static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
}
}
-static struct xt_target secmark_tg_reg __read_mostly = {
- .name = "SECMARK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = secmark_tg_check,
- .destroy = secmark_tg_destroy,
- .target = secmark_tg,
- .targetsize = sizeof(struct xt_secmark_target_info),
- .me = THIS_MODULE,
+static int secmark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+ struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .mode = info->mode,
+ };
+ int ret;
+
+ memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX);
+
+ ret = secmark_tg_check(par->table, &newinfo);
+ info->secid = newinfo.secid;
+
+ return ret;
+}
+
+static unsigned int
+secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .secid = info->secid,
+ };
+
+ return secmark_tg(skb, &newinfo);
+}
+
+static int secmark_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ return secmark_tg_check(par->table, par->targinfo);
+}
+
+static unsigned int
+secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return secmark_tg(skb, par->targinfo);
+}
+
+static struct xt_target secmark_tg_reg[] __read_mostly = {
+ {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v0,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v0,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v1,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v1,
+ .targetsize = sizeof(struct xt_secmark_target_info_v1),
+ .usersize = offsetof(struct xt_secmark_target_info_v1, secid),
+ .me = THIS_MODULE,
+ },
};
static int __init secmark_tg_init(void)
{
- return xt_register_target(&secmark_tg_reg);
+ return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
static void __exit secmark_tg_exit(void)
{
- xt_unregister_target(&secmark_tg_reg);
+ xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
module_init(secmark_tg_init);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index a3b46f8..53dbe73 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -109,12 +109,14 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
GFP_KERNEL);
if (!llcp_sock->service_name) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
ret = -EADDRINUSE;
@@ -709,6 +711,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
@@ -756,6 +759,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
put_dev:
nfc_put_device(dev);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 92a0b67..77d924a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -827,17 +827,17 @@ static void ovs_fragment(struct net *net, struct vport *vport,
}
if (key->eth.type == htons(ETH_P_IP)) {
- struct dst_entry ovs_dst;
+ struct rtable ovs_rt = { 0 };
unsigned long orig_dst;
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_dst.dev = vport->dev;
+ ovs_rt.dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &ovs_dst);
+ skb_dst_set_noref(skb, &ovs_rt.dst);
IPCB(skb)->frag_max_size = mru;
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index e1e77d3..8c06381 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -90,16 +90,16 @@ static int sch_fragment(struct net *net, struct sk_buff *skb,
}
if (skb_protocol(skb, true) == htons(ETH_P_IP)) {
- struct dst_entry sch_frag_dst;
+ struct rtable sch_frag_rt = { 0 };
unsigned long orig_dst;
sch_frag_prepare_frag(skb, xmit);
- dst_init(&sch_frag_dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- sch_frag_dst.dev = skb->dev;
+ sch_frag_rt.dst.dev = skb->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &sch_frag_dst);
+ skb_dst_set_noref(skb, &sch_frag_rt.dst);
IPCB(skb)->frag_max_size = mru;
ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5f9a7c0..5b44d22 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -858,11 +858,7 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
struct sctp_chunk *retval;
__u32 ctsn;
- if (chunk && chunk->asoc)
- ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map);
- else
- ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
-
+ ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 0948f14..ce15d59 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -826,28 +826,6 @@ static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds,
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
}
-static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
- struct sctp_association *asoc,
- struct sctp_association *new)
-{
- struct net *net = asoc->base.net;
- struct sctp_chunk *abort;
-
- if (!sctp_assoc_update(asoc, new))
- return;
-
- abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
- if (abort) {
- sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
- sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- }
- sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED));
- sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED,
- SCTP_PERR(SCTP_ERROR_RSRC_LOW));
- SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
-}
-
/* Helper function to change the state of an association. */
static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
@@ -1301,10 +1279,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
sctp_endpoint_add_asoc(ep, asoc);
break;
- case SCTP_CMD_UPDATE_ASSOC:
- sctp_cmd_assoc_update(commands, asoc, cmd->obj.asoc);
- break;
-
case SCTP_CMD_PURGE_OUTQUEUE:
sctp_outq_teardown(&asoc->outqueue);
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7632714..fd1e319 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1773,6 +1773,30 @@ enum sctp_disposition sctp_sf_do_5_2_3_initack(
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
}
+static int sctp_sf_do_assoc_update(struct sctp_association *asoc,
+ struct sctp_association *new,
+ struct sctp_cmd_seq *cmds)
+{
+ struct net *net = asoc->base.net;
+ struct sctp_chunk *abort;
+
+ if (!sctp_assoc_update(asoc, new))
+ return 0;
+
+ abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
+ if (abort) {
+ sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
+ sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ }
+ sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED));
+ sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED,
+ SCTP_PERR(SCTP_ERROR_RSRC_LOW));
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+
+ return -ENOMEM;
+}
+
/* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
*
* Section 5.2.4
@@ -1852,20 +1876,22 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ /* Update the content of current association. */
+ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands))
+ goto nomem;
+
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
/* Report association restart to upper layer. */
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
sctp_state(asoc, SHUTDOWN_SENT)) &&
@@ -1925,14 +1951,17 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
return SCTP_DISPOSITION_DISCARD;
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ if (asoc->state < SCTP_STATE_ESTABLISHED)
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ /* Update the content of current association. */
+ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands))
+ goto nomem;
+
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b7b9013..40f9f6c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -357,6 +357,18 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
return af;
}
+static void sctp_auto_asconf_init(struct sctp_sock *sp)
+{
+ struct net *net = sock_net(&sp->inet.sk);
+
+ if (net->sctp.default_auto_asconf) {
+ spin_lock(&net->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
+ spin_unlock(&net->sctp.addr_wq_lock);
+ sp->do_auto_asconf = 1;
+ }
+}
+
/* Bind a local address either to an endpoint or to an association. */
static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
@@ -418,8 +430,10 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
return -EADDRINUSE;
/* Refresh ephemeral port. */
- if (!bp->port)
+ if (!bp->port) {
bp->port = inet_sk(sk)->inet_num;
+ sctp_auto_asconf_init(sp);
+ }
/* Add the address to the bind address list.
* Use GFP_ATOMIC since BHs will be disabled.
@@ -1520,9 +1534,11 @@ static void sctp_close(struct sock *sk, long timeout)
/* Supposedly, no process has access to the socket, but
* the net layers still may.
+ * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
+ * held and that should be grabbed before socket lock.
*/
- local_bh_disable();
- bh_lock_sock(sk);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
+ bh_lock_sock_nested(sk);
/* Hold the sock, since sk_common_release() will put sock_put()
* and we have just a little more cleanup.
@@ -1531,7 +1547,7 @@ static void sctp_close(struct sock *sk, long timeout)
sk_common_release(sk);
bh_unlock_sock(sk);
- local_bh_enable();
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
sock_put(sk);
@@ -4991,16 +5007,6 @@ static int sctp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
- if (net->sctp.default_auto_asconf) {
- spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
- list_add_tail(&sp->auto_asconf_list,
- &net->sctp.auto_asconf_splist);
- sp->do_auto_asconf = 1;
- spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
- } else {
- sp->do_auto_asconf = 0;
- }
-
local_bh_enable();
return 0;
@@ -5025,9 +5031,7 @@ static void sctp_destroy_sock(struct sock *sk)
if (sp->do_auto_asconf) {
sp->do_auto_asconf = 0;
- spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
list_del(&sp->auto_asconf_list);
- spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
@@ -9398,6 +9402,8 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
return err;
}
+ sctp_auto_asconf_init(newsp);
+
/* Move any messages in the old socket's receive queue that are for the
* peeled off association to the new socket's receive queue.
*/
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index be3e80b..5eff7cc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2161,6 +2161,9 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
struct smc_sock *smc;
int val, rc;
+ if (level == SOL_TCP && optname == TCP_ULP)
+ return -EOPNOTSUPP;
+
smc = smc_sk(sk);
/* generic setsockopts reaching us here always apply to the
@@ -2185,7 +2188,6 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (rc || smc->use_fallback)
goto out;
switch (optname) {
- case TCP_ULP:
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 612f0a6..f555d33 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1799,7 +1799,6 @@ call_allocate(struct rpc_task *task)
status = xprt->ops->buf_alloc(task);
trace_rpc_buf_alloc(task, status);
- xprt_inject_disconnect(xprt);
if (status == 0)
return;
if (status != -ENOMEM) {
@@ -2458,12 +2457,6 @@ call_decode(struct rpc_task *task)
}
/*
- * Ensure that we see all writes made by xprt_complete_rqst()
- * before it changed req->rq_reply_bytes_recvd.
- */
- smp_rmb();
-
- /*
* Did we ever call xprt_complete_rqst()? If not, we should assume
* the message is incomplete.
*/
@@ -2471,6 +2464,11 @@ call_decode(struct rpc_task *task)
if (!req->rq_reply_bytes_recvd)
goto out;
+ /* Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_reply_bytes_recvd.
+ */
+ smp_rmb();
+
req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 38fe2ce..647b323 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -344,13 +344,15 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
int proto, u32 version,
- const struct cred *cred)
+ const struct cred *cred,
+ const struct rpc_timeout *timeo)
{
struct rpc_create_args args = {
.net = net,
.protocol = proto,
.address = srvaddr,
.addrsize = salen,
+ .timeout = timeo,
.servername = hostname,
.nodename = nodename,
.program = &rpcb_program,
@@ -705,7 +707,8 @@ void rpcb_getport_async(struct rpc_task *task)
clnt->cl_nodename,
xprt->servername, sap, salen,
xprt->prot, bind_version,
- clnt->cl_cred);
+ clnt->cl_cred,
+ task->tk_client->cl_timeout);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
goto bailout_nofree;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 691ccf8..e5b5a96 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -698,9 +698,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
int status = 0;
- if (time_before(jiffies, req->rq_minortimeo))
- return status;
if (time_before(jiffies, req->rq_majortimeo)) {
+ if (time_before(jiffies, req->rq_minortimeo))
+ return status;
if (to->to_exponential)
req->rq_timeout <<= 1;
else
@@ -1352,6 +1352,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
out:
+ atomic_long_inc(&xprt->xmit_queuelen);
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
}
@@ -1381,6 +1382,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
}
} else
list_del(&req->rq_xmit2);
+ atomic_long_dec(&req->rq_xprt->xmit_queuelen);
}
/**
@@ -1469,8 +1471,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
if (!xprt_lock_write(xprt, task)) {
- trace_xprt_transmit_queued(xprt, task);
-
/* Race breaker: someone may have transmitted us */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
rpc_wake_up_queued_task_set_status(&xprt->sending,
@@ -1483,7 +1483,10 @@ bool xprt_prepare_transmit(struct rpc_task *task)
void xprt_end_transmit(struct rpc_task *task)
{
- xprt_release_write(task->tk_rqstp->rq_xprt, task);
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+
+ xprt_inject_disconnect(xprt);
+ xprt_release_write(xprt, task);
}
/**
@@ -1537,8 +1540,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
return status;
}
- if (is_retrans)
+ if (is_retrans) {
task->tk_client->cl_stats->rpcretrans++;
+ trace_xprt_retransmit(req);
+ }
xprt_inject_disconnect(xprt);
@@ -1885,7 +1890,6 @@ void xprt_release(struct rpc_task *task)
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index a249837..1151efd 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -155,9 +155,11 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
req->rl_reply = NULL;
spin_lock(&xprt->bc_pa_lock);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 766a104..229fcc9 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -49,20 +49,13 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+ struct rpcrdma_mr *mr)
{
- int rc;
+ struct rpc_rdma_cid *cid = &mr->mr_cid;
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- trace_xprtrdma_frwr_dereg(mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
+ cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+ cid->ci_completion_id = mr->mr_ibmr->res.id;
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
@@ -75,20 +68,22 @@ static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
}
}
-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+ int rc;
- trace_xprtrdma_mr_recycle(mr);
+ frwr_mr_unmap(mr->mr_xprt, mr);
- frwr_mr_unmap(r_xprt, mr);
-
- spin_lock(&r_xprt->rx_buf.rb_lock);
- list_del(&mr->mr_all);
- r_xprt->rx_stats.mrs_recycled++;
- spin_unlock(&r_xprt->rx_buf.rb_lock);
-
- frwr_release_mr(mr);
+ rc = ib_dereg_mr(mr->mr_ibmr);
+ if (rc)
+ trace_xprtrdma_frwr_dereg(mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
@@ -144,10 +139,11 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
goto out_list_err;
mr->mr_xprt = r_xprt;
- mr->frwr.fr_mr = frmr;
+ mr->mr_ibmr = frmr;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
- init_completion(&mr->frwr.fr_linv_done);
+ init_completion(&mr->mr_linv_done);
+ frwr_cid_init(ep, mr);
sg_init_table(sg, depth);
mr->mr_sg = sg;
@@ -257,6 +253,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ep->re_max_rdma_segs =
@@ -326,7 +323,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
- ibmr = mr->frwr.fr_mr;
+ ibmr = mr->mr_ibmr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
if (n != dma_nents)
goto out_mapmr_err;
@@ -336,7 +333,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &mr->frwr.fr_regwr;
+ reg_wr = &mr->mr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
@@ -364,29 +361,19 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
* @cq: completion queue
* @wc: WCE for a completed FastReg WR
*
+ * Each flushed MR gets destroyed after the QP has drained.
*/
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
- /* The MR will get recycled when the associated req is retransmitted */
+ trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
-static void frwr_cid_init(struct rpcrdma_ep *ep,
- struct rpcrdma_frwr *frwr)
-{
- struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
- cid->ci_queue_id = ep->re_attr.send_cq->res.id;
- cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
@@ -403,27 +390,36 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
+ struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
+ unsigned int num_wrs;
- post_wr = &req->rl_wr;
+ num_wrs = 1;
+ post_wr = send_wr;
list_for_each_entry(mr, &req->rl_registered, mr_list) {
- struct rpcrdma_frwr *frwr;
+ trace_xprtrdma_mr_fastreg(mr);
- frwr = &mr->frwr;
-
- frwr->fr_cqe.done = frwr_wc_fastreg;
- frwr_cid_init(ep, frwr);
- frwr->fr_regwr.wr.next = post_wr;
- frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
- frwr->fr_regwr.wr.num_sge = 0;
- frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
- frwr->fr_regwr.wr.send_flags = 0;
-
- post_wr = &frwr->fr_regwr.wr;
+ mr->mr_cqe.done = frwr_wc_fastreg;
+ mr->mr_regwr.wr.next = post_wr;
+ mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+ mr->mr_regwr.wr.num_sge = 0;
+ mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+ mr->mr_regwr.wr.send_flags = 0;
+ post_wr = &mr->mr_regwr.wr;
+ ++num_wrs;
}
+ if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+ num_wrs - ep->re_send_count);
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ ep->re_send_count -= num_wrs;
+ }
+
+ trace_xprtrdma_post_send(req);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
@@ -440,6 +436,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
+ trace_xprtrdma_mr_reminv(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
@@ -447,9 +444,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
- if (wc->status != IB_WC_SUCCESS)
- frwr_mr_recycle(mr);
- else
+ if (likely(wc->status == IB_WC_SUCCESS))
frwr_mr_put(mr);
}
@@ -462,12 +457,10 @@ static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
@@ -483,14 +476,12 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
- complete(&frwr->fr_linv_done);
+ complete(&mr->mr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
@@ -511,7 +502,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
@@ -520,35 +510,34 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
+ mr = container_of(last, struct rpcrdma_mr, mr_invwr);
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&frwr->fr_linv_done);
+ last->wr_cqe->done = frwr_wc_localinv_wake;
+ reinit_completion(&mr->mr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -562,22 +551,12 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* not happen, so don't wait in that case.
*/
if (bad_wr != first)
- wait_for_completion(&frwr->fr_linv_done);
+ wait_for_completion(&mr->mr_linv_done);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- list_del_init(&mr->mr_list);
- frwr_mr_recycle(mr);
- }
}
/**
@@ -589,20 +568,24 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+ struct rpcrdma_rep *rep;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
- frwr_mr_done(wc, mr);
+ trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);
- /* Ensure @rep is generated before frwr_mr_done */
+ /* Ensure that @rep is generated before the MR is released */
+ rep = mr->mr_req->rl_reply;
smp_rmb();
- rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq->cq_context, wc);
+ if (wc->status != IB_WC_SUCCESS) {
+ if (rep)
+ rpcrdma_unpin_rqst(rep);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
+ return;
+ }
+ frwr_mr_put(mr);
+ rpcrdma_complete_rqst(rep);
}
/**
@@ -619,33 +602,29 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
/* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
@@ -655,31 +634,23 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* are complete. The last completion will wake up the
* RPC waiter.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_done;
+ last->wr_cqe->done = frwr_wc_localinv_done;
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless re_id->qp is a valid pointer.
*/
- bad_wr = NULL;
- rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, NULL);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- frwr_mr_recycle(mr);
- }
/* The final LOCAL_INV WR in the chain is supposed to
- * do the wake. If it was never posted, the wake will
- * not happen, so wake here in that case.
+ * do the wake. If it was never posted, the wake does
+ * not happen. Unpin the rqst in preparation for its
+ * retransmission.
*/
- rpcrdma_complete_rqst(req->rl_reply);
+ rpcrdma_unpin_rqst(req->rl_reply);
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 292f066..649f7d8b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1326,9 +1326,35 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
return -EIO;
}
-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+ req->rl_reply = NULL;
+ rep->rr_rqst = NULL;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
*/
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
@@ -1430,13 +1456,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+ false);
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
- rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (unlikely(req->rl_reply))
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(buf, req->rl_reply);
req->rl_reply = rep;
rep->rr_rqst = rqst;
@@ -1464,5 +1491,5 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply_short_err(rep);
out:
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 78d29d1..0995359 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -262,8 +262,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
* xprt_rdma_inject_disconnect - inject a connection fault
* @xprt: transport context
*
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
*/
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index ec912cf9..1e965a3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -101,6 +101,12 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rdma_cm_id *id = ep->re_id;
+ /* Wait for rpcrdma_post_recvs() to leave its critical
+ * section.
+ */
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ wait_for_completion(&ep->re_done);
+
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
@@ -114,22 +120,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
rpcrdma_ep_put(ep);
}
-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
- struct rpcrdma_ep *ep = context;
-
- trace_xprtrdma_qp_event(ep, event);
-}
-
/* Ensure xprt_force_disconnect() is invoked exactly once when a
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
@@ -205,7 +195,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
out_flushed:
rpcrdma_flush_disconnect(r_xprt, wc);
- rpcrdma_rep_destroy(rep);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
@@ -414,6 +404,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
__module_get(THIS_MODULE);
device = id->device;
ep->re_id = id;
+ reinit_completion(&ep->re_done);
ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
ep->re_inline_send = xprt_rdma_max_inline_write;
@@ -424,8 +415,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);
- ep->re_attr.event_handler = rpcrdma_qp_event_handler;
- ep->re_attr.qp_context = ep;
ep->re_attr.srq = NULL;
ep->re_attr.cap.max_inline_data = 0;
ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -535,7 +524,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, true);
+ rpcrdma_post_recvs(r_xprt, 1, true);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -954,13 +943,11 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
rpcrdma_req_reset(req);
}
-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -987,7 +974,10 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
- list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_all, &buf->rb_all_reps);
+ spin_unlock(&buf->rb_lock);
return rep;
out_free_regbuf:
@@ -998,16 +988,23 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
return NULL;
}
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
{
- list_del(&rep->rr_all);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+}
+
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1019,12 +1016,21 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
return llist_entry(node, struct rpcrdma_rep, rr_node);
}
-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
- struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
{
llist_add(&rep->rr_node, &buf->rb_free_reps);
}
+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1032,7 +1038,7 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true;
+ rep->rr_temp = true; /* Mark this rep for destruction */
}
}
@@ -1040,8 +1046,18 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
{
struct rpcrdma_rep *rep;
- while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
- rpcrdma_rep_destroy(rep);
+ spin_lock(&buf->rb_lock);
+ while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+ struct rpcrdma_rep,
+ rr_all)) != NULL) {
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+
+ spin_lock(&buf->rb_lock);
+ }
+ spin_unlock(&buf->rb_lock);
}
/**
@@ -1104,7 +1120,7 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
}
rpcrdma_regbuf_free(req->rl_recvbuf);
@@ -1135,7 +1151,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
spin_lock(&buf->rb_lock);
}
@@ -1221,17 +1237,6 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
spin_unlock(&buffers->rb_lock);
}
-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
- rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
@@ -1342,21 +1347,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
*/
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_wr;
- struct rpcrdma_ep *ep = r_xprt->rx_ep;
- int rc;
-
- if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
- send_wr->send_flags |= IB_SEND_SIGNALED;
- ep->re_send_count = ep->re_send_batch;
- } else {
- send_wr->send_flags &= ~IB_SEND_SIGNALED;
- --ep->re_send_count;
- }
-
- trace_xprtrdma_post_send(req);
- rc = frwr_send(r_xprt, req);
- if (rc)
+ if (frwr_send(r_xprt, req))
return -ENOTCONN;
return 0;
}
@@ -1364,27 +1355,30 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep;
- int needed, count, rc;
+ int count, rc;
rc = 0;
count = 0;
- needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ goto out;
+
/* fast path: all needed reps can be found on the free list */
wr = NULL;
while (needed) {
@@ -1410,6 +1404,9 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
@@ -1418,7 +1415,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
wr = wr->next;
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
--count;
}
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index fe3be985e..436ad73 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -83,6 +83,7 @@ struct rpcrdma_ep {
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_receiving;
atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
@@ -228,31 +229,28 @@ struct rpcrdma_sendctx {
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-struct rpcrdma_frwr {
- struct ib_mr *fr_mr;
- struct ib_cqe fr_cqe;
- struct rpc_rdma_cid fr_cid;
- struct completion fr_linv_done;
- union {
- struct ib_reg_wr fr_regwr;
- struct ib_send_wr fr_invwr;
- };
-};
-
struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
+
+ struct ib_mr *mr_ibmr;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
- struct rpcrdma_frwr frwr;
+ struct ib_cqe mr_cqe;
+ struct completion mr_linv_done;
+ union {
+ struct ib_reg_wr mr_regwr;
+ struct ib_send_wr mr_invwr;
+ };
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct list_head mr_all;
+ struct rpc_rdma_cid mr_cid;
};
/*
@@ -461,7 +459,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
@@ -480,7 +478,7 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
@@ -527,7 +525,7 @@ rpcrdma_data_dir(bool writing)
void frwr_reset(struct rpcrdma_req *req);
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, __be32 xid,
@@ -560,6 +558,7 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e35760f..47aa47a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -558,6 +558,10 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
struct rpc_rqst *req;
ssize_t ret;
+ /* Is this transport associated with the backchannel? */
+ if (!xprt->bc_serv)
+ return -ESHUTDOWN;
+
/* Look up and lock the request corresponding to the given XID */
req = xprt_lookup_bc_request(xprt, transport->recv.xid);
if (!req) {
@@ -1018,6 +1022,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
+ tcp_sock_set_cork(transport->inet, true);
while (1) {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
@@ -1032,6 +1037,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
+ if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+ tcp_sock_set_cork(transport->inet, false);
return 0;
}
@@ -2163,6 +2170,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
xs_tcp_set_socket_timeouts(xprt, sock);
+ tcp_sock_set_nodelay(sk);
write_lock_bh(&sk->sk_callback_lock);
@@ -2177,7 +2185,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
xprt_clear_connected(xprt);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1c9ecb1..c99bc4c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -944,8 +944,6 @@ static int vmci_transport_recv_listen(struct sock *sk,
bool old_request = false;
bool old_pkt_proto = false;
- err = 0;
-
/* Because we are in the listen state, we could be receiving a packet
* for ourself or any previous connection requests that we received.
* If it's the latter, we try to find a socket in our list of pending
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 2ac3802..9d2a89d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -128,13 +128,12 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 chunk, chunk_end;
+ u64 chunk;
- chunk = xp_aligned_extract_addr(pool, desc->addr);
- chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
- if (chunk != chunk_end)
+ if (desc->len > pool->chunk_size)
return false;
+ chunk = xp_aligned_extract_addr(pool, desc->addr);
if (chunk >= pool->addrs_cnt)
return false;
diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore
index 2ed744c..d023816 100644
--- a/samples/auxdisplay/.gitignore
+++ b/samples/auxdisplay/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-cfag12864b-example
+/cfag12864b-example
diff --git a/samples/binderfs/.gitignore b/samples/binderfs/.gitignore
index eb60241..8fa415a 100644
--- a/samples/binderfs/.gitignore
+++ b/samples/binderfs/.gitignore
@@ -1 +1,2 @@
-binderfs_example
+# SPDX-License-Identifier: GPL-2.0
+/binderfs_example
diff --git a/samples/connector/.gitignore b/samples/connector/.gitignore
index d86f2ff..0e26039 100644
--- a/samples/connector/.gitignore
+++ b/samples/connector/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ucon
+/ucon
diff --git a/samples/hidraw/.gitignore b/samples/hidraw/.gitignore
index d7a6074..5233ab6 100644
--- a/samples/hidraw/.gitignore
+++ b/samples/hidraw/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-hid-example
+/hid-example
diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore
index db5e802..fe894bc 100644
--- a/samples/mei/.gitignore
+++ b/samples/mei/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-mei-amt-version
+/mei-amt-version
diff --git a/samples/nitro_enclaves/.gitignore b/samples/nitro_enclaves/.gitignore
index 8279341..6a718ee 100644
--- a/samples/nitro_enclaves/.gitignore
+++ b/samples/nitro_enclaves/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0
-ne_ioctl_sample
+/ne_ioctl_sample
diff --git a/samples/pidfd/.gitignore b/samples/pidfd/.gitignore
index eea857f..d4cfa31 100644
--- a/samples/pidfd/.gitignore
+++ b/samples/pidfd/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-pidfd-metadata
+/pidfd-metadata
diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
index 4a5a5b7..a6df0da 100644
--- a/samples/seccomp/.gitignore
+++ b/samples/seccomp/.gitignore
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-bpf-direct
-bpf-fancy
-dropper
-user-trap
+/bpf-direct
+/bpf-fancy
+/dropper
+/user-trap
diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore
index 40510c3..cd9ff7b 100644
--- a/samples/timers/.gitignore
+++ b/samples/timers/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-hpet_example
+/hpet_example
diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore
index 8fdabf7..79212d9 100644
--- a/samples/vfs/.gitignore
+++ b/samples/vfs/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-test-fsmount
-test-statx
+/test-fsmount
+/test-statx
diff --git a/samples/watch_queue/.gitignore b/samples/watch_queue/.gitignore
index 2aa3c7e..823b351 100644
--- a/samples/watch_queue/.gitignore
+++ b/samples/watch_queue/.gitignore
@@ -1 +1,2 @@
-watch_test
+# SPDX-License-Identifier: GPL-2.0-only
+/watch_test
diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore
index 74153b8..a70a015 100644
--- a/samples/watchdog/.gitignore
+++ b/samples/watchdog/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-watchdog-simple
+/watchdog-simple
diff --git a/scripts/.gitignore b/scripts/.gitignore
index a6c1131..e83c620 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,11 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
-bin2c
-kallsyms
-unifdef
-recordmcount
-sorttable
-asn1_compiler
-extract-cert
-sign-file
-insert-sys-cert
+/asn1_compiler
+/bin2c
+/extract-cert
+/insert-sys-cert
+/kallsyms
/module.lds
+/recordmcount
+/sign-file
+/sorttable
+/unifdef
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 5e39b05..949f723 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -354,7 +354,7 @@
targets += $(filter-out $(subdir-builtin), $(real-obj-y))
targets += $(filter-out $(subdir-modorder), $(real-obj-m))
-targets += $(lib-y) $(always-y) $(MAKECMDGOALS)
+targets += $(real-dtb-y) $(lib-y) $(always-y) $(MAKECMDGOALS)
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 64daf37..1095055 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -44,19 +44,22 @@
obj-y := $(filter-out %/, $(obj-y))
endif
-# Expand $(foo-objs) $(foo-y) by calling $(call suffix-search,foo.o,-objs -y)
-suffix-search = $(strip $(foreach s, $2, $($(1:.o=$s))))
+# Expand $(foo-objs), $(foo-y), etc. into the individual parts they list
+suffix-search = $(strip $(foreach s, $3, $($(1:%$(strip $2)=%$s))))
+# List composite targets that are constructed by combining other targets
+multi-search = $(sort $(foreach m, $1, $(if $(call suffix-search, $m, $2, $3 -), $m)))
+# List primitive targets that are compiled from source files
+real-search = $(foreach m, $1, $(if $(call suffix-search, $m, $2, $3 -), $(call suffix-search, $m, $2, $3), $m))
+
# If $(foo-objs), $(foo-y), $(foo-m), or $(foo-) exists, foo.o is a composite object
-multi-search = $(sort $(foreach m, $1, $(if $(call suffix-search, $m, $2 -), $m)))
-multi-obj-y := $(call multi-search,$(obj-y),-objs -y)
-multi-obj-m := $(call multi-search,$(obj-m),-objs -y -m)
+multi-obj-y := $(call multi-search, $(obj-y), .o, -objs -y)
+multi-obj-m := $(call multi-search, $(obj-m), .o, -objs -y -m)
multi-obj-ym := $(multi-obj-y) $(multi-obj-m)
# Replace multi-part objects by their individual parts,
# including built-in.a from subdirectories
-real-search = $(foreach m, $1, $(if $(call suffix-search, $m, $2 -), $(call suffix-search, $m, $2), $m))
-real-obj-y := $(call real-search, $(obj-y),-objs -y)
-real-obj-m := $(call real-search, $(obj-m),-objs -y -m)
+real-obj-y := $(call real-search, $(obj-y), .o, -objs -y)
+real-obj-m := $(call real-search, $(obj-m), .o, -objs -y -m)
always-y += $(always-m)
@@ -75,24 +78,18 @@
# If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built
dtb-$(CONFIG_OF_ALL_DTBS) += $(dtb-)
-# List all dtbs to be generated by fdtoverlay
-overlay-y := $(foreach m,$(dtb-y), $(if $(strip $($(m:.dtb=-dtbs))),$(m),))
-
-# Generate symbols for the base files so overlays can be applied to them.
-$(foreach m,$(overlay-y), $(eval DTC_FLAGS_$(basename $(firstword $($(m:.dtb=-dtbs)))) += -@))
-
-# Add base dtb and overlay dtbo
-dtb-y += $(foreach m,$(overlay-y), $($(m:.dtb=-dtbs)))
+# Composite DTB (i.e. DTB constructed by overlay)
+multi-dtb-y := $(call multi-search, $(dtb-y), .dtb, -dtbs)
+# Primitive DTB compiled from *.dts
+real-dtb-y := $(call real-search, $(dtb-y), .dtb, -dtbs)
+# Base DTB that overlay is applied onto (each first word of $(*-dtbs) expansion)
+base-dtb-y := $(foreach m, $(multi-dtb-y), $(firstword $(call suffix-search, $m, .dtb, -dtbs)))
always-y += $(dtb-y)
ifneq ($(CHECK_DTBS),)
-# Don't run schema checks for dtbs created by fdtoverlay as they don't
-# have corresponding dts files.
-dt-yaml-y := $(filter-out $(overlay-y),$(dtb-y))
-
-always-y += $(patsubst %.dtb,%.dt.yaml, $(dt-yaml-y))
-always-y += $(patsubst %.dtbo,%.dt.yaml, $(dt-yaml-y))
+always-y += $(patsubst %.dtb,%.dt.yaml, $(real-dtb-y))
+always-y += $(patsubst %.dtbo,%.dt.yaml, $(real-dtb-y))
endif
# Add subdir path
@@ -105,12 +102,14 @@
real-obj-y := $(addprefix $(obj)/,$(real-obj-y))
real-obj-m := $(addprefix $(obj)/,$(real-obj-m))
multi-obj-m := $(addprefix $(obj)/, $(multi-obj-m))
+multi-dtb-y := $(addprefix $(obj)/, $(multi-dtb-y))
+real-dtb-y := $(addprefix $(obj)/, $(real-dtb-y))
subdir-ym := $(addprefix $(obj)/,$(subdir-ym))
# Finds the multi-part object the current object will be linked into.
# If the object belongs to two or more multi-part objects, list them all.
modname-multi = $(sort $(foreach m,$(multi-obj-ym),\
- $(if $(filter $*.o, $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))),$(m:.o=))))
+ $(if $(filter $*.o, $(call suffix-search, $m, .o, -objs -y -m)),$(m:.o=))))
__modname = $(if $(modname-multi),$(modname-multi),$(basetarget))
@@ -252,6 +251,9 @@
# Shipped files
# ===========================================================================
+# 'cp' preserves permissions. If you use it to copy a file in read-only srctree,
+# the copy would be read-only as well, leading to an error when executing the
+# rule next time. Use 'cat' instead in order to generate a writable file.
quiet_cmd_shipped = SHIPPED $@
cmd_shipped = cat $< > $@
@@ -319,6 +321,9 @@
DTC_FLAGS += $(DTC_FLAGS_$(basetarget))
+# Set -@ if the target is a base DTB that overlay is applied onto
+DTC_FLAGS += $(if $(filter $(patsubst $(obj)/%,%,$@), $(base-dtb-y)), -@)
+
# Generate an assembly file to wrap the output of the device tree compiler
quiet_cmd_dt_S_dtb= DTB $@
cmd_dt_S_dtb= \
@@ -350,14 +355,12 @@
$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
$(call if_changed_dep,dtc)
-overlay-y := $(addprefix $(obj)/, $(overlay-y))
-
quiet_cmd_fdtoverlay = DTOVL $@
cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(real-prereqs)
-$(overlay-y): FORCE
+$(multi-dtb-y): FORCE
$(call if_changed,fdtoverlay)
-$(call multi_depend, $(overlay-y), .dtb, -dtbs)
+$(call multi_depend, $(multi-dtb-y), .dtb, -dtbs)
DT_CHECKER ?= dt-validate
DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),,-m)
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index 98ae1f5..961c91c 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-fixdep
+/fixdep
diff --git a/scripts/dtc/.gitignore b/scripts/dtc/.gitignore
index 8a8b62b..e0b5c1d 100644
--- a/scripts/dtc/.gitignore
+++ b/scripts/dtc/.gitignore
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-dtc
-fdtoverlay
+/dtc
+/fdtoverlay
diff --git a/scripts/gcc-plugins/.gitignore b/scripts/gcc-plugins/.gitignore
index b04e0f0..5cc385b 100644
--- a/scripts/gcc-plugins/.gitignore
+++ b/scripts/gcc-plugins/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-randomize_layout_seed.h
+/randomize_layout_seed.h
diff --git a/scripts/genksyms/.gitignore b/scripts/genksyms/.gitignore
index 999af71..0b275ab 100644
--- a/scripts/genksyms/.gitignore
+++ b/scripts/genksyms/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-genksyms
+/genksyms
diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile
index ce4f999..d6a422a 100644
--- a/scripts/genksyms/Makefile
+++ b/scripts/genksyms/Makefile
@@ -22,7 +22,7 @@
endif
-# -I needed for generated C source (shipped source)
+# -I needed for generated C source to include headers in source tree
HOSTCFLAGS_parse.tab.o := -I $(srctree)/$(src)
HOSTCFLAGS_lex.lex.o := -I $(srctree)/$(src)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 7d11268..f4de4c9 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -320,20 +320,6 @@
rm -f .vmlinux.d
}
-on_exit()
-{
- if [ $? -ne 0 ]; then
- cleanup
- fi
-}
-trap on_exit EXIT
-
-on_signals()
-{
- exit 1
-}
-trap on_signals HUP INT QUIT TERM
-
# Use "make V=1" to debug this script
case "${KBUILD_VERBOSE}" in
*1*)
diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore
index 07e4a39..0465ec3 100644
--- a/scripts/mod/.gitignore
+++ b/scripts/mod/.gitignore
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-elfconfig.h
-mk_elfconfig
-modpost
-devicetable-offsets.h
+/devicetable-offsets.h
+/elfconfig.h
+/mk_elfconfig
+/modpost
diff --git a/scripts/nsdeps b/scripts/nsdeps
index e8ce2a4..04c4b96 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -44,7 +44,7 @@
for source_file in $mod_source_files; do
sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
- cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
+ cat $source_file | grep MODULE_IMPORT_NS | LC_ALL=C sort -u >> ${source_file}.tmp
tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
if ! diff -q ${source_file} ${source_file}.tmp; then
mv ${source_file}.tmp ${source_file}
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 5053b78..c17e480 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -497,7 +497,7 @@
#
# Step 2: find the sections and mcount call sites
#
-open(IN, "LANG=C $objdump -hdr $inputfile|") || die "error running $objdump";
+open(IN, "LC_ALL=C $objdump -hdr $inputfile|") || die "error running $objdump";
my $text;
diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files
new file mode 100755
index 0000000..c3eb81c
--- /dev/null
+++ b/scripts/remove-stale-files
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+set -e
+
+# When you move, remove or rename generated files, you probably also update
+# .gitignore and cleaning rules in the Makefile. This is the right thing
+# to do. However, people usually do 'git pull', 'git bisect', etc. without
+# running 'make clean'. Then, the stale generated files are left over, often
+# causing build issues.
+#
+# Also, 'git status' shows such stale build artifacts as untracked files.
+# What is worse, some people send a wrong patch to get them back to .gitignore
+# without checking the commit history.
+#
+# So, when you (re)move generated files, please move the cleaning rules from
+# the Makefile to this script. This is run before Kbuild starts building
+# anything, so people will not be annoyed by such garbage files.
+#
+# This script is not intended to grow endlessly. Rather, it is a temporary scrap
+# yard. Stale files stay in this file for a while (for some release cycles?),
+# then will be really dead and removed from the code base entirely.
+
+# These were previously generated source files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+if [ -n "${building_out_of_srctree}" ]; then
+ for f in fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+ do
+ rm -f arch/arm/boot/compressed/${f}
+ done
+fi
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index bb709ed..db941f6 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -126,7 +126,7 @@
fi
# Check for svn and a svn repo.
- if rev=$(LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
+ if rev=$(LC_ALL=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
rev=$(echo $rev | awk '{print $NF}')
printf -- '-svn%s' "$rev"
diff --git a/scripts/tags.sh b/scripts/tags.sh
index fd96734..db8ba41 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -326,5 +326,5 @@
# Remove structure forward declarations.
if [ -n "$remove_structs" ]; then
- LANG=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
+ LC_ALL=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
fi
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 3998e17..b638fc2 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -1204,11 +1204,17 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch,
*index = ch;
return "Headphone";
case AUTO_PIN_LINE_OUT:
- /* This deals with the case where we have two DACs and
- * one LO, one HP and one Speaker */
- if (!ch && cfg->speaker_outs && cfg->hp_outs) {
- bool hp_lo_shared = !path_has_mixer(codec, spec->hp_paths[0], ctl_type);
- bool spk_lo_shared = !path_has_mixer(codec, spec->speaker_paths[0], ctl_type);
+ /* This deals with the case where one HP or one Speaker or
+ * one HP + one Speaker need to share the DAC with LO
+ */
+ if (!ch) {
+ bool hp_lo_shared = false, spk_lo_shared = false;
+
+ if (cfg->speaker_outs)
+ spk_lo_shared = !path_has_mixer(codec,
+ spec->speaker_paths[0], ctl_type);
+ if (cfg->hp_outs)
+ hp_lo_shared = !path_has_mixer(codec, spec->hp_paths[0], ctl_type);
if (hp_lo_shared && spk_lo_shared)
return spec->vmaster_mute.hook ? "PCM" : "Master";
if (hp_lo_shared)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index bd7bfd7..6d58f24 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4338,6 +4338,35 @@ static void alc245_fixup_hp_x360_amp(struct hda_codec *codec,
}
}
+/* Toggle GPIO2 each time a stream is started; the PREPARE action is used in place of a start event */
+static void alc274_hp_envy_pcm_hook(struct hda_pcm_stream *hinfo,
+ struct hda_codec *codec,
+ struct snd_pcm_substream *substream,
+ int action)
+{
+ switch (action) {
+ case HDA_GEN_PCM_ACT_PREPARE:
+ alc_update_gpio_data(codec, 0x04, true);
+ break;
+ case HDA_GEN_PCM_ACT_CLEANUP:
+ alc_update_gpio_data(codec, 0x04, false);
+ break;
+ }
+}
+
+static void alc274_fixup_hp_envy_gpio(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PROBE) {
+ spec->gpio_mask |= 0x04;
+ spec->gpio_dir |= 0x04;
+ spec->gen.pcm_playback_hook = alc274_hp_envy_pcm_hook;
+ }
+}
+
static void alc_update_coef_led(struct hda_codec *codec,
struct alc_coef_led *led,
bool polarity, bool on)
@@ -5695,6 +5724,18 @@ static void alc_fixup_tpt470_dacs(struct hda_codec *codec,
spec->gen.preferred_dacs = preferred_pairs;
}
+static void alc295_fixup_asus_dacs(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ static const hda_nid_t preferred_pairs[] = {
+ 0x17, 0x02, 0x21, 0x03, 0
+ };
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE)
+ spec->gen.preferred_dacs = preferred_pairs;
+}
+
static void alc_shutup_dell_xps13(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
@@ -6453,6 +6494,7 @@ enum {
ALC255_FIXUP_XIAOMI_HEADSET_MIC,
ALC274_FIXUP_HP_MIC,
ALC274_FIXUP_HP_HEADSET_MIC,
+ ALC274_FIXUP_HP_ENVY_GPIO,
ALC256_FIXUP_ASUS_HPE,
ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
ALC287_FIXUP_HP_GPIO_LED,
@@ -6463,6 +6505,8 @@ enum {
ALC256_FIXUP_ACER_HEADSET_MIC,
ALC285_FIXUP_IDEAPAD_S740_COEF,
ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST,
+ ALC295_FIXUP_ASUS_DACS,
+ ALC295_FIXUP_HP_OMEN,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -7894,6 +7938,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC274_FIXUP_HP_MIC
},
+ [ALC274_FIXUP_HP_ENVY_GPIO] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc274_fixup_hp_envy_gpio,
+ },
[ALC256_FIXUP_ASUS_HPE] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -7963,6 +8011,30 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC285_FIXUP_HP_MUTE_LED,
},
+ [ALC295_FIXUP_ASUS_DACS] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc295_fixup_asus_dacs,
+ },
+ [ALC295_FIXUP_HP_OMEN] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x12, 0xb7a60130 },
+ { 0x13, 0x40000000 },
+ { 0x14, 0x411111f0 },
+ { 0x16, 0x411111f0 },
+ { 0x17, 0x90170110 },
+ { 0x18, 0x411111f0 },
+ { 0x19, 0x02a11030 },
+ { 0x1a, 0x411111f0 },
+ { 0x1b, 0x04a19030 },
+ { 0x1d, 0x40600001 },
+ { 0x1e, 0x411111f0 },
+ { 0x21, 0x03211020 },
+ {}
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8121,8 +8193,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+ SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
@@ -8161,6 +8235,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
@@ -8524,6 +8599,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"},
{.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"},
{.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
+ {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
{}
};
#define ALC225_STANDARD_PINS \
@@ -8801,6 +8877,16 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x19, 0x03a11020},
{0x21, 0x0321101f}),
SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x19, 0x04a11040},
+ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
+ {0x14, 0x90170110},
+ {0x19, 0x04a11040},
+ {0x1d, 0x40600001},
+ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
{0x14, 0x90170110},
{0x19, 0x04a11040},
{0x21, 0x04211020}),
@@ -8971,10 +9057,6 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
{0x19, 0x40000000},
{0x1a, 0x40000000}),
- SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
- {0x14, 0x90170110},
- {0x19, 0x04a11040},
- {0x21, 0x04211020}),
{}
};
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 646deb6..c5794e8 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -337,6 +337,13 @@ static const struct usbmix_name_map bose_companion5_map[] = {
{ 0 } /* terminator */
};
+/* Sennheiser Communications Headset [PC 8]: the device reports a negative maximum dB value (-6), so its dB range is overridden */
+static const struct usbmix_dB_map sennheiser_pc8_dB = {-9500, 0};
+static const struct usbmix_name_map sennheiser_pc8_map[] = {
+ { 9, NULL, .dB = &sennheiser_pc8_dB },
+ { 0 } /* terminator */
+};
+
/*
* Dell usb dock with ALC4020 codec had a firmware problem where it got
* screwed up when zero volume is passed; just skip it as a workaround
@@ -593,6 +600,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.id = USB_ID(0x17aa, 0x1046),
.map = lenovo_p620_rear_map,
},
+ {
+ /* Sennheiser Communications Headset [PC 8] */
+ .id = USB_ID(0x1395, 0x0025),
+ .map = sennheiser_pc8_map,
+ },
{ 0 } /* terminator */
};
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index e7a8d84..1d80ad4 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -202,9 +202,11 @@ static inline int roundup_len(__u32 len)
return (len + 7) / 8 * 8;
}
-static int ringbuf_process_ring(struct ring* r)
+static int64_t ringbuf_process_ring(struct ring* r)
{
- int *len_ptr, len, err, cnt = 0;
+ int *len_ptr, len, err;
+ /* 64-bit to avoid overflow in case of extreme application behavior */
+ int64_t cnt = 0;
unsigned long cons_pos, prod_pos;
bool got_new_data;
void *sample;
@@ -244,12 +246,14 @@ static int ringbuf_process_ring(struct ring* r)
}
/* Consume available ring buffer(s) data without event polling.
- * Returns number of records consumed across all registered ring buffers, or
- * negative number if any of the callbacks return error.
+ * Returns number of records consumed across all registered ring buffers (or
+ * INT_MAX, whichever is less), or negative number if any of the callbacks
+ * return error.
*/
int ring_buffer__consume(struct ring_buffer *rb)
{
- int i, err, res = 0;
+ int64_t err, res = 0;
+ int i;
for (i = 0; i < rb->ring_cnt; i++) {
struct ring *ring = &rb->rings[i];
@@ -259,18 +263,24 @@ int ring_buffer__consume(struct ring_buffer *rb)
return err;
res += err;
}
+ if (res > INT_MAX)
+ return INT_MAX;
return res;
}
/* Poll for available data and consume records, if any are available.
- * Returns number of records consumed, or negative number, if any of the
- * registered callbacks returned error.
+ * Returns number of records consumed (or INT_MAX, whichever is less), or
+ * negative number, if any of the registered callbacks returned error.
*/
int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
{
- int i, cnt, err, res = 0;
+ int i, cnt;
+ int64_t err, res = 0;
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ if (cnt < 0)
+ return -errno;
+
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
struct ring *ring = &rb->rings[ring_id];
@@ -280,7 +290,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
return err;
res += err;
}
- return cnt < 0 ? -errno : res;
+ if (res > INT_MAX)
+ return INT_MAX;
+ return res;
}
/* Get an fd that can be used to sleep until data is available in the ring(s) */
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index a958c22..dffbcaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -43,6 +43,8 @@ void test_snprintf_positive(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ skel->bss->pid = getpid();
+
if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index 951a030..e35129b 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -5,6 +5,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+__u32 pid = 0;
+
char num_out[64] = {};
long num_ret = 0;
@@ -42,6 +44,9 @@ int handler(const void *ctx)
static const char str1[] = "str1";
static const char longstr[] = "longstr";
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
/* Integer types */
num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
"%d %u %x %li %llu %lX",
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9236609..3c4cb72 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -274,7 +274,7 @@
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
local err=0
- LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
ip netns delete ${disabled_ns}
diff --git a/usr/.gitignore b/usr/.gitignore
index 935442e..8996e7a 100644
--- a/usr/.gitignore
+++ b/usr/.gitignore
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-gen_init_cpio
-initramfs_data.cpio
+/gen_init_cpio
+/initramfs_data.cpio
/initramfs_inc_data
diff --git a/usr/gen_initramfs.sh b/usr/gen_initramfs.sh
index 8ae8316..63476bb 100755
--- a/usr/gen_initramfs.sh
+++ b/usr/gen_initramfs.sh
@@ -147,7 +147,7 @@
header "$1"
srcdir=$(echo "$1" | sed -e 's://*:/:g')
- dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LANG=C sort)
+ dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LC_ALL=C sort)
# If $dirlist is only one line, then the directory is empty
if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then
diff --git a/usr/include/.gitignore b/usr/include/.gitignore
index d2fab78..17b0ba1 100644
--- a/usr/include/.gitignore
+++ b/usr/include/.gitignore
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-*
-!.gitignore
-!Makefile
+/*/