Merge tag 'iommu-fixes-v4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU fixes from Joerg Roedel:
"Five patches queued up:
- Two patches for the AMD and Intel IOMMU drivers to fix alias
handling and ATS handling.
- Fix build error with arm io-pgtable code
- Two documentation fixes"
* tag 'iommu-fixes-v4.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
iommu: Update struct iommu_ops comments
iommu/vt-d: Fix link to Intel IOMMU Specification
iommu/amd: Correct the wrong setting of alias DTE in do_attach
iommu/vt-d: Don't skip PCI devices when disabling IOTLB
iommu/io-pgtable-arm: Fix io-pgtable-arm build failure
diff --git a/.mailmap b/.mailmap
index b1e9a97..7e6c533 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@
Andrew Morton <akpm@linux-foundation.org>
Andrew Vasquez <andrew.vasquez@qlogic.com>
Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
Archit Taneja <archit@ti.com>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 053f613..07e4cdf 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3025,7 +3025,7 @@
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
which is the maximum number of possibly pending cpu-local interrupts.
-4.90 KVM_SMI
+4.96 KVM_SMI
Capability: KVM_CAP_X86_SMM
Architectures: x86
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd822d8..307237c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -27,6 +27,8 @@
endif
KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads)
KBUILD_AFLAGS += $(lseinstr)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 18ca9fb..86581f7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -16,7 +16,6 @@
CONFIG_LOG_BUF_SHIFT=14
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
CONFIG_CGROUP_HUGETLB=y
# CONFIG_UTS_NS is not set
# CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@
CONFIG_ARCH_LAYERSCAPE=y
CONFIG_ARCH_HISI=y
CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
CONFIG_ARCH_ROCKCHIP=y
CONFIG_ARCH_SEATTLE=y
CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_R8A7795=y
CONFIG_ARCH_STRATIX10=y
CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
CONFIG_ARCH_SPRD=y
CONFIG_ARCH_THUNDER=y
CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@
CONFIG_ARCH_ZYNQMP=y
CONFIG_PCI=y
CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
+CONFIG_XEN=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
@@ -100,7 +102,11 @@
CONFIG_NETDEVICES=y
CONFIG_TUN=y
CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
CONFIG_SKY2=y
CONFIG_RAVB=y
CONFIG_SMC91X=y
@@ -117,25 +123,23 @@
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=11
CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_XILINX_PS_UART=y
CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
CONFIG_VIRTIO_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
CONFIG_I2C_RCAR=y
CONFIG_SPI=y
CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@
CONFIG_MMC_SDHCI_TEGRA=y
CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@
CONFIG_LEDS_TRIGGER_CPU=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
CONFIG_QCOM_BAM_DMA=y
CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
CONFIG_COMMON_CLK_CS2000_CP=y
CONFIG_COMMON_CLK_QCOM=y
CONFIG_MSM_GCC_8916=y
CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
CONFIG_QCOM_SMEM=y
CONFIG_QCOM_SMD=y
CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
CONFIG_PHY_XGENE=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
@@ -239,6 +246,7 @@
# CONFIG_FTRACE is not set
CONFIG_MEMTEST=y
CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d545d7a..bf464de3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -67,11 +67,11 @@
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
@@ -153,6 +153,7 @@
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
#define pte_valid_young(pte) \
@@ -278,13 +277,13 @@
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_valid_user(pte)) {
- if (!pte_special(pte) && pte_exec(pte))
- __sync_icache_dcache(pte, addr);
+ if (pte_valid(pte)) {
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
+ if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte, addr);
}
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ffe9c2b6..917d981 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -514,9 +514,14 @@
#endif
/* EL2 debug */
+ mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx x0, x0, #8, #4
+ cmp x0, #1
+ b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
msr mdcr_el2, x0 // all PMU counters from EL1
+4:
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8..999633b 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -65,6 +65,16 @@
#ifdef CONFIG_EFI
/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym) ABSOLUTE(sym)
+
+/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
@@ -73,25 +83,25 @@
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_memcmp = __pi_memcmp;
-__efistub_memchr = __pi_memchr;
-__efistub_memcpy = __pi_memcpy;
-__efistub_memmove = __pi_memmove;
-__efistub_memset = __pi_memset;
-__efistub_strlen = __pi_strlen;
-__efistub_strcmp = __pi_strcmp;
-__efistub_strncmp = __pi_strncmp;
-__efistub___flush_dcache_area = __pi___flush_dcache_area;
+__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area);
#ifdef CONFIG_KASAN
-__efistub___memcpy = __pi_memcpy;
-__efistub___memmove = __pi_memmove;
-__efistub___memset = __pi_memset;
+__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset = KALLSYMS_HIDE(__pi_memset);
#endif
-__efistub__text = _text;
-__efistub__end = _end;
-__efistub__edata = _edata;
+__efistub__text = KALLSYMS_HIDE(_text);
+__efistub__end = KALLSYMS_HIDE(_end);
+__efistub__edata = KALLSYMS_HIDE(_edata);
#endif
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a11..0adbebb 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@
PCI_START_NR,
PCI_END_NR,
MODULES_START_NR,
- MODUELS_END_NR,
+ MODULES_END_NR,
KERNEL_SPACE_NR,
};
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..cab7a5b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@
void __init kasan_init(void)
{
struct memblock_region *reg;
+ int i;
/*
* We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@
pfn_to_nid(virt_to_pfn(start)));
}
+ /*
+ * KAsan may reuse the contents of kasan_zero_pte directly, so we
+ * should make sure that it maps the zero page read-only.
+ */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_zero_pte[i],
+ pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
memset(kasan_zero_page, 0, PAGE_SIZE);
cpu_set_ttbr1(__pa(swapper_pg_dir));
flush_tlb_all();
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3571c73..cf62407 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -57,6 +57,9 @@
if (end < MODULES_VADDR || end >= MODULES_END)
return -EINVAL;
+ if (!numpages)
+ return 0;
+
data.set_mask = set_mask;
data.clear_mask = clear_mask;
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 146bd99..e6a30e1 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -84,3 +84,15 @@
b.lo 9998b
dsb \domain
.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+ .macro reset_pmuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx \tmpreg, \tmpreg, #8, #4
+ cmp \tmpreg, #1 // Skip if no PMU present
+ b.lt 9000f
+ msr pmuserenr_el0, xzr // Disable PMU access from EL0
+9000:
+ .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a3d867e..c164d2c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -117,7 +117,7 @@
*/
ubfx x11, x11, #1, #1
msr oslar_el1, x11
- msr pmuserenr_el0, xzr // Disable PMU access from EL0
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
mov x0, x12
dsb nsh // Make sure local tlb invalidation completed
isb
@@ -154,7 +154,7 @@
msr cpacr_el1, x0 // Enable FP/ASIMD
mov x0, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x0 // access to the DCC from EL0
- msr pmuserenr_el0, xzr // Disable PMU access from EL0
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
/*
* Memory region attributes for LPAE:
*
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 271fefb..9d08d8c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS 512
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253..9bf7031 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@
static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
u64 esid, esid_1t;
int slb_nr;
struct kvmppc_slb *slbe;
dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
- vcpu_book3s = to_book3s(vcpu);
-
esid = GET_ESID(rb);
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cff207b..baeddb0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -833,6 +833,24 @@
vcpu->stat.sum_exits++;
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (vcpu->arch.shregs.msr & MSR_HV) {
+ printk(KERN_EMERG "KVM trap in HV mode!\n");
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ kvmppc_dump_regs(vcpu);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ return RESUME_HOST;
+ }
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badc..6ee26de 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2153,7 +2153,7 @@
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
- rlwimi r5, r4, 1, DAWRX_WT
+ rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
std r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
+ rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+ bne mc_cont /* if so, exit to host */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405..a3b182d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -919,21 +919,17 @@
r = -ENXIO;
break;
}
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
break;
case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
break;
#endif /* CONFIG_ALTIVEC */
default:
@@ -974,17 +970,21 @@
r = -ENXIO;
break;
}
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
default:
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6742414..8959ebb 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -546,7 +546,6 @@
struct kvm_s390_sie_block *sie_block;
unsigned int host_acrs[NUM_ACRS];
struct fpu host_fpregs;
- struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52c..5ea5af3 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select SRCU
+ select KVM_VFIO
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b5534..d42fa38 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 835d60b..4af21c7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1423,44 +1423,18 @@
return 0;
}
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination. Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
- dst->fpc = current->thread.fpu.fpc;
- dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
- current->thread.fpu.fpc = from->fpc;
- current->thread.fpu.regs = from->regs;
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
save_fpu_regs();
- save_fpu_to(&vcpu->arch.host_fpregs);
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- if (test_kvm_facility(vcpu->kvm, 129)) {
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
- /*
- * Use the register save area in the SIE-control block
- * for register restore and save in kvm_arch_vcpu_put()
- */
- current->thread.fpu.vxrs =
- (__vector128 *)&vcpu->run->s.regs.vrs;
- } else
- load_fpu_from(&vcpu->arch.guest_fpregs);
-
+ /* Depending on MACHINE_HAS_VX, data stored to vrs either
+ * has vector register or floating point register format.
+ */
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
+ /* Save guest register state */
save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- if (test_kvm_facility(vcpu->kvm, 129))
- /*
- * kvm_arch_vcpu_load() set up the register save area to
- * the &vcpu->run->s.regs.vrs and, thus, the vector registers
- * are already saved. Only the floating-point control must be
- * copied.
- */
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- else
- save_fpu_to(&vcpu->arch.guest_fpregs);
- load_fpu_from(&vcpu->arch.host_fpregs);
+ /* Restore host register state */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
- vcpu->arch.guest_fpregs.fpc = 0;
- asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ /* make sure the new fpc will be lazily loaded */
+ save_fpu_regs();
+ current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- /*
- * Allocate a save area for floating-point registers. If the vector
- * extension is available, register contents are saved in the SIE
- * control block. The allocated save area is still required in
- * particular places, for example, in kvm_s390_vcpu_store_status().
- */
- vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
- GFP_KERNEL);
- if (!vcpu->arch.guest_fpregs.fprs)
- goto out_free_sie_block;
-
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
@@ -1879,19 +1837,27 @@
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ /* make sure the new values will be lazily loaded */
+ save_fpu_regs();
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- save_fpu_regs();
- load_fpu_from(&vcpu->arch.guest_fpregs);
+ current->thread.fpu.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+ else
+ memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
- fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+ else
+ memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+ fpu->fpc = current->thread.fpu.fpc;
return 0;
}
@@ -2396,6 +2362,7 @@
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
+ freg_t fprs[NUM_FPRS];
unsigned int px;
u64 clkcomp;
int rc;
@@ -2411,8 +2378,16 @@
gpa = px;
} else
gpa -= __LC_FPREGS_SAVE_AREA;
- rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
- vcpu->arch.guest_fpregs.fprs, 128);
+
+ /* manually convert vector registers if necessary */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ fprs, 128);
+ } else {
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.vrs, 128);
+ }
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@
rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
&px, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
- &vcpu->arch.guest_fpregs.fpc, 4);
+ &vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@
* it into the save area
*/
save_fpu_regs();
- if (test_kvm_facility(vcpu->kvm, 129)) {
- /*
- * If the vector extension is available, the vector registers
- * which overlaps with floating-point registers are saved in
- * the SIE-control block. Hence, extract the floating-point
- * registers and the FPC value and store them in the
- * guest_fpregs structure.
- */
- vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
- convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
- current->thread.fpu.vxrs);
- } else
- save_fpu_to(&vcpu->arch.guest_fpregs);
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c64..8a84e05 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
#endif
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
#include <asm-generic/barrier.h>
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1699df5..888a7fe 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -70,6 +70,18 @@
return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}
+static inline unsigned get_max_io_size(struct request_queue *q,
+ struct bio *bio)
+{
+ unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+ unsigned mask = queue_logical_block_size(q) - 1;
+
+ /* aligned to logical block size */
+ sectors &= ~(mask >> 9);
+
+ return sectors;
+}
+
static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,
@@ -81,6 +93,7 @@
unsigned front_seg_size = bio->bi_seg_front_size;
bool do_split = true;
struct bio *new = NULL;
+ const unsigned max_sectors = get_max_io_size(q, bio);
bio_for_each_segment(bv, bio, iter) {
/*
@@ -90,20 +103,19 @@
if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
goto split;
- if (sectors + (bv.bv_len >> 9) >
- blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+ if (sectors + (bv.bv_len >> 9) > max_sectors) {
/*
* Consider this a new segment if we're splitting in
* the middle of this vector.
*/
if (nsegs < queue_max_segments(q) &&
- sectors < blk_max_size_offset(q,
- bio->bi_iter.bi_sector)) {
+ sectors < max_sectors) {
nsegs++;
- sectors = blk_max_size_offset(q,
- bio->bi_iter.bi_sector);
+ sectors = max_sectors;
}
- goto split;
+ if (sectors)
+ goto split;
+ /* Make this single bvec as the 1st segment */
}
if (bvprvp && blk_queue_cluster(q)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 313b0cc..82edf95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2278,60 +2278,60 @@
#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
#define amdgpu_dpm_get_temperature(adev) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
- (adev)->pm.funcs->get_temperature((adev))
+ (adev)->pm.funcs->get_temperature((adev)))
#define amdgpu_dpm_set_fan_control_mode(adev, m) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
- (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+ (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
#define amdgpu_dpm_get_fan_control_mode(adev) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
- (adev)->pm.funcs->get_fan_control_mode((adev))
+ (adev)->pm.funcs->get_fan_control_mode((adev)))
#define amdgpu_dpm_set_fan_speed_percent(adev, s) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
- (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+ (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
#define amdgpu_dpm_get_fan_speed_percent(adev, s) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
- (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+ (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
#define amdgpu_dpm_get_sclk(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->get_sclk((adev), (l))
+ (adev)->pm.funcs->get_sclk((adev), (l)))
#define amdgpu_dpm_get_mclk(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->get_mclk((adev), (l))
+ (adev)->pm.funcs->get_mclk((adev), (l)))
#define amdgpu_dpm_force_performance_level(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->force_performance_level((adev), (l))
+ (adev)->pm.funcs->force_performance_level((adev), (l)))
#define amdgpu_dpm_powergate_uvd(adev, g) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
- (adev)->pm.funcs->powergate_uvd((adev), (g))
+ (adev)->pm.funcs->powergate_uvd((adev), (g)))
#define amdgpu_dpm_powergate_vce(adev, g) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
- (adev)->pm.funcs->powergate_vce((adev), (g))
+ (adev)->pm.funcs->powergate_vce((adev), (g)))
#define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
- (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+ (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
#define amdgpu_dpm_get_current_power_state(adev) \
(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6f89f8e..b882e81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -478,9 +478,9 @@
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
unsigned i;
- amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
if (!error) {
+ amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
* This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cfb6caa..9191467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -333,6 +333,10 @@
if (!adev->mode_info.mode_config_initialized)
return 0;
+ /* don't init fbdev if there are no connectors */
+ if (list_empty(&adev->ddev->mode_config.connector_list))
+ return 0;
+
/* select 8 bpp console on low vram cards */
if (adev->mc.real_vram_size <= (32*1024*1024))
bpp_sel = 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c3ce103..a2a16ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -399,7 +399,8 @@
}
if (fpfn > bo->placements[i].fpfn)
bo->placements[i].fpfn = fpfn;
- if (lpfn && lpfn < bo->placements[i].lpfn)
+ if (!bo->placements[i].lpfn ||
+ (lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 5ee9a06..b9d0d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -99,13 +99,24 @@
#ifdef CONFIG_DRM_AMD_POWERPLAY
switch (adev->asic_type) {
- case CHIP_TONGA:
- case CHIP_FIJI:
- adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
- break;
- default:
- adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
- break;
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+ break;
+ /* These chips don't have powerplay implemenations */
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ case CHIP_KAVERI:
+ case CHIP_TOPAZ:
+ default:
+ adev->pp_enabled = false;
+ break;
}
#else
adev->pp_enabled = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 78e9b0f..d1f234d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -487,7 +487,7 @@
seq_printf(m, "rptr: 0x%08x [%5d]\n",
rptr, rptr);
- rptr_next = ~0;
+ rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
ring->wptr, ring->wptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aefc668..9599f75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1282,7 +1282,7 @@
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT * 8);
- unsigned pd_size, pd_entries, pts_size;
+ unsigned pd_size, pd_entries;
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@
pd_entries = amdgpu_vm_num_pdes(adev);
/* allocate page table array */
- pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
- vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+ vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
if (vm->page_tables == NULL) {
DRM_ERROR("Cannot allocate memory for page table array\n");
return -ENOMEM;
@@ -1361,7 +1360,7 @@
for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
- kfree(vm->page_tables);
+ drm_free_large(vm->page_tables);
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 13235d8..95c0cdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4186,7 +4186,18 @@
gfx_v8_0_cp_gfx_enable(adev, false);
/* Disable MEC parsing/prefetching */
- /* XXX todo */
+ gfx_v8_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_STALL, 1);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_CLEAR, 1);
+ WREG32(mmGMCON_DEBUG, tmp);
+
+ udelay(50);
+ }
if (grbm_soft_reset) {
tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4226,16 @@
WREG32(mmSRBM_SOFT_RESET, tmp);
tmp = RREG32(mmSRBM_SOFT_RESET);
}
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_STALL, 0);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_CLEAR, 0);
+ WREG32(mmGMCON_DEBUG, tmp);
+ }
+
/* Wait a little for things to settle down */
udelay(50);
gfx_v8_0_print_status((void *)adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index f4a1346..0497784 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -122,25 +122,12 @@
static int tonga_dpm_suspend(void *handle)
{
- return 0;
+ return tonga_dpm_hw_fini(handle);
}
static int tonga_dpm_resume(void *handle)
{
- int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- mutex_lock(&adev->pm.mutex);
-
- ret = tonga_smu_start(adev);
- if (ret) {
- DRM_ERROR("SMU start failed\n");
- goto fail;
- }
-
-fail:
- mutex_unlock(&adev->pm.mutex);
- return ret;
+ return tonga_dpm_hw_init(handle);
}
static int tonga_dpm_set_clockgating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8f5d5ed..aa67244 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -64,6 +64,11 @@
if (ret == 0)
ret = hwmgr->hwmgr_func->backend_init(hwmgr);
+ if (ret)
+ printk("amdgpu: powerplay initialization failed\n");
+ else
+ printk("amdgpu: powerplay initialized\n");
+
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 873a8d2..ec222c6 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -272,6 +272,9 @@
UCODE_ID_CP_MEC_JT1_MASK |
UCODE_ID_CP_MEC_JT2_MASK;
+ if (smumgr->chip_id == CHIP_STONEY)
+ fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
cz_request_smu_load_fw(smumgr);
cz_check_fw_load_finish(smumgr, fw_to_check);
@@ -282,7 +285,7 @@
return ret;
}
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
enum cz_scratch_entry firmware_enum)
{
uint8_t ret = 0;
@@ -292,7 +295,10 @@
ret = UCODE_ID_SDMA0;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
- ret = UCODE_ID_SDMA1;
+ if (smumgr->chip_id == CHIP_STONEY)
+ ret = UCODE_ID_SDMA0;
+ else
+ ret = UCODE_ID_SDMA1;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@
ret = UCODE_ID_CP_MEC_JT1;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
- ret = UCODE_ID_CP_MEC_JT2;
+ if (smumgr->chip_id == CHIP_STONEY)
+ ret = UCODE_ID_CP_MEC_JT1;
+ else
+ ret = UCODE_ID_CP_MEC_JT2;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@
struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
task->type = type;
- task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+ task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@
struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
task->type = TASK_TYPE_UCODE_LOAD;
- task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+ task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@
CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
@@ -551,7 +566,11 @@
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@
CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@
for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
- firmware_type = cz_translate_firmware_enum_to_arg(
+ firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
firmware_list[i]);
ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 57cccd6..7c52306 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -946,9 +946,23 @@
}
}
-static bool framebuffer_changed(struct drm_device *dev,
- struct drm_atomic_state *old_state,
- struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update. This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+ struct drm_atomic_state *old_state,
+ struct drm_crtc *crtc)
{
struct drm_plane *plane;
struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@
return false;
}
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
/**
* drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@
if (old_state->legacy_cursor_update)
continue;
- if (!framebuffer_changed(dev, old_state, crtc))
+ if (!drm_atomic_helper_framebuffer_changed(dev,
+ old_state, crtc))
continue;
ret = drm_crtc_vblank_get(crtc);
diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
index 9e585d5..e881482 100644
--- a/drivers/gpu/drm/etnaviv/common.xml.h
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -8,8 +8,8 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state_vg.xml ( 5973 bytes, from 2015-03-25 11:26:01)
-- common.xml ( 18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53)
+- common.xml ( 18379 bytes, from 2015-12-12 09:02:53)
Copyright (C) 2015
*/
@@ -30,15 +30,19 @@
#define ENDIAN_MODE_NO_SWAP 0x00000000
#define ENDIAN_MODE_SWAP_16 0x00000001
#define ENDIAN_MODE_SWAP_32 0x00000002
+#define chipModel_GC200 0x00000200
#define chipModel_GC300 0x00000300
#define chipModel_GC320 0x00000320
+#define chipModel_GC328 0x00000328
#define chipModel_GC350 0x00000350
#define chipModel_GC355 0x00000355
#define chipModel_GC400 0x00000400
#define chipModel_GC410 0x00000410
#define chipModel_GC420 0x00000420
+#define chipModel_GC428 0x00000428
#define chipModel_GC450 0x00000450
#define chipModel_GC500 0x00000500
+#define chipModel_GC520 0x00000520
#define chipModel_GC530 0x00000530
#define chipModel_GC600 0x00000600
#define chipModel_GC700 0x00000700
@@ -46,9 +50,16 @@
#define chipModel_GC860 0x00000860
#define chipModel_GC880 0x00000880
#define chipModel_GC1000 0x00001000
+#define chipModel_GC1500 0x00001500
#define chipModel_GC2000 0x00002000
#define chipModel_GC2100 0x00002100
+#define chipModel_GC2200 0x00002200
+#define chipModel_GC2500 0x00002500
+#define chipModel_GC3000 0x00003000
#define chipModel_GC4000 0x00004000
+#define chipModel_GC5000 0x00005000
+#define chipModel_GC5200 0x00005200
+#define chipModel_GC6400 0x00006400
#define RGBA_BITS_R 0x00000001
#define RGBA_BITS_G 0x00000002
#define RGBA_BITS_B 0x00000004
@@ -160,7 +171,7 @@
#define chipMinorFeatures2_UNK8 0x00000100
#define chipMinorFeatures2_UNK9 0x00000200
#define chipMinorFeatures2_UNK10 0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16 0x00000800
+#define chipMinorFeatures2_HALTI1 0x00000800
#define chipMinorFeatures2_UNK12 0x00001000
#define chipMinorFeatures2_UNK13 0x00002000
#define chipMinorFeatures2_UNK14 0x00004000
@@ -189,7 +200,7 @@
#define chipMinorFeatures3_UNK5 0x00000020
#define chipMinorFeatures3_UNK6 0x00000040
#define chipMinorFeatures3_UNK7 0x00000080
-#define chipMinorFeatures3_UNK8 0x00000100
+#define chipMinorFeatures3_FAST_MSAA 0x00000100
#define chipMinorFeatures3_UNK9 0x00000200
#define chipMinorFeatures3_BUG_FIXES10 0x00000400
#define chipMinorFeatures3_UNK11 0x00000800
@@ -199,7 +210,7 @@
#define chipMinorFeatures3_UNK15 0x00008000
#define chipMinorFeatures3_UNK16 0x00010000
#define chipMinorFeatures3_UNK17 0x00020000
-#define chipMinorFeatures3_UNK18 0x00040000
+#define chipMinorFeatures3_ACE 0x00040000
#define chipMinorFeatures3_UNK19 0x00080000
#define chipMinorFeatures3_UNK20 0x00100000
#define chipMinorFeatures3_UNK21 0x00200000
@@ -207,7 +218,7 @@
#define chipMinorFeatures3_UNK23 0x00800000
#define chipMinorFeatures3_UNK24 0x01000000
#define chipMinorFeatures3_UNK25 0x02000000
-#define chipMinorFeatures3_UNK26 0x04000000
+#define chipMinorFeatures3_NEW_HZ 0x04000000
#define chipMinorFeatures3_UNK27 0x08000000
#define chipMinorFeatures3_UNK28 0x10000000
#define chipMinorFeatures3_UNK29 0x20000000
@@ -229,9 +240,9 @@
#define chipMinorFeatures4_UNK13 0x00002000
#define chipMinorFeatures4_UNK14 0x00004000
#define chipMinorFeatures4_UNK15 0x00008000
-#define chipMinorFeatures4_UNK16 0x00010000
+#define chipMinorFeatures4_HALTI2 0x00010000
#define chipMinorFeatures4_UNK17 0x00020000
-#define chipMinorFeatures4_UNK18 0x00040000
+#define chipMinorFeatures4_SMALL_MSAA 0x00040000
#define chipMinorFeatures4_UNK19 0x00080000
#define chipMinorFeatures4_UNK20 0x00100000
#define chipMinorFeatures4_UNK21 0x00200000
@@ -245,5 +256,37 @@
#define chipMinorFeatures4_UNK29 0x20000000
#define chipMinorFeatures4_UNK30 0x40000000
#define chipMinorFeatures4_UNK31 0x80000000
+#define chipMinorFeatures5_UNK0 0x00000001
+#define chipMinorFeatures5_UNK1 0x00000002
+#define chipMinorFeatures5_UNK2 0x00000004
+#define chipMinorFeatures5_UNK3 0x00000008
+#define chipMinorFeatures5_UNK4 0x00000010
+#define chipMinorFeatures5_UNK5 0x00000020
+#define chipMinorFeatures5_UNK6 0x00000040
+#define chipMinorFeatures5_UNK7 0x00000080
+#define chipMinorFeatures5_UNK8 0x00000100
+#define chipMinorFeatures5_HALTI3 0x00000200
+#define chipMinorFeatures5_UNK10 0x00000400
+#define chipMinorFeatures5_UNK11 0x00000800
+#define chipMinorFeatures5_UNK12 0x00001000
+#define chipMinorFeatures5_UNK13 0x00002000
+#define chipMinorFeatures5_UNK14 0x00004000
+#define chipMinorFeatures5_UNK15 0x00008000
+#define chipMinorFeatures5_UNK16 0x00010000
+#define chipMinorFeatures5_UNK17 0x00020000
+#define chipMinorFeatures5_UNK18 0x00040000
+#define chipMinorFeatures5_UNK19 0x00080000
+#define chipMinorFeatures5_UNK20 0x00100000
+#define chipMinorFeatures5_UNK21 0x00200000
+#define chipMinorFeatures5_UNK22 0x00400000
+#define chipMinorFeatures5_UNK23 0x00800000
+#define chipMinorFeatures5_UNK24 0x01000000
+#define chipMinorFeatures5_UNK25 0x02000000
+#define chipMinorFeatures5_UNK26 0x04000000
+#define chipMinorFeatures5_UNK27 0x08000000
+#define chipMinorFeatures5_UNK28 0x10000000
+#define chipMinorFeatures5_UNK29 0x20000000
+#define chipMinorFeatures5_UNK30 0x40000000
+#define chipMinorFeatures5_UNK31 0x80000000
#endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 5c89ebb..e885898 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -668,7 +668,6 @@
.probe = etnaviv_pdev_probe,
.remove = etnaviv_pdev_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "etnaviv",
.of_match_table = dt_match,
},
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d6bd438..1cd6046 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -85,7 +85,7 @@
struct dma_buf_attachment *attach, struct sg_table *sg);
int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
struct timespec *timeout);
int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index bf8fa85..4a29eea 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -201,7 +201,9 @@
obj = vram->object;
+ mutex_lock(&obj->lock);
pages = etnaviv_gem_get_pages(obj);
+ mutex_unlock(&obj->lock);
if (pages) {
int j;
@@ -213,8 +215,8 @@
iter.hdr->iova = cpu_to_le64(vram->iova);
- vaddr = etnaviv_gem_vaddr(&obj->base);
- if (vaddr && !IS_ERR(vaddr))
+ vaddr = etnaviv_gem_vmap(&obj->base);
+ if (vaddr)
memcpy(iter.data, vaddr, obj->base.size);
etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9f77c3b..4b519e4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -353,25 +353,39 @@
drm_gem_object_unreference_unlocked(obj);
}
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
{
struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+ if (etnaviv_obj->vaddr)
+ return etnaviv_obj->vaddr;
+
mutex_lock(&etnaviv_obj->lock);
- if (!etnaviv_obj->vaddr) {
- struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
- if (IS_ERR(pages))
- return ERR_CAST(pages);
-
- etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
- VM_MAP, pgprot_writecombine(PAGE_KERNEL));
- }
+ /*
+ * Need to check again, as we might have raced with another thread
+ * while waiting for the mutex.
+ */
+ if (!etnaviv_obj->vaddr)
+ etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
mutex_unlock(&etnaviv_obj->lock);
return etnaviv_obj->vaddr;
}
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+ struct page **pages;
+
+ lockdep_assert_held(&obj->lock);
+
+ pages = etnaviv_gem_get_pages(obj);
+ if (IS_ERR(pages))
+ return NULL;
+
+ return vmap(pages, obj->base.size >> PAGE_SHIFT,
+ VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
{
if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@
static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
.get_pages = etnaviv_gem_shmem_get_pages,
.release = etnaviv_gem_shmem_release,
+ .vmap = etnaviv_gem_vmap_impl,
};
void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@
static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
.get_pages = etnaviv_gem_userptr_get_pages,
.release = etnaviv_gem_userptr_release,
+ .vmap = etnaviv_gem_vmap_impl,
};
int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index a300b4b..ab5df81 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -78,6 +78,7 @@
struct etnaviv_gem_ops {
int (*get_pages)(struct etnaviv_gem_object *);
void (*release)(struct etnaviv_gem_object *);
+ void *(*vmap)(struct etnaviv_gem_object *);
};
static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index e94db4f..4e67395 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -31,7 +31,7 @@
void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
{
- return etnaviv_gem_vaddr(obj);
+ return etnaviv_gem_vmap(obj);
}
void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@
drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
}
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+ lockdep_assert_held(&etnaviv_obj->lock);
+
+ return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
/* .get_pages should never be called */
.release = etnaviv_gem_prime_release,
+ .vmap = etnaviv_gem_prime_vmap_impl,
};
struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 056a72e..a33162c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -72,6 +72,14 @@
*value = gpu->identity.minor_features3;
break;
+ case ETNAVIV_PARAM_GPU_FEATURES_5:
+ *value = gpu->identity.minor_features4;
+ break;
+
+ case ETNAVIV_PARAM_GPU_FEATURES_6:
+ *value = gpu->identity.minor_features5;
+ break;
+
case ETNAVIV_PARAM_GPU_STREAM_COUNT:
*value = gpu->identity.stream_count;
break;
@@ -112,6 +120,10 @@
*value = gpu->identity.num_constants;
break;
+ case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+ *value = gpu->identity.varyings_count;
+ break;
+
default:
DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
return -EINVAL;
@@ -120,46 +132,56 @@
return 0;
}
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+ ((gpu)->identity.model == chipModel_##mod && \
+ (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+ (((val) & field##__MASK) >> field##__SHIFT)
+
static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
{
if (gpu->identity.minor_features0 &
chipMinorFeatures0_MORE_MINOR_FEATURES) {
- u32 specs[2];
+ u32 specs[4];
+ unsigned int streams;
specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
+ specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+ specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
- gpu->identity.stream_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
- gpu->identity.register_max =
- (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
- >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
- gpu->identity.thread_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
- gpu->identity.vertex_cache_size =
- (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
- gpu->identity.shader_core_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
- gpu->identity.pixel_pipes =
- (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
- >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+ gpu->identity.stream_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+ gpu->identity.register_max = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+ gpu->identity.thread_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+ gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+ gpu->identity.shader_core_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+ gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
gpu->identity.vertex_output_buffer_size =
- (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+ etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
- gpu->identity.buffer_size =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
- gpu->identity.instruction_count =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
- gpu->identity.num_constants =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
- >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+ gpu->identity.buffer_size = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+ gpu->identity.instruction_count = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+ gpu->identity.num_constants = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+ gpu->identity.varyings_count = etnaviv_field(specs[2],
+ VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+ /* This overrides the value from older register if non-zero */
+ streams = etnaviv_field(specs[3],
+ VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+ if (streams)
+ gpu->identity.stream_count = streams;
}
/* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@
/* Convert the register max value */
if (gpu->identity.register_max)
gpu->identity.register_max = 1 << gpu->identity.register_max;
- else if (gpu->identity.model == 0x0400)
+ else if (gpu->identity.model == chipModel_GC400)
gpu->identity.register_max = 32;
else
gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@
/* Convert thread count */
if (gpu->identity.thread_count)
gpu->identity.thread_count = 1 << gpu->identity.thread_count;
- else if (gpu->identity.model == 0x0400)
+ else if (gpu->identity.model == chipModel_GC400)
gpu->identity.thread_count = 64;
- else if (gpu->identity.model == 0x0500 ||
- gpu->identity.model == 0x0530)
+ else if (gpu->identity.model == chipModel_GC500 ||
+ gpu->identity.model == chipModel_GC530)
gpu->identity.thread_count = 128;
else
gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@
if (gpu->identity.vertex_output_buffer_size) {
gpu->identity.vertex_output_buffer_size =
1 << gpu->identity.vertex_output_buffer_size;
- } else if (gpu->identity.model == 0x0400) {
+ } else if (gpu->identity.model == chipModel_GC400) {
if (gpu->identity.revision < 0x4000)
gpu->identity.vertex_output_buffer_size = 512;
else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@
switch (gpu->identity.instruction_count) {
case 0:
- if ((gpu->identity.model == 0x2000 &&
- gpu->identity.revision == 0x5108) ||
- gpu->identity.model == 0x880)
+ if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+ gpu->identity.model == chipModel_GC880)
gpu->identity.instruction_count = 512;
else
gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@
if (gpu->identity.num_constants == 0)
gpu->identity.num_constants = 168;
+
+ if (gpu->identity.varyings_count == 0) {
+ if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+ gpu->identity.varyings_count = 12;
+ else
+ gpu->identity.varyings_count = 8;
+ }
+
+ /*
+ * For some cores, two varyings are consumed for position, so the
+ * maximum varying count needs to be reduced by one.
+ */
+ if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+ etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+ etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+ etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+ etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+ etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+ etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+ etnaviv_is_model_rev(gpu, GC880, 0x5106))
+ gpu->identity.varyings_count -= 1;
}
static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@
chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
/* Special case for older graphic cores. */
- if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
- >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) == 0x01) {
- gpu->identity.model = 0x500; /* gc500 */
- gpu->identity.revision =
- (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
- >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+ if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+ gpu->identity.model = chipModel_GC500;
+ gpu->identity.revision = etnaviv_field(chipIdentity,
+ VIVS_HI_CHIP_IDENTITY_REVISION);
} else {
gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@
* same. Only for GC400 family.
*/
if ((gpu->identity.model & 0xff00) == 0x0400 &&
- gpu->identity.model != 0x0420) {
+ gpu->identity.model != chipModel_GC420) {
gpu->identity.model = gpu->identity.model & 0x0400;
}
/* Another special case */
- if (gpu->identity.model == 0x300 &&
- gpu->identity.revision == 0x2201) {
+ if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
@@ -295,11 +337,13 @@
gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
/* Disable fast clear on GC700. */
- if (gpu->identity.model == 0x700)
+ if (gpu->identity.model == chipModel_GC700)
gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
- if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
- (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+ if ((gpu->identity.model == chipModel_GC500 &&
+ gpu->identity.revision < 2) ||
+ (gpu->identity.model == chipModel_GC300 &&
+ gpu->identity.revision < 0x2000)) {
/*
* GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@
gpu->identity.minor_features1 = 0;
gpu->identity.minor_features2 = 0;
gpu->identity.minor_features3 = 0;
+ gpu->identity.minor_features4 = 0;
+ gpu->identity.minor_features5 = 0;
} else
gpu->identity.minor_features0 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
gpu->identity.minor_features3 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+ gpu->identity.minor_features4 =
+ gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+ gpu->identity.minor_features5 =
+ gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
}
/* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@
{
u16 prefetch;
- if (gpu->identity.model == chipModel_GC320 &&
- gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
- (gpu->identity.revision == 0x5007 ||
- gpu->identity.revision == 0x5220)) {
+ if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+ etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+ gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
u32 mc_memory_debug;
mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@
VIVS_HI_AXI_CONFIG_ARCACHE(2));
/* GC2000 rev 5108 needs a special bus config */
- if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+ if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@
if (gpu->identity.model == 0) {
dev_err(gpu->dev, "Unknown GPU model\n");
- pm_runtime_put_autosuspend(gpu->dev);
- return -ENXIO;
+ ret = -ENXIO;
+ goto fail;
+ }
+
+ /* Exclude VG cores with FE2.0 */
+ if (gpu->identity.features & chipFeatures_PIPE_VG &&
+ gpu->identity.features & chipFeatures_FE20) {
+ dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+ ret = -ENXIO;
+ goto fail;
}
ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@
goto fail;
}
- /* TODO: we will leak here memory - fix it! */
-
gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
if (!gpu->mmu) {
+ iommu_domain_free(iommu);
ret = -ENOMEM;
goto fail;
}
@@ -552,7 +608,7 @@
if (!gpu->buffer) {
ret = -ENOMEM;
dev_err(gpu->dev, "could not create command buffer\n");
- goto fail;
+ goto destroy_iommu;
}
if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
ret = -EINVAL;
@@ -582,6 +638,9 @@
free_buffer:
etnaviv_gpu_cmdbuf_free(gpu->buffer);
gpu->buffer = NULL;
+destroy_iommu:
+ etnaviv_iommu_destroy(gpu->mmu);
+ gpu->mmu = NULL;
fail:
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@
gpu->identity.minor_features2);
seq_printf(m, "\t minor_features3: 0x%08x\n",
gpu->identity.minor_features3);
+ seq_printf(m, "\t minor_features4: 0x%08x\n",
+ gpu->identity.minor_features4);
+ seq_printf(m, "\t minor_features5: 0x%08x\n",
+ gpu->identity.minor_features5);
seq_puts(m, "\tspecs\n");
seq_printf(m, "\t stream_count: %d\n",
@@ -664,6 +727,8 @@
gpu->identity.instruction_count);
seq_printf(m, "\t num_constants: %d\n",
gpu->identity.num_constants);
+ seq_printf(m, "\t varyings_count: %d\n",
+ gpu->identity.varyings_count);
seq_printf(m, "\taxi: 0x%08x\n", axi);
seq_printf(m, "\tidle: 0x%08x\n", idle);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index c75d503..f233ac4 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -46,6 +46,12 @@
/* Supported minor feature 3 fields. */
u32 minor_features3;
+ /* Supported minor feature 4 fields. */
+ u32 minor_features4;
+
+ /* Supported minor feature 5 fields. */
+ u32 minor_features5;
+
/* Number of streams supported. */
u32 stream_count;
@@ -75,6 +81,9 @@
/* Buffer size */
u32 buffer_size;
+
+ /* Number of varyings */
+ u8 varyings_count;
};
struct etnaviv_event {
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 0064f26..6a7de5f 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,8 +8,8 @@
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state_hi.xml ( 23420 bytes, from 2015-03-25 11:47:21)
-- common.xml ( 18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53)
+- common.xml ( 18437 bytes, from 2015-12-12 09:02:53)
Copyright (C) 2015
*/
@@ -182,8 +182,25 @@
#define VIVS_HI_CHIP_MINOR_FEATURE_3 0x00000088
+#define VIVS_HI_CHIP_SPECS_3 0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK 0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT 4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK 0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT 0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
#define VIVS_HI_CHIP_MINOR_FEATURE_4 0x00000094
+#define VIVS_HI_CHIP_SPECS_4 0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK 0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT 12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5 0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID 0x000000a8
+
#define VIVS_PM 0x00000000
#define VIVS_PM_POWER_CONTROLS 0x00000100
@@ -206,6 +223,11 @@
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE 0x00000001
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE 0x00000002
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE 0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH 0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA 0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE 0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA 0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX 0x00000080
#define VIVS_PM_PULSE_EATER 0x0000010c
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 6bfc463..367a916 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -304,18 +304,10 @@
unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
DENTIST_DPREFCLK_WDIVIDER_MASK) >>
DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
- if (div < 128 && div >= 96)
- div -= 64;
- else if (div >= 64)
- div = div / 2 - 16;
- else if (div >= 8)
- div /= 4;
- else
- div = 0;
+ div = radeon_audio_decode_dfs_div(div);
if (div)
- clock = rdev->clock.gpupll_outputfreq * 10 / div;
+ clock = clock * 100 / div;
WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 9953356..3cf04a2 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -289,6 +289,16 @@
* number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
* is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
*/
+ if (ASIC_IS_DCE41(rdev)) {
+ unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+ DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+ DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+ div = radeon_audio_decode_dfs_div(div);
+
+ if (div)
+ clock = 100 * clock / div;
+ }
+
WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 4aa5f75..13b6029 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -511,6 +511,11 @@
#define DCCG_AUDIO_DTO1_CNTL 0x05cc
# define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
+#define DCE41_DENTIST_DISPCLK_CNTL 0x049c
+# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24)
+# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24)
+# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24
+
/* DCE 4.0 AFMT */
#define HDMI_CONTROL 0x7030
# define HDMI_KEEPOUT_MODE (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ae6db9..78a51b3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,7 +268,7 @@
uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
- uint32_t gpupll_outputfreq;
+ uint32_t vco_freq;
};
/*
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 08fc1b5..de9a2ff 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1106,6 +1106,31 @@
ATOM_FIRMWARE_INFO_V2_2 info_22;
};
+union igp_info {
+ struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+ struct radeon_mode_info *mode_info = &rdev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ union igp_info *igp_info;
+ u8 frev, crev;
+ u16 data_offset;
+
+ if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)(mode_info->atom_context->bios +
+ data_offset);
+ rdev->clock.vco_freq =
+ le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+ }
+}
+
bool radeon_atom_get_clock_info(struct drm_device *dev)
{
struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@
rdev->mode_info.firmware_flags =
le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
- if (ASIC_IS_DCE8(rdev)) {
- rdev->clock.gpupll_outputfreq =
+ if (ASIC_IS_DCE8(rdev))
+ rdev->clock.vco_freq =
le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
- if (rdev->clock.gpupll_outputfreq == 0)
- rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
- }
+ else if (ASIC_IS_DCE5(rdev))
+ rdev->clock.vco_freq = rdev->clock.current_dispclk;
+ else if (ASIC_IS_DCE41(rdev))
+ radeon_atombios_get_dentist_vco_freq(rdev);
+ else
+ rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+ if (rdev->clock.vco_freq == 0)
+ rdev->clock.vco_freq = 360000; /* 3.6 GHz */
return true;
}
@@ -1270,14 +1301,6 @@
return false;
}
-union igp_info {
- struct _ATOM_INTEGRATED_SYSTEM_INFO info;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
bool radeon_atombios_sideport_present(struct radeon_device *rdev)
{
struct radeon_mode_info *mode_info = &rdev->mode_info;
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 2c02e99..b214663 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -739,9 +739,6 @@
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
- struct radeon_connector *radeon_connector = to_radeon_connector(connector);
- struct radeon_connector_atom_dig *dig_connector =
- radeon_connector->con_priv;
if (!dig || !dig->afmt)
return;
@@ -753,10 +750,7 @@
radeon_audio_write_speaker_allocation(encoder);
radeon_audio_write_sad_regs(encoder);
radeon_audio_write_latency_fields(encoder, mode);
- if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
- radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
- else
- radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+ radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
radeon_audio_set_audio_packet(encoder);
radeon_audio_select_pin(encoder);
@@ -781,3 +775,15 @@
if (radeon_encoder->audio && radeon_encoder->audio->dpms)
radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
}
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+ if (div >= 8 && div < 64)
+ return (div - 8) * 25 + 200;
+ else if (div >= 64 && div < 96)
+ return (div - 64) * 50 + 1600;
+ else if (div >= 96 && div < 128)
+ return (div - 96) * 100 + 3200;
+ else
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 059cc30..5c70cce 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -79,5 +79,6 @@
void radeon_audio_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode);
void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b3bb923..298ea1c 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1670,8 +1670,10 @@
/* setup afmt */
radeon_afmt_init(rdev);
- radeon_fbdev_init(rdev);
- drm_kms_helper_poll_init(rdev->ddev);
+ if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+ radeon_fbdev_init(rdev);
+ drm_kms_helper_poll_init(rdev->ddev);
+ }
/* do pm late init */
ret = radeon_pm_late_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 3dcc573..e26c963f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -663,6 +663,7 @@
bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
if (!bo_va) {
args->operation = RADEON_VA_RESULT_ERROR;
+ radeon_bo_unreserve(rbo);
drm_gem_object_unreference_unlocked(gobj);
return -ENOENT;
}
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index 07a0d37..a01efe3 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -178,12 +178,12 @@
return -EINVAL;
}
- for (i = 0; i < sign->num; ++i) {
- if (sign->val[i].chip_id == chip_id)
+ for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+ if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
break;
}
- if (i == sign->num)
+ if (i == le32_to_cpu(sign->num))
return -EINVAL;
data += (256 - 64) / 4;
@@ -191,18 +191,18 @@
data[1] = sign->val[i].nonce[1];
data[2] = sign->val[i].nonce[2];
data[3] = sign->val[i].nonce[3];
- data[4] = sign->len + 64;
+ data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
memset(&data[5], 0, 44);
memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
- data += data[4] / 4;
+ data += le32_to_cpu(data[4]) / 4;
data[0] = sign->val[i].sigval[0];
data[1] = sign->val[i].sigval[1];
data[2] = sign->val[i].sigval[2];
data[3] = sign->val[i].sigval[3];
- rdev->vce.keyselect = sign->val[i].keyselect;
+ rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
return 0;
}
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index d1dc0f7..f6a809a 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -2,11 +2,11 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
- rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+ rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
- rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index 7bfe243..f8f8f29 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -461,10 +461,11 @@
static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
{
- unsigned int bpp, i, pre;
+ unsigned int i, pre;
unsigned long mpclk, pllref, tmp;
unsigned int m = 1, n = 1, target_mbps = 1000;
unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+ int bpp;
bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
if (bpp < 0) {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8397d1b..a0d51cc 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -55,14 +55,12 @@
return arm_iommu_attach_device(dev, mapping);
}
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
struct device *dev)
{
arm_iommu_detach_device(dev);
}
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@
return 0;
}
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
{
@@ -89,7 +86,6 @@
priv->crtc_funcs[pipe] = NULL;
}
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
int pipe)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index f784488..3b8f652 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -39,7 +39,6 @@
return rk_fb->obj[plane];
}
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
{
@@ -177,8 +176,23 @@
crtc_funcs->wait_for_update(crtc);
}
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ * | <-- HW vsync irq and reg take effect
+ * plane_commit --> |
+ * get_vblank and wait --> |
+ * | <-- handle_vblank, vblank->count + 1
+ * cleanup_fb --> |
+ * iommu crash --> |
+ * | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
{
struct drm_crtc_state *old_crtc_state;
struct drm_crtc *crtc;
@@ -194,6 +208,10 @@
if (!crtc->state->active)
continue;
+ if (!drm_atomic_helper_framebuffer_changed(dev,
+ old_state, crtc))
+ continue;
+
ret = drm_crtc_vblank_get(crtc);
if (ret != 0)
continue;
@@ -241,7 +259,7 @@
drm_atomic_helper_commit_planes(dev, state, true);
- rockchip_atomic_wait_for_complete(state);
+ rockchip_atomic_wait_for_complete(dev, state);
drm_atomic_helper_cleanup_planes(dev, state);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
index 50432e9..73718c5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
@@ -15,7 +15,18 @@
#ifndef _ROCKCHIP_DRM_FBDEV_H
#define _ROCKCHIP_DRM_FBDEV_H
+#ifdef CONFIG_DRM_FBDEV_EMULATION
int rockchip_drm_fbdev_init(struct drm_device *dev);
void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+ return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
#endif /* _ROCKCHIP_DRM_FBDEV_H */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index d908321..18e0733 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -234,13 +234,8 @@
/*
* align to 64 bytes since Mali requires it.
*/
- min_pitch = ALIGN(min_pitch, 64);
-
- if (args->pitch < min_pitch)
- args->pitch = min_pitch;
-
- if (args->size < args->pitch * args->height)
- args->size = args->pitch * args->height;
+ args->pitch = ALIGN(min_pitch, 64);
+ args->size = args->pitch * args->height;
rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
&args->handle);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 46c2a8d..fd37054 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -43,8 +43,8 @@
#define REG_SET(x, base, reg, v, mode) \
__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
- __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+ __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
#define VOP_WIN_SET(x, win, name, v) \
REG_SET(x, win->base, win->phy->name, v, RELAXED)
@@ -58,16 +58,18 @@
#define VOP_INTR_GET(vop, name) \
vop_read_reg(vop, 0, &vop->data->ctrl->name)
-#define VOP_INTR_SET(vop, name, v) \
- REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+ REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
#define VOP_INTR_SET_TYPE(vop, name, type, v) \
do { \
- int i, reg = 0; \
+ int i, reg = 0, mask = 0; \
for (i = 0; i < vop->data->intr->nintrs; i++) { \
- if (vop->data->intr->intrs[i] & type) \
+ if (vop->data->intr->intrs[i] & type) { \
reg |= (v) << i; \
+ mask |= 1 << i; \
+ } \
} \
- VOP_INTR_SET(vop, name, reg); \
+ VOP_INTR_SET(vop, name, mask, reg); \
} while (0)
#define VOP_INTR_GET_TYPE(vop, name, type) \
vop_get_intr_type(vop, &vop->data->intr->name, type)
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 424d515..314ff71 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,19 +144,16 @@
}
#endif /* CONFIG_DEBUG_FS */
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
int
vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
{
- if (on)
- return pm_generic_poweroff(&vc4->v3d->pdev->dev);
- else
- return pm_generic_resume(&vc4->v3d->pdev->dev);
+ /* XXX: This interface is needed for GPU reset, and the way to
+ * do it is to turn our power domain off and back on. We
+ * can't just reset from within the driver, because the reset
+ * bits are in the power domain's register area, and get set
+ * during the poweron process.
+ */
+ return 0;
}
static void vc4_v3d_init_hw(struct drm_device *dev)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c49812b..24fb348 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
#include <linux/module.h>
+#include <linux/console.h>
#include <drm/drmP.h>
#include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@
static int __init vmwgfx_init(void)
{
int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+ if (vgacon_text_force())
+ return -EINVAL;
+#endif
+
ret = drm_pci_init(&driver, &vmw_pci_driver);
if (ret)
DRM_ERROR("Failed initializing DRM.\n");
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c848789..c43318d 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -932,6 +932,17 @@
static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
{
/*
+ * CPU fan speed going up and down on Dell Studio XPS 8000
+ * for unknown reasons.
+ */
+ .ident = "Dell Studio XPS 8000",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"),
+ },
+ },
+ {
+ /*
* CPU fan speed going up and down on Dell Studio XPS 8100
* for unknown reasons.
*/
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index f77eb971..4f695d8 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -90,7 +90,15 @@
pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5),
REG_TDP_LIMIT3, &val);
- tdp_limit = val >> 16;
+ /*
+ * On Carrizo and later platforms, ApmTdpLimit bit field
+ * is extended to 16:31 from 16:28.
+ */
+ if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+ tdp_limit = val >> 16;
+ else
+ tdp_limit = (val >> 16) & 0x1fff;
+
curr_pwr_watts = ((u64)(tdp_limit +
data->base_tdp)) << running_avg_range;
curr_pwr_watts -= running_avg_capture;
diff --git a/drivers/scsi/hisi_sas/Kconfig b/drivers/scsi/hisi_sas/Kconfig
index 37a0c71..b676618 100644
--- a/drivers/scsi/hisi_sas/Kconfig
+++ b/drivers/scsi/hisi_sas/Kconfig
@@ -1,5 +1,7 @@
config SCSI_HISI_SAS
tristate "HiSilicon SAS"
+ depends on HAS_DMA
+ depends on ARM64 || COMPILE_TEST
select SCSI_SAS_LIBSAS
select BLK_DEV_INTEGRITY
help
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4e08d1cd..bb669d3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2893,7 +2893,7 @@
sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
rw_max = q->limits.io_opt =
- logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+ sdkp->opt_xfer_blocks * sdp->sector_size;
else
rw_max = BLK_DEF_MAX_SECTORS;
@@ -3268,8 +3268,8 @@
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret = 0;
- if (!sdkp)
- return 0; /* this can happen */
+ if (!sdkp) /* E.g.: runtime suspend following sd_remove() */
+ return 0;
if (sdkp->WCE && sdkp->media_present) {
sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
@@ -3308,6 +3308,9 @@
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
+ if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
+ return 0;
+
if (!sdkp->device->manage_start_stop)
return 0;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 8bd54a6..64c8674 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -144,6 +144,9 @@
{
struct scsi_cd *cd = dev_get_drvdata(dev);
+ if (!cd) /* E.g.: runtime suspend following sr_remove() */
+ return 0;
+
if (cd->media_present)
return -EBUSY;
else
@@ -985,6 +988,7 @@
scsi_autopm_get_device(cd->device);
del_gendisk(cd->disk);
+ dev_set_drvdata(dev, NULL);
mutex_lock(&sr_ref_mutex);
kref_put(&cd->kref, sr_kref_release);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c2..f6bed86c 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+ struct device *dev = get_device(&vp_dev->vdev.dev);
unregister_virtio_device(&vp_dev->vdev);
@@ -554,6 +555,7 @@
virtio_pci_modern_remove(vp_dev);
pci_disable_device(pci_dev);
+ put_device(dev);
}
static struct pci_driver virtio_pci_driver = {
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008d..fe5efada 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@
struct drm_atomic_state *state,
bool async);
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+ struct drm_atomic_state *old_state,
+ struct drm_crtc *crtc);
+
void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
struct drm_atomic_state *old_state);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0639dcc..81de712 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,7 +165,6 @@
ftrace_func_t saved_func;
int __percpu *disabled;
#ifdef CONFIG_DYNAMIC_FTRACE
- int nr_trampolines;
struct ftrace_ops_hash local_hash;
struct ftrace_ops_hash *func_hash;
struct ftrace_ops_hash old_hash;
diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b..d6dfa05 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@
TP_STRUCT__entry(
__string(driver, fence->ops->get_driver_name(fence))
- __string(timeline, fence->ops->get_driver_name(fence))
+ __string(timeline, fence->ops->get_timeline_name(fence))
__field(unsigned int, context)
__field(unsigned int, seqno)
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989a..f95e1c4 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@
#define ETNAVIV_PARAM_GPU_FEATURES_2 0x05
#define ETNAVIV_PARAM_GPU_FEATURES_3 0x06
#define ETNAVIV_PARAM_GPU_FEATURES_4 0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5 0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6 0x09
#define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10
#define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11
@@ -59,6 +61,7 @@
#define ETNAVIV_PARAM_GPU_BUFFER_SIZE 0x17
#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT 0x18
#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS 0x1a
#define ETNA_MAX_PIPES 4
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d..15a1795 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@
put_seccomp_filter(thread);
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
+
+ /*
+ * Don't let an unprivileged task work around
+ * the no_new_privs restriction by creating
+ * a thread that sets it up, enters seccomp,
+ * then dies.
+ */
+ if (task_no_new_privs(caller))
+ task_set_no_new_privs(thread);
+
/*
* Opt the other thread into seccomp if needed.
* As threads are considered to be trust-realm
* equivalent (see ptrace_may_access), it is safe to
* allow one thread to transition the other.
*/
- if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
- /*
- * Don't let an unprivileged task work around
- * the no_new_privs restriction by creating
- * a thread that sets it up, enters seccomp,
- * then dies.
- */
- if (task_no_new_privs(caller))
- task_set_no_new_privs(thread);
-
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
- }
}
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb980..d929340 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@
{
__buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
diff --git a/security/keys/key.c b/security/keys/key.c
index 07a8731..09ef276 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -430,7 +430,8 @@
/* and link it into the destination keyring */
if (keyring) {
- set_bit(KEY_FLAG_KEEP, &key->flags);
+ if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+ set_bit(KEY_FLAG_KEEP, &key->flags);
__key_link(key, _edit);
}
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index e3e9491..a2a1e24 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -97,11 +97,11 @@
bool "PCM timer interface" if EXPERT
default y
help
- If you disable this option, pcm timer will be inavailable, so
- those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+ If you disable this option, pcm timer will be unavailable, so
+ those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
incorrectlly.
- For some embedded device, we may disable it to reduce memory
+ For some embedded devices, we may disable it to reduce memory
footprint, about 20KB on x86_64 platform.
config SND_SEQUENCER_OSS
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 18b8dc4..7fac3cae 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -46,6 +46,13 @@
#include <sound/compress_offload.h>
#include <sound/compress_driver.h>
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
/* TODO:
* - add substream support for multiple devices in case of
* SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@
return retval;
}
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
static int
snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
{
@@ -463,6 +471,7 @@
kfree(caps);
return retval;
}
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
/* revisit this with snd_pcm_preallocate_xxx */
static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@
case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
retval = snd_compr_get_caps(stream, arg);
break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
retval = snd_compr_get_codec_caps(stream, arg);
break;
+#endif
case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
retval = snd_compr_set_params(stream, arg);
break;
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index b1221b2..6779e82b 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -202,7 +202,7 @@
dp->index = i;
if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
- pr_err("ALSA: seq_oss: too many applications\n");
+ pr_debug("ALSA: seq_oss: too many applications\n");
rc = -ENOMEM;
goto _error;
}
diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 0f3b381..b16dbef 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -308,7 +308,7 @@
struct seq_oss_synth *rec;
struct seq_oss_synthinfo *info;
- if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+ if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
return;
for (i = 0; i < dp->max_synthdev; i++) {
info = &dp->synths[i];
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 75b7485..bde3330 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -87,7 +87,7 @@
module_param(fake_buffer, bool, 0444);
MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
#ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
#endif
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 926e5dc..5022c9b 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -47,14 +47,16 @@
[6] = 0x07,
};
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
- if (snd_bebob_rate_table[i] == rate)
- return i;
+ if (snd_bebob_rate_table[i] == rate) {
+ *index = i;
+ return 0;
+ }
}
return -EINVAL;
}
@@ -425,7 +427,9 @@
goto end;
/* confirm params for both streams */
- index = get_formation_index(rate);
+ err = get_formation_index(rate, &index);
+ if (err < 0)
+ goto end;
pcm_channels = bebob->tx_stream_formations[index].pcm;
midi_channels = bebob->tx_stream_formations[index].midi;
err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 0216475..37adcc6 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -3,6 +3,7 @@
config SND_WSS_LIB
tristate
select SND_PCM
+ select SND_TIMER
config SND_SB_COMMON
tristate
@@ -42,6 +43,7 @@
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Analog Devices SoundPort
AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@
tristate "Gravis UltraSound Classic"
select SND_RAWMIDI
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Gravis UltraSound Classic
soundcards.
@@ -221,6 +224,7 @@
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Gravis UltraSound Extreme
soundcards.
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 656ce39..8f6594a 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -155,6 +155,7 @@
select SND_PCM
select SND_RAWMIDI
select SND_AC97_CODEC
+ select SND_TIMER
depends on ZONE_DMA
help
Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@
select SND_HWDEP
select SND_RAWMIDI
select SND_AC97_CODEC
+ select SND_TIMER
depends on ZONE_DMA
help
Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_AC97_CODEC
+ select SND_TIMER
help
Say Y here to include support for Yamaha PCI audio chips -
YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 256e6cd..4045dca 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -90,6 +90,8 @@
#define NVIDIA_HDA_ENABLE_COHBIT 0x01
/* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL 0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE (0x1 << 6)
#define INTEL_SCH_HDA_DEVC 0x78
#define INTEL_SCH_HDA_DEVC_NOSNOOP (0x1<<11)
@@ -534,10 +536,21 @@
{
struct hdac_bus *bus = azx_bus(chip);
struct pci_dev *pci = chip->pci;
+ u32 val;
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, true);
+ if (IS_BROXTON(pci)) {
+ pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+ val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+ pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+ }
azx_init_chip(chip, full_reset);
+ if (IS_BROXTON(pci)) {
+ pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+ val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+ pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+ }
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, false);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 426a29a1..1f52b55 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3653,6 +3653,7 @@
HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi),
HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi),
diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig
index d75deba..dfcd386 100644
--- a/sound/sparc/Kconfig
+++ b/sound/sparc/Kconfig
@@ -22,6 +22,7 @@
config SND_SUN_CS4231
tristate "Sun CS4231"
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for CS4231 sound device on Sun.
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 23ea6d8..a75d9ce 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1205,8 +1205,12 @@
* "Playback Design" products need a 50ms delay after setting the
* USB interface.
*/
- if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+ switch (le16_to_cpu(dev->descriptor.idVendor)) {
+ case 0x23ba: /* Playback Design */
+ case 0x0644: /* TEAC Corp. */
mdelay(50);
+ break;
+ }
}
void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1225,14 @@
(requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
mdelay(20);
+ /*
+ * "TEAC Corp." products need a 20ms delay after each
+ * class compliant request
+ */
+ if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+ (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+ mdelay(20);
+
/* Marantz/Denon devices with USB DAC functionality need a delay
* after each class compliant request
*/
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926..ba34f9e 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
#if defined(__i386__) || defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb() mb()
-# define dma_rmb() barrier()
-# define dma_wmb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value) do { \
+ typeof(var) virt_store_mb_value = (value); \
+ __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+ __ATOMIC_SEQ_CST); \
+ barrier(); \
+} while (0);
/* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
#else
#error Please fill in barrier macros
#endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 0000000..845960e
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d56..0338499 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
#include <assert.h>
#include <stdarg.h>
+#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 0000000..feaa64a
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+ -rm main.o
+ -rm ring.o ring
+ -rm virtio_ring_0_9.o virtio_ring_0_9
+ -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 0000000..34e94c4
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 0000000..3a5ff43
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = write(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = read(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void kick(void)
+{
+ notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+ wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+ notify(callfd);
+}
+
+void wait_for_call(void)
+{
+ wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+ cpu_set_t cpuset;
+ int ret;
+ pthread_t self;
+ long int cpu;
+ char *endptr;
+
+ if (!arg)
+ return;
+
+ cpu = strtol(arg, &endptr, 0);
+ assert(!*endptr);
+
+ assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+ self = pthread_self();
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+ assert(!ret);
+}
+
+static void run_guest(void)
+{
+ int completed_before;
+ int completed = 0;
+ int started = 0;
+ int bufs = runcycles;
+ int spurious = 0;
+ int r;
+ unsigned len;
+ void *buf;
+ int tokick = batch;
+
+ for (;;) {
+ if (do_sleep)
+ disable_call();
+ completed_before = completed;
+ do {
+ if (started < bufs &&
+ started - completed < max_outstanding) {
+ r = add_inbuf(0, NULL, "Hello, world!");
+ if (__builtin_expect(r == 0, true)) {
+ ++started;
+ if (!--tokick) {
+ tokick = batch;
+ if (do_sleep)
+ kick_available();
+ }
+
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (get_buf(&len, &buf)) {
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ r = 0;
+ }
+ } while (r == 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (do_sleep) {
+ if (enable_call())
+ wait_for_call();
+ } else {
+ poll_used();
+ }
+ }
+}
+
+static void run_host(void)
+{
+ int completed_before;
+ int completed = 0;
+ int spurious = 0;
+ int bufs = runcycles;
+ unsigned len;
+ void *buf;
+
+ for (;;) {
+ if (do_sleep) {
+ if (enable_kick())
+ wait_for_kick();
+ } else {
+ poll_avail();
+ }
+ if (do_sleep)
+ disable_kick();
+ completed_before = completed;
+ while (__builtin_expect(use_buf(&len, &buf), true)) {
+ if (do_sleep)
+ call_used();
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ }
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ if (completed == bufs)
+ break;
+ }
+}
+
+void *start_guest(void *arg)
+{
+ set_affinity(arg);
+ run_guest();
+ pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+ set_affinity(arg);
+ run_host();
+ pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = 'h',
+ },
+ {
+ .name = "host-affinity",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "guest-affinity",
+ .has_arg = required_argument,
+ .val = 'G',
+ },
+ {
+ .name = "ring-size",
+ .has_arg = required_argument,
+ .val = 'R',
+ },
+ {
+ .name = "run-cycles",
+ .has_arg = required_argument,
+ .val = 'C',
+ },
+ {
+ .name = "outstanding",
+ .has_arg = required_argument,
+ .val = 'o',
+ },
+ {
+ .name = "batch",
+ .has_arg = required_argument,
+ .val = 'b',
+ },
+ {
+ .name = "sleep",
+ .has_arg = no_argument,
+ .val = 's',
+ },
+ {
+ .name = "relax",
+ .has_arg = no_argument,
+ .val = 'x',
+ },
+ {
+ .name = "exit",
+ .has_arg = no_argument,
+ .val = 'e',
+ },
+ {
+ }
+};
+
+static void help(void)
+{
+ fprintf(stderr, "Usage: <test> [--help]"
+ " [--host-affinity H]"
+ " [--guest-affinity G]"
+ " [--ring-size R (default: %d)]"
+ " [--run-cycles C (default: %d)]"
+ " [--batch b]"
+ " [--outstanding o]"
+ " [--sleep]"
+ " [--relax]"
+ " [--exit]"
+ "\n",
+ ring_size,
+ runcycles);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+ pthread_t host, guest;
+ void *tret;
+ char *host_arg = NULL;
+ char *guest_arg = NULL;
+ char *endptr;
+ long int c;
+
+ kickfd = eventfd(0, 0);
+ assert(kickfd >= 0);
+ callfd = eventfd(0, 0);
+ assert(callfd >= 0);
+
+ for (;;) {
+ int o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'H':
+ host_arg = optarg;
+ break;
+ case 'G':
+ guest_arg = optarg;
+ break;
+ case 'R':
+ ring_size = strtol(optarg, &endptr, 0);
+ assert(ring_size && !(ring_size & (ring_size - 1)));
+ assert(!*endptr);
+ break;
+ case 'C':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ runcycles = c;
+ break;
+ case 'o':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ max_outstanding = c;
+ break;
+ case 'b':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ batch = c;
+ break;
+ case 's':
+ do_sleep = true;
+ break;
+ case 'x':
+ do_relax = true;
+ break;
+ case 'e':
+ do_exit = true;
+ break;
+ default:
+ help();
+ exit(4);
+ break;
+ }
+ }
+
+ /* does nothing here, used to make sure all smp APIs compile */
+ smp_acquire();
+ smp_release();
+ smp_mb();
+done:
+
+ if (batch > max_outstanding)
+ batch = max_outstanding;
+
+ if (optind < argc) {
+ help();
+ exit(4);
+ }
+ alloc_ring();
+
+ ret = pthread_create(&host, NULL, start_host, host_arg);
+ assert(!ret);
+ ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+ assert(!ret);
+
+ ret = pthread_join(guest, &tret);
+ assert(!ret);
+ ret = pthread_join(host, &tret);
+ assert(!ret);
+ return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 0000000..16917ac
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+ unsigned long long t;
+
+ t = __rdtsc();
+ while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+ _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+ if (do_relax)
+ cpu_relax();
+ else
+ /* prevent compiler from removing busy loops */
+ barrier();
+}
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+ barrier(); \
+ __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+ __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+ barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 0000000..c25c8d2
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+ unsigned short next,
+ unsigned short prev)
+{
+ return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+ unsigned short flags;
+ unsigned short index;
+ unsigned len;
+ unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+ unsigned short kick_index;
+ unsigned char reserved0[HOST_GUEST_PADDING - 2];
+ unsigned short call_index;
+ unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+ void *buf; /* descriptor is writeable, we can't get buf from there */
+ void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+ unsigned avail_idx;
+ unsigned last_used_idx;
+ unsigned num_free;
+ unsigned kicked_avail_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned used_idx;
+ unsigned called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+
+ ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+ if (ret) {
+ perror("Unable to allocate ring buffer.\n");
+ exit(3);
+ }
+ event = malloc(sizeof *event);
+ if (!event) {
+ perror("Unable to allocate event buffer.\n");
+ exit(3);
+ }
+ memset(event, 0, sizeof *event);
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ for (i = 0; i < ring_size; ++i) {
+ struct desc desc = {
+ .index = i,
+ };
+ ring[i] = desc;
+ }
+ guest.num_free = ring_size;
+ data = malloc(ring_size * sizeof *data);
+ if (!data) {
+ perror("Unable to allocate data buffer.\n");
+ exit(3);
+ }
+ memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head, index;
+
+ if (!guest.num_free)
+ return -1;
+
+ guest.num_free--;
+ head = (ring_size - 1) & (guest.avail_idx++);
+
+ /* Start with a write. On MESI architectures this helps
+ * avoid a shared state with consumer that is polling this descriptor.
+ */
+ ring[head].addr = (unsigned long)(void*)buf;
+ ring[head].len = len;
+ /* read below might bypass write above. That is OK because it's just an
+ * optimization. If this happens, we will get the cache line in a
+ * shared state which is unfortunate, but probably not worth it to
+ * add an explicit full barrier to avoid this.
+ */
+ barrier();
+ index = ring[head].index;
+ data[index].buf = buf;
+ data[index].data = datap;
+ /* Barrier A (for pairing) */
+ smp_release();
+ ring[head].flags = DESC_HW;
+
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+ unsigned index;
+ void *datap;
+
+ if (ring[head].flags & DESC_HW)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ *lenp = ring[head].len;
+ index = ring[head].index & (ring_size - 1);
+ datap = data[index].data;
+ *bufp = data[index].buf;
+ data[index].buf = NULL;
+ data[index].data = NULL;
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+void poll_used(void)
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ while (ring[head].flags & DESC_HW)
+ busy_wait();
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ event->call_index = guest.last_used_idx;
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ if (!need_event(event->kick_index,
+ guest.avail_idx,
+ guest.kicked_avail_idx))
+ return;
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ event->kick_index = host.used_idx;
+ /* Barrier C (for pairing) */
+ smp_mb();
+ return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ while (!(ring[head].flags & DESC_HW))
+ busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ if (!(ring[head].flags & DESC_HW))
+ return false;
+
+ /* make sure length read below is not speculated */
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ /* simple in-order completion: we don't need
+ * to touch index at all. This also means we
+ * can just modify the descriptor in-place.
+ */
+ ring[head].len--;
+ /* Make sure len is valid before flags.
+ * Note: alternative is to write len and flags in one access -
+ * possible on 64 bit architectures but wmb is free on Intel anyway
+ * so I have no way to test whether it's a gain.
+ */
+ /* Barrier B (for pairing) */
+ smp_release();
+ ring[head].flags = 0;
+ host.used_idx++;
+ return true;
+}
+
+void call_used(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ if (!need_event(event->call_index,
+ host.used_idx,
+ host.called_used_idx))
+ return;
+
+ host.called_used_idx = host.used_idx;
+ call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 0000000..52b0f71
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+ #Don't run guest and host on same CPU
+ #It actually works ok if using signalling
+ if
+ (echo "$@" | grep -e "--sleep" > /dev/null) || \
+ test $HOST_AFFINITY '!=' $cpu
+ then
+ echo "GUEST AFFINITY $cpu"
+ "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+ fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 0000000..47c9a1a
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+ void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+ unsigned short avail_idx;
+ unsigned short last_used_idx;
+ unsigned short num_free;
+ unsigned short kicked_avail_idx;
+ unsigned short free_head;
+ unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned short used_idx;
+ unsigned short called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+ void *p;
+
+ ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+ if (ret) {
+ perror("Unable to allocate ring buffer.\n");
+ exit(3);
+ }
+ memset(p, 0, vring_size(ring_size, 0x1000));
+ vring_init(&ring, ring_size, p, 0x1000);
+
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+ /* Put everything in free lists. */
+ guest.free_head = 0;
+ for (i = 0; i < ring_size - 1; i++)
+ ring.desc[i].next = i + 1;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ guest.num_free = ring_size;
+ data = malloc(ring_size * sizeof *data);
+ if (!data) {
+ perror("Unable to allocate data buffer.\n");
+ exit(3);
+ }
+ memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head, avail;
+ struct vring_desc *desc;
+
+ if (!guest.num_free)
+ return -1;
+
+ head = guest.free_head;
+ guest.num_free--;
+
+ desc = ring.desc;
+ desc[head].flags = VRING_DESC_F_NEXT;
+ desc[head].addr = (unsigned long)(void *)buf;
+ desc[head].len = len;
+ /* We do it like this to simulate the way
+ * we'd have to flip it if we had multiple
+ * descriptors.
+ */
+ desc[head].flags &= ~VRING_DESC_F_NEXT;
+ guest.free_head = desc[head].next;
+
+ data[head].data = datap;
+
+#ifdef RING_POLL
+ /* Barrier A (for pairing) */
+ smp_release();
+ avail = guest.avail_idx++;
+ ring.avail->ring[avail & (ring_size - 1)] =
+ (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+ avail = (ring_size - 1) & (guest.avail_idx++);
+ ring.avail->ring[avail] = head;
+ /* Barrier A (for pairing) */
+ smp_release();
+#endif
+ ring.avail->idx = guest.avail_idx;
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head;
+ unsigned index;
+ void *datap;
+
+#ifdef RING_POLL
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+ if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ index &= ring_size - 1;
+#else
+ if (ring.used->idx == guest.last_used_idx)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+#endif
+ *lenp = ring.used->ring[head].len;
+ datap = data[index].data;
+ *bufp = (void*)(unsigned long)ring.desc[index].addr;
+ data[index].data = NULL;
+ ring.desc[index].next = guest.free_head;
+ guest.free_head = index;
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ for (;;) {
+ unsigned index = ring.used->ring[head].id;
+
+ if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+ busy_wait();
+ else
+ break;
+ }
+#else
+ unsigned head = guest.last_used_idx;
+
+ while (ring.used->idx == head)
+ busy_wait();
+#endif
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ unsigned short last_used_idx;
+
+ vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+#ifdef RING_POLL
+ {
+ unsigned short head = last_used_idx & (ring_size - 1);
+ unsigned index = ring.used->ring[head].id;
+
+ return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+ }
+#else
+ return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ if (!vring_need_event(vring_avail_event(&ring),
+ guest.avail_idx,
+ guest.kicked_avail_idx))
+ return;
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ unsigned head = host.used_idx;
+
+ vring_avail_event(&ring) = head;
+ /* Barrier C (for pairing) */
+ smp_mb();
+#ifdef RING_POLL
+ {
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+ return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+ }
+#else
+ return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+ unsigned head = host.used_idx;
+#ifdef RING_POLL
+ for (;;) {
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+ if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+ busy_wait();
+ else
+ break;
+ }
+#else
+ while (ring.avail->idx == head)
+ busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned used_idx = host.used_idx;
+ struct vring_desc *desc;
+ unsigned head;
+
+#ifdef RING_POLL
+ head = ring.avail->ring[used_idx & (ring_size - 1)];
+ if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+ return false;
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+ desc = &ring.desc[head & (ring_size - 1)];
+#else
+ if (used_idx == ring.avail->idx)
+ return false;
+
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+ head = ring.avail->ring[used_idx];
+ desc = &ring.desc[head];
+#endif
+
+ *lenp = desc->len;
+ *bufp = (void *)(unsigned long)desc->addr;
+
+ /* now update used ring */
+ ring.used->ring[used_idx].id = head;
+ ring.used->ring[used_idx].len = desc->len - 1;
+ /* Barrier B (for pairing) */
+ smp_release();
+ host.used_idx++;
+ ring.used->idx = host.used_idx;
+
+ return true;
+}
+
+void call_used(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ if (!vring_need_event(vring_used_event(&ring),
+ host.used_idx,
+ host.called_used_idx))
+ return;
+
+ host.called_used_idx = host.used_idx;
+ call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 0000000..84fc2c5
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"