Merge tag 'kvm-s390-20140325' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvm-next
3 fixes
- memory leak on certain SIGP conditions
- wrong size for idle bitmap (always too big)
- clear local interrupts on initial CPU reset
1 performance improvement
- improve performance with many guests on certain workloads
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4714f28..2cb1640 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -586,8 +586,8 @@
4.24 KVM_CREATE_IRQCHIP
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM, arm64
+Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+Architectures: x86, ia64, ARM, arm64, s390
Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error
@@ -596,7 +596,10 @@
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
-created.
+created. On s390, a dummy irq routing table is created.
+
+Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
+before KVM_CREATE_IRQCHIP can be used.
4.25 KVM_IRQ_LINE
@@ -932,9 +935,9 @@
4.37 KVM_ENABLE_CAP
-Capability: KVM_CAP_ENABLE_CAP
+Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
Architectures: ppc, s390
-Type: vcpu ioctl
+Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
Parameters: struct kvm_enable_cap (in)
Returns: 0 on success; -1 on error
@@ -965,6 +968,8 @@
__u8 pad[64];
};
+The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
+for vm-wide capabilities.
4.38 KVM_GET_MP_STATE
@@ -1334,7 +1339,7 @@
4.52 KVM_SET_GSI_ROUTING
Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64
+Architectures: x86 ia64 s390
Type: vm ioctl
Parameters: struct kvm_irq_routing (in)
Returns: 0 on success, -1 on error
@@ -1357,6 +1362,7 @@
union {
struct kvm_irq_routing_irqchip irqchip;
struct kvm_irq_routing_msi msi;
+ struct kvm_irq_routing_s390_adapter adapter;
__u32 pad[8];
} u;
};
@@ -1364,6 +1370,7 @@
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
No flags are specified so far, the corresponding field must be set to zero.
@@ -1379,6 +1386,14 @@
__u32 pad;
};
+struct kvm_irq_routing_s390_adapter {
+ __u64 ind_addr;
+ __u64 summary_addr;
+ __u64 ind_offset;
+ __u32 summary_offset;
+ __u32 adapter_id;
+};
+
4.53 KVM_ASSIGN_SET_MSIX_NR
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 410fa67..4ceef53 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -12,6 +12,7 @@
- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
- enable/disable for the guest transparent async page faults
+- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
Groups:
KVM_DEV_FLIC_ENQUEUE
@@ -44,3 +45,47 @@
Disables async page faults for the guest and waits until already pending
async page faults are done. This is necessary to trigger a completion interrupt
for every init interrupt before migrating the interrupt list.
+
+ KVM_DEV_FLIC_ADAPTER_REGISTER
+ Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
+ describing the adapter to register:
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+ id contains the unique id for the adapter, isc the I/O interruption subclass
+ to use, maskable whether this adapter may be masked (interrupts turned off)
+ and swap whether the indicators need to be byte swapped.
+
+
+ KVM_DEV_FLIC_ADAPTER_MODIFY
+ Modifies attributes of an existing I/O adapter interrupt source. Takes
+ a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+ id specifies the adapter and type the operation. The supported operations
+ are:
+
+ KVM_S390_IO_ADAPTER_MASK
+ mask or unmask the adapter, as specified in mask
+
+ KVM_S390_IO_ADAPTER_MAP
+ perform a gmap translation for the guest address provided in addr,
+ pin a userspace page for the translated address and add it to the
+ list of mappings
+
+ KVM_S390_IO_ADAPTER_UNMAP
+ release a userspace page for the translated address specified in addr
+ from the list of mappings
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a995fce..060aaa6 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -30,16 +30,16 @@
/* Special address that contains the comm page, used for reducing # of traps */
-#define KVM_GUEST_COMMPAGE_ADDR 0x0
+#define KVM_GUEST_COMMPAGE_ADDR 0x0
#define KVM_GUEST_KERNEL_MODE(vcpu) ((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \
((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0))
-#define KVM_GUEST_KUSEG 0x00000000UL
-#define KVM_GUEST_KSEG0 0x40000000UL
-#define KVM_GUEST_KSEG23 0x60000000UL
-#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000)
-#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff)
+#define KVM_GUEST_KUSEG 0x00000000UL
+#define KVM_GUEST_KSEG0 0x40000000UL
+#define KVM_GUEST_KSEG23 0x60000000UL
+#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000)
+#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff)
#define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0)
#define KVM_GUEST_CKSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
@@ -52,17 +52,17 @@
#define KVM_GUEST_KSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
#define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23)
-#define KVM_INVALID_PAGE 0xdeadbeef
-#define KVM_INVALID_INST 0xdeadbeef
-#define KVM_INVALID_ADDR 0xdeadbeef
+#define KVM_INVALID_PAGE 0xdeadbeef
+#define KVM_INVALID_INST 0xdeadbeef
+#define KVM_INVALID_ADDR 0xdeadbeef
-#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL
+#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL
-#define GUEST_TICKS_PER_JIFFY (40000000/HZ)
-#define MS_TO_NS(x) (x * 1E6L)
+#define GUEST_TICKS_PER_JIFFY (40000000/HZ)
+#define MS_TO_NS(x) (x * 1E6L)
-#define CAUSEB_DC 27
-#define CAUSEF_DC (_ULCAST_(1) << 27)
+#define CAUSEB_DC 27
+#define CAUSEF_DC (_ULCAST_(1) << 27)
struct kvm;
struct kvm_run;
@@ -126,8 +126,8 @@
int commpage_tlb;
};
-#define N_MIPS_COPROC_REGS 32
-#define N_MIPS_COPROC_SEL 8
+#define N_MIPS_COPROC_REGS 32
+#define N_MIPS_COPROC_SEL 8
struct mips_coproc {
unsigned long reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL];
@@ -139,124 +139,124 @@
/*
* Coprocessor 0 register names
*/
-#define MIPS_CP0_TLB_INDEX 0
-#define MIPS_CP0_TLB_RANDOM 1
-#define MIPS_CP0_TLB_LOW 2
-#define MIPS_CP0_TLB_LO0 2
-#define MIPS_CP0_TLB_LO1 3
-#define MIPS_CP0_TLB_CONTEXT 4
-#define MIPS_CP0_TLB_PG_MASK 5
-#define MIPS_CP0_TLB_WIRED 6
-#define MIPS_CP0_HWRENA 7
-#define MIPS_CP0_BAD_VADDR 8
-#define MIPS_CP0_COUNT 9
-#define MIPS_CP0_TLB_HI 10
-#define MIPS_CP0_COMPARE 11
-#define MIPS_CP0_STATUS 12
-#define MIPS_CP0_CAUSE 13
-#define MIPS_CP0_EXC_PC 14
-#define MIPS_CP0_PRID 15
-#define MIPS_CP0_CONFIG 16
-#define MIPS_CP0_LLADDR 17
-#define MIPS_CP0_WATCH_LO 18
-#define MIPS_CP0_WATCH_HI 19
-#define MIPS_CP0_TLB_XCONTEXT 20
-#define MIPS_CP0_ECC 26
-#define MIPS_CP0_CACHE_ERR 27
-#define MIPS_CP0_TAG_LO 28
-#define MIPS_CP0_TAG_HI 29
-#define MIPS_CP0_ERROR_PC 30
-#define MIPS_CP0_DEBUG 23
-#define MIPS_CP0_DEPC 24
-#define MIPS_CP0_PERFCNT 25
-#define MIPS_CP0_ERRCTL 26
-#define MIPS_CP0_DATA_LO 28
-#define MIPS_CP0_DATA_HI 29
-#define MIPS_CP0_DESAVE 31
+#define MIPS_CP0_TLB_INDEX 0
+#define MIPS_CP0_TLB_RANDOM 1
+#define MIPS_CP0_TLB_LOW 2
+#define MIPS_CP0_TLB_LO0 2
+#define MIPS_CP0_TLB_LO1 3
+#define MIPS_CP0_TLB_CONTEXT 4
+#define MIPS_CP0_TLB_PG_MASK 5
+#define MIPS_CP0_TLB_WIRED 6
+#define MIPS_CP0_HWRENA 7
+#define MIPS_CP0_BAD_VADDR 8
+#define MIPS_CP0_COUNT 9
+#define MIPS_CP0_TLB_HI 10
+#define MIPS_CP0_COMPARE 11
+#define MIPS_CP0_STATUS 12
+#define MIPS_CP0_CAUSE 13
+#define MIPS_CP0_EXC_PC 14
+#define MIPS_CP0_PRID 15
+#define MIPS_CP0_CONFIG 16
+#define MIPS_CP0_LLADDR 17
+#define MIPS_CP0_WATCH_LO 18
+#define MIPS_CP0_WATCH_HI 19
+#define MIPS_CP0_TLB_XCONTEXT 20
+#define MIPS_CP0_ECC 26
+#define MIPS_CP0_CACHE_ERR 27
+#define MIPS_CP0_TAG_LO 28
+#define MIPS_CP0_TAG_HI 29
+#define MIPS_CP0_ERROR_PC 30
+#define MIPS_CP0_DEBUG 23
+#define MIPS_CP0_DEPC 24
+#define MIPS_CP0_PERFCNT 25
+#define MIPS_CP0_ERRCTL 26
+#define MIPS_CP0_DATA_LO 28
+#define MIPS_CP0_DATA_HI 29
+#define MIPS_CP0_DESAVE 31
-#define MIPS_CP0_CONFIG_SEL 0
-#define MIPS_CP0_CONFIG1_SEL 1
-#define MIPS_CP0_CONFIG2_SEL 2
-#define MIPS_CP0_CONFIG3_SEL 3
+#define MIPS_CP0_CONFIG_SEL 0
+#define MIPS_CP0_CONFIG1_SEL 1
+#define MIPS_CP0_CONFIG2_SEL 2
+#define MIPS_CP0_CONFIG3_SEL 3
/* Config0 register bits */
-#define CP0C0_M 31
-#define CP0C0_K23 28
-#define CP0C0_KU 25
-#define CP0C0_MDU 20
-#define CP0C0_MM 17
-#define CP0C0_BM 16
-#define CP0C0_BE 15
-#define CP0C0_AT 13
-#define CP0C0_AR 10
-#define CP0C0_MT 7
-#define CP0C0_VI 3
-#define CP0C0_K0 0
+#define CP0C0_M 31
+#define CP0C0_K23 28
+#define CP0C0_KU 25
+#define CP0C0_MDU 20
+#define CP0C0_MM 17
+#define CP0C0_BM 16
+#define CP0C0_BE 15
+#define CP0C0_AT 13
+#define CP0C0_AR 10
+#define CP0C0_MT 7
+#define CP0C0_VI 3
+#define CP0C0_K0 0
/* Config1 register bits */
-#define CP0C1_M 31
-#define CP0C1_MMU 25
-#define CP0C1_IS 22
-#define CP0C1_IL 19
-#define CP0C1_IA 16
-#define CP0C1_DS 13
-#define CP0C1_DL 10
-#define CP0C1_DA 7
-#define CP0C1_C2 6
-#define CP0C1_MD 5
-#define CP0C1_PC 4
-#define CP0C1_WR 3
-#define CP0C1_CA 2
-#define CP0C1_EP 1
-#define CP0C1_FP 0
+#define CP0C1_M 31
+#define CP0C1_MMU 25
+#define CP0C1_IS 22
+#define CP0C1_IL 19
+#define CP0C1_IA 16
+#define CP0C1_DS 13
+#define CP0C1_DL 10
+#define CP0C1_DA 7
+#define CP0C1_C2 6
+#define CP0C1_MD 5
+#define CP0C1_PC 4
+#define CP0C1_WR 3
+#define CP0C1_CA 2
+#define CP0C1_EP 1
+#define CP0C1_FP 0
/* Config2 Register bits */
-#define CP0C2_M 31
-#define CP0C2_TU 28
-#define CP0C2_TS 24
-#define CP0C2_TL 20
-#define CP0C2_TA 16
-#define CP0C2_SU 12
-#define CP0C2_SS 8
-#define CP0C2_SL 4
-#define CP0C2_SA 0
+#define CP0C2_M 31
+#define CP0C2_TU 28
+#define CP0C2_TS 24
+#define CP0C2_TL 20
+#define CP0C2_TA 16
+#define CP0C2_SU 12
+#define CP0C2_SS 8
+#define CP0C2_SL 4
+#define CP0C2_SA 0
/* Config3 Register bits */
-#define CP0C3_M 31
-#define CP0C3_ISA_ON_EXC 16
-#define CP0C3_ULRI 13
-#define CP0C3_DSPP 10
-#define CP0C3_LPA 7
-#define CP0C3_VEIC 6
-#define CP0C3_VInt 5
-#define CP0C3_SP 4
-#define CP0C3_MT 2
-#define CP0C3_SM 1
-#define CP0C3_TL 0
+#define CP0C3_M 31
+#define CP0C3_ISA_ON_EXC 16
+#define CP0C3_ULRI 13
+#define CP0C3_DSPP 10
+#define CP0C3_LPA 7
+#define CP0C3_VEIC 6
+#define CP0C3_VInt 5
+#define CP0C3_SP 4
+#define CP0C3_MT 2
+#define CP0C3_SM 1
+#define CP0C3_TL 0
/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0 \
+#define MIPS_CONFIG0 \
((1 << CP0C0_M) | (0x3 << CP0C0_K0))
/* Have config2, no coprocessor2 attached, no MDMX support attached,
no performance counters, watch registers present,
no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1 \
-((1 << CP0C1_M) | \
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \
+#define MIPS_CONFIG1 \
+((1 << CP0C1_M) | \
+ (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \
+ (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \
(0 << CP0C1_FP))
/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2 \
+#define MIPS_CONFIG2 \
((1 << CP0C2_M))
/* No config4, no DSP ASE, no large physaddr (PABITS),
no external interrupt controller, no vectored interrupts,
no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3 \
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \
+#define MIPS_CONFIG3 \
+((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \
+ (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \
(0 << CP0C3_SM) | (0 << CP0C3_TL))
/* MMU types, the first four entries have the same layout as the
@@ -274,36 +274,36 @@
/*
* Trap codes
*/
-#define T_INT 0 /* Interrupt pending */
-#define T_TLB_MOD 1 /* TLB modified fault */
-#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS 3 /* TLB miss on a store */
-#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST 5 /* Address error on a store */
-#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */
-#define T_SYSCALL 8 /* System call */
-#define T_BREAK 9 /* Breakpoint */
-#define T_RES_INST 10 /* Reserved instruction exception */
-#define T_COP_UNUSABLE 11 /* Coprocessor unusable */
-#define T_OVFLOW 12 /* Arithmetic overflow */
+#define T_INT 0 /* Interrupt pending */
+#define T_TLB_MOD 1 /* TLB modified fault */
+#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */
+#define T_TLB_ST_MISS 3 /* TLB miss on a store */
+#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */
+#define T_ADDR_ERR_ST 5 /* Address error on a store */
+#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */
+#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */
+#define T_SYSCALL 8 /* System call */
+#define T_BREAK 9 /* Breakpoint */
+#define T_RES_INST 10 /* Reserved instruction exception */
+#define T_COP_UNUSABLE 11 /* Coprocessor unusable */
+#define T_OVFLOW 12 /* Arithmetic overflow */
/*
* Trap definitions added for r4000 port.
*/
-#define T_TRAP 13 /* Trap instruction */
-#define T_VCEI 14 /* Virtual coherency exception */
-#define T_FPE 15 /* Floating point exception */
-#define T_WATCH 23 /* Watch address reference */
-#define T_VCED 31 /* Virtual coherency data */
+#define T_TRAP 13 /* Trap instruction */
+#define T_VCEI 14 /* Virtual coherency exception */
+#define T_FPE 15 /* Floating point exception */
+#define T_WATCH 23 /* Watch address reference */
+#define T_VCED 31 /* Virtual coherency data */
/* Resume Flags */
-#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
-#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
+#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
+#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
-#define RESUME_GUEST 0
-#define RESUME_GUEST_DR RESUME_FLAG_DR
-#define RESUME_HOST RESUME_FLAG_HOST
+#define RESUME_GUEST 0
+#define RESUME_GUEST_DR RESUME_FLAG_DR
+#define RESUME_HOST RESUME_FLAG_HOST
enum emulation_result {
EMULATE_DONE, /* no further processing */
@@ -313,24 +313,27 @@
EMULATE_PRIV_FAIL,
};
-#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */
-#define MIPS3_PG_V 0x00000002 /* Valid */
-#define MIPS3_PG_NV 0x00000000
-#define MIPS3_PG_D 0x00000004 /* Dirty */
+#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */
+#define MIPS3_PG_V 0x00000002 /* Valid */
+#define MIPS3_PG_NV 0x00000000
+#define MIPS3_PG_D 0x00000004 /* Dirty */
#define mips3_paddr_to_tlbpfn(x) \
- (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
+ (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
#define mips3_tlbpfn_to_paddr(x) \
- ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT)
+ ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT)
-#define MIPS3_PG_SHIFT 6
-#define MIPS3_PG_FRAME 0x3fffffc0
+#define MIPS3_PG_SHIFT 6
+#define MIPS3_PG_FRAME 0x3fffffc0
-#define VPN2_MASK 0xffffe000
-#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && ((x).tlb_lo1 & MIPS3_PG_G))
-#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK)
-#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK)
-#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) ? ((x).tlb_lo1 & MIPS3_PG_V) : ((x).tlb_lo0 & MIPS3_PG_V))
+#define VPN2_MASK 0xffffe000
+#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \
+ ((x).tlb_lo1 & MIPS3_PG_G))
+#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK)
+#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK)
+#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \
+ ? ((x).tlb_lo1 & MIPS3_PG_V) \
+ : ((x).tlb_lo0 & MIPS3_PG_V))
struct kvm_mips_tlb {
long tlb_mask;
@@ -339,7 +342,7 @@
long tlb_lo1;
};
-#define KVM_MIPS_GUEST_TLB_SIZE 64
+#define KVM_MIPS_GUEST_TLB_SIZE 64
struct kvm_vcpu_arch {
void *host_ebase, *guest_ebase;
unsigned long host_stack;
@@ -400,65 +403,67 @@
};
-#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
-#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
-#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
-#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
-#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
-#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
-#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
-#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
-#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
-#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
-#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
-#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
-#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
-#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
-#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
-#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
-#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
-#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
-#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
-#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
-#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
-#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
-#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
-#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
-#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
-#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
-#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
-#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
-#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
-#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
-#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
-#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
-#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
-#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
-#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
-#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
-#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
-#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
-#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
-#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
-#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
-#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
-#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
+#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
+#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
+#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
+#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
+#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
+#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
+#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
+#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
+#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
+#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
+#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
+#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0])
+#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val))
+#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
+#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
+#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
+#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
+#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
+#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
+#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
+#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
+#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
+#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
+#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
+#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
+#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
+#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
+#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
+#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
+#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
+#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
+#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
+#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
+#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
+#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
+#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
+#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
+#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
+#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
+#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
+#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
+#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
+#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
-#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
-#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
-#define kvm_change_c0_guest_cause(cop0, change, val) \
-{ \
- kvm_clear_c0_guest_cause(cop0, change); \
- kvm_set_c0_guest_cause(cop0, ((val) & (change))); \
+#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
+#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
+#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
+#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+#define kvm_change_c0_guest_cause(cop0, change, val) \
+{ \
+ kvm_clear_c0_guest_cause(cop0, change); \
+ kvm_set_c0_guest_cause(cop0, ((val) & (change))); \
}
-#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
-#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
-#define kvm_change_c0_guest_ebase(cop0, change, val) \
-{ \
- kvm_clear_c0_guest_ebase(cop0, change); \
- kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
+#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
+#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
+#define kvm_change_c0_guest_ebase(cop0, change, val) \
+{ \
+ kvm_clear_c0_guest_ebase(cop0, change); \
+ kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
}
diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c
index 4b6274b..e3fec99 100644
--- a/arch/mips/kvm/kvm_mips_emul.c
+++ b/arch/mips/kvm/kvm_mips_emul.c
@@ -436,13 +436,6 @@
sel = inst & 0x7;
co_bit = (inst >> 25) & 1;
- /* Verify that the register is valid */
- if (rd > MIPS_CP0_DESAVE) {
- printk("Invalid rd: %d\n", rd);
- er = EMULATE_FAIL;
- goto done;
- }
-
if (co_bit) {
op = (inst) & 0xff;
@@ -1542,8 +1535,15 @@
}
if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) {
+ int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
int rd = (inst & RD) >> 11;
int rt = (inst & RT) >> 16;
+ /* If usermode, check RDHWR rd is allowed by guest HWREna */
+ if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) {
+ kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n",
+ rd, opc);
+ goto emulate_ri;
+ }
switch (rd) {
case 0: /* CPU number */
arch->gprs[rt] = 0;
@@ -1567,31 +1567,27 @@
}
break;
case 29:
-#if 1
arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0);
-#else
- /* UserLocal not implemented */
- er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
-#endif
break;
default:
- printk("RDHWR not supported\n");
- er = EMULATE_FAIL;
- break;
+ kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc);
+ goto emulate_ri;
}
} else {
- printk("Emulate RI not supported @ %p: %#x\n", opc, inst);
- er = EMULATE_FAIL;
+ kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst);
+ goto emulate_ri;
}
+ return EMULATE_DONE;
+
+emulate_ri:
/*
- * Rollback PC only if emulation was unsuccessful
+ * Rollback PC (if in branch delay slot then the PC already points to
+ * branch target), and pass the RI exception to the guest OS.
*/
- if (er == EMULATE_FAIL) {
- vcpu->arch.pc = curr_pc;
- }
- return er;
+ vcpu->arch.pc = curr_pc;
+ return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
}
enum emulation_result
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c36cd35..68897fc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -19,10 +19,19 @@
#include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+#include <asm/isc.h>
#define KVM_MAX_VCPUS 64
#define KVM_USER_MEM_SLOTS 32
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
struct sca_entry {
atomic_t scn;
__u32 reserved;
@@ -244,6 +253,27 @@
struct kvm_arch_memory_slot {
};
+struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+};
+
+struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
@@ -251,6 +281,8 @@
struct kvm_device *flic;
struct gmap *gmap;
int css_support;
+ int use_irqchip;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
};
#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 2f0ade2..c003c6a 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -22,6 +22,8 @@
#define KVM_DEV_FLIC_CLEAR_IRQS 3
#define KVM_DEV_FLIC_APF_ENABLE 4
#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -32,6 +34,26 @@
#define KVM_S390_MAX_FLOAT_IRQS 266250
#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index c8bacbc..10d529a 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -25,6 +25,8 @@
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index a47d2c3..d3adb37 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 05bffd7..200a8f9 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <asm/asm-offsets.h>
@@ -1068,6 +1069,171 @@
return r;
}
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+ if (id >= MAX_S390_IO_ADAPTERS)
+ return NULL;
+ return kvm->arch.adapters[id];
+}
+
+static int register_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct s390_io_adapter *adapter;
+ struct kvm_s390_io_adapter adapter_info;
+
+ if (copy_from_user(&adapter_info,
+ (void __user *)attr->addr, sizeof(adapter_info)))
+ return -EFAULT;
+
+ if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
+ (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ return -EINVAL;
+
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&adapter->maps);
+ init_rwsem(&adapter->maps_lock);
+ atomic_set(&adapter->nr_maps, 0);
+ adapter->id = adapter_info.id;
+ adapter->isc = adapter_info.isc;
+ adapter->maskable = adapter_info.maskable;
+ adapter->masked = false;
+ adapter->swap = adapter_info.swap;
+ dev->kvm->arch.adapters[adapter->id] = adapter;
+
+ return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+ int ret;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter || !adapter->maskable)
+ return -EINVAL;
+ ret = adapter->masked;
+ adapter->masked = masked;
+ return ret;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map;
+ int ret;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&map->list);
+ map->guest_addr = addr;
+ map->addr = gmap_translate(addr, kvm->arch.gmap);
+ if (map->addr == -EFAULT) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+ if (ret < 0)
+ goto out;
+ BUG_ON(ret != 1);
+ down_write(&adapter->maps_lock);
+ if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+ list_add_tail(&map->list, &adapter->maps);
+ ret = 0;
+ } else {
+ put_page(map->page);
+ ret = -EINVAL;
+ }
+ up_write(&adapter->maps_lock);
+out:
+ if (ret)
+ kfree(map);
+ return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ struct s390_map_info *map, *tmp;
+ int found = 0;
+
+ if (!adapter || !addr)
+ return -EINVAL;
+
+ down_write(&adapter->maps_lock);
+ list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+ if (map->guest_addr == addr) {
+ found = 1;
+ atomic_dec(&adapter->nr_maps);
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ break;
+ }
+ }
+ up_write(&adapter->maps_lock);
+
+ return found ? 0 : -EINVAL;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+ int i;
+ struct s390_map_info *map, *tmp;
+
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+ if (!kvm->arch.adapters[i])
+ continue;
+ list_for_each_entry_safe(map, tmp,
+ &kvm->arch.adapters[i]->maps, list) {
+ list_del(&map->list);
+ put_page(map->page);
+ kfree(map);
+ }
+ kfree(kvm->arch.adapters[i]);
+ }
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_io_adapter_req req;
+ struct s390_io_adapter *adapter;
+ int ret;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ adapter = get_io_adapter(dev->kvm, req.id);
+ if (!adapter)
+ return -EINVAL;
+ switch (req.type) {
+ case KVM_S390_IO_ADAPTER_MASK:
+ ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+ if (ret > 0)
+ ret = 0;
+ break;
+ case KVM_S390_IO_ADAPTER_MAP:
+ ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+ break;
+ case KVM_S390_IO_ADAPTER_UNMAP:
+ ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -1096,6 +1262,12 @@
kvm_for_each_vcpu(i, vcpu, dev->kvm)
kvm_clear_async_pf_completion_queue(vcpu);
break;
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ r = register_io_adapter(dev, attr);
+ break;
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ r = modify_io_adapter(dev, attr);
+ break;
default:
r = -EINVAL;
}
@@ -1127,3 +1299,123 @@
.create = flic_create,
.destroy = flic_destroy,
};
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+ unsigned long bit;
+
+ bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+ return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+ u64 addr)
+{
+ struct s390_map_info *map;
+
+ if (!adapter)
+ return NULL;
+
+ list_for_each_entry(map, &adapter->maps, list) {
+ if (map->guest_addr == addr)
+ return map;
+ }
+ return NULL;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int)
+{
+ unsigned long bit;
+ int summary_set, idx;
+ struct s390_map_info *info;
+ void *map;
+
+ info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!info)
+ return -1;
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ idx = srcu_read_lock(&kvm->srcu);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!info) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -1;
+ }
+ map = page_address(info->page);
+ bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ summary_set = test_and_set_bit(bit, map);
+ mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(info->page);
+ srcu_read_unlock(&kvm->srcu, idx);
+ return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ int ret;
+ struct s390_io_adapter *adapter;
+
+ /* We're only interested in the 0->1 transition. */
+ if (!level)
+ return 0;
+ adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+ if (!adapter)
+ return -1;
+ down_read(&adapter->maps_lock);
+ ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+ up_read(&adapter->maps_lock);
+ if ((ret > 0) && !adapter->masked) {
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = (adapter->isc << 27) | 0x80000000,
+ };
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (ret == 0)
+ ret = 1;
+ }
+ return ret;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ int ret;
+
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_S390_ADAPTER:
+ e->set = set_adapter_int;
+ e->adapter.summary_addr = ue->u.adapter.summary_addr;
+ e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ e->adapter.summary_offset = ue->u.adapter.summary_offset;
+ e->adapter.ind_offset = ue->u.adapter.ind_offset;
+ e->adapter.adapter_id = ue->u.adapter.adapter_id;
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+ int irq_source_id, int level, bool line_status)
+{
+ return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 0000000..d98e415
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 83b7944..6e1b990 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -159,6 +159,7 @@
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@@ -187,6 +188,25 @@
return 0;
}
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_IRQCHIP:
+ kvm->arch.use_irqchip = 1;
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -204,6 +224,26 @@
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
+ r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ kvm_set_irq_routing(kvm, &routing, 0, 0);
+ r = 0;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -265,6 +305,7 @@
}
kvm->arch.css_support = 0;
+ kvm->arch.use_irqchip = 0;
return 0;
out_nogmap:
@@ -324,6 +365,7 @@
debug_unregister(kvm->arch.dbf);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
}
/* Section: vcpu related */
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6311170..660e79f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -137,6 +137,7 @@
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -163,5 +164,6 @@
/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ddc8a7e..64fae65 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -43,6 +43,16 @@
return ret;
}
+u64 kvm_supported_xcr0(void)
+{
+ u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+ if (!kvm_x86_ops->mpx_supported())
+ xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+ return xcr0;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -73,7 +83,7 @@
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) &
- host_xcr0 & KVM_SUPPORTED_XCR0;
+ kvm_supported_xcr0();
vcpu->arch.guest_xstate_size = best->ebx =
xstate_required_size(vcpu->arch.xcr0);
}
@@ -210,13 +220,6 @@
entry->flags = 0;
}
-static bool supported_xcr0_bit(unsigned bit)
-{
- u64 mask = ((u64)1 << bit);
-
- return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
#define F(x) bit(X86_FEATURE_##x)
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,8 +259,7 @@
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
- unsigned f_mpx = kvm_x86_ops->mpx_supported ?
- (kvm_x86_ops->mpx_supported() ? F(MPX) : 0) : 0;
+ unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
@@ -439,16 +441,18 @@
}
case 0xd: {
int idx, i;
+ u64 supported = kvm_supported_xcr0();
- entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
- entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+ entry->eax &= supported;
+ entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
+ u64 mask = ((u64)1 << idx);
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
- if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+ if (entry[i].eax == 0 || !(supported & mask))
continue;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a449c3d..2136cb6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4089,6 +4089,11 @@
return false;
}
+static bool svm_mpx_supported(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -4371,6 +4376,7 @@
.rdtscp_supported = svm_rdtscp_supported,
.invpcid_supported = svm_invpcid_supported,
+ .mpx_supported = svm_mpx_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f4e5aed..1320e0f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -206,6 +206,7 @@
u64 guest_pdptr1;
u64 guest_pdptr2;
u64 guest_pdptr3;
+ u64 guest_bndcfgs;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
@@ -541,6 +542,7 @@
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
+ GUEST_BNDCFGS,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
@@ -596,6 +598,7 @@
FIELD64(GUEST_PDPTR1, guest_pdptr1),
FIELD64(GUEST_PDPTR2, guest_pdptr2),
FIELD64(GUEST_PDPTR3, guest_pdptr3),
+ FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
FIELD64(HOST_IA32_PAT, host_ia32_pat),
FIELD64(HOST_IA32_EFER, host_ia32_efer),
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -726,6 +729,7 @@
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -736,6 +740,7 @@
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static bool vmx_mpx_supported(void);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2287,6 +2292,8 @@
nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ if (vmx_mpx_supported())
+ nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2300,6 +2307,8 @@
VM_ENTRY_LOAD_IA32_PAT;
nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
VM_ENTRY_LOAD_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2493,6 +2502,8 @@
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
data = vmcs_read64(GUEST_BNDCFGS);
break;
case MSR_IA32_FEATURE_CONTROL:
@@ -2564,6 +2575,8 @@
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
case MSR_IA32_TSC:
@@ -7866,6 +7879,9 @@
set_cr4_guest_host_mask(vmx);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8351,6 +8367,8 @@
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ if (vmx_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/* update exit information fields: */
@@ -8460,6 +8478,10 @@
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
+ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, 0);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a37da6b..aa98695 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3084,9 +3084,7 @@
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
- if (xstate_bv & ~KVM_SUPPORTED_XCR0)
- return -EINVAL;
- if (xstate_bv & ~host_xcr0)
+ if (xstate_bv & ~kvm_supported_xcr0())
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3939,6 +3937,23 @@
for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
+
+ /*
+ * Even MSRs that are valid in the host may not be exposed
+ * to the guests in some cases. We could work around this
+ * in VMX with the generic MSR save/load machinery, but it
+ * is not really worthwhile since it will really only
+ * happen with nested virtualization.
+ */
+ switch (msrs_to_save[i]) {
+ case MSR_IA32_BNDCFGS:
+ if (!kvm_x86_ops->mpx_supported())
+ continue;
+ break;
+ default:
+ break;
+ }
+
if (j < i)
msrs_to_save[j] = msrs_to_save[i];
j++;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 392ecbf..8c97bac9 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -126,6 +126,8 @@
| XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
+extern u64 kvm_supported_xcr0(void);
+
extern unsigned int min_timer_period_us;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9816b68..7d21cf9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -297,6 +297,14 @@
return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}
+struct kvm_s390_adapter_int {
+ u64 ind_addr;
+ u64 summary_addr;
+ u64 ind_offset;
+ u32 summary_offset;
+ u32 adapter_id;
+};
+
struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
@@ -309,6 +317,7 @@
unsigned pin;
} irqchip;
struct msi_msg msi;
+ struct kvm_s390_adapter_int adapter;
};
struct hlist_node link;
};
@@ -913,7 +922,11 @@
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_S390
+#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
+#else
#define KVM_MAX_IRQ_ROUTES 1024
+#endif
int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7518be..a8f4ee5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -741,6 +741,8 @@
#define KVM_CAP_EXT_EMUL_CPUID 95
#define KVM_CAP_HYPERV_TIME 96
#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
+#define KVM_CAP_ENABLE_CAP_VM 98
+#define KVM_CAP_S390_IRQCHIP 99
#ifdef KVM_CAP_IRQ_ROUTING
@@ -756,9 +758,18 @@
__u32 pad;
};
+struct kvm_irq_routing_s390_adapter {
+ __u64 ind_addr;
+ __u64 summary_addr;
+ __u64 ind_offset;
+ __u32 summary_offset;
+ __u32 adapter_id;
+};
+
/* gsi routing entry types */
#define KVM_IRQ_ROUTING_IRQCHIP 1
#define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
struct kvm_irq_routing_entry {
__u32 gsi;
@@ -768,6 +779,7 @@
union {
struct kvm_irq_routing_irqchip irqchip;
struct kvm_irq_routing_msi msi;
+ struct kvm_irq_routing_s390_adapter adapter;
__u32 pad[8];
} u;
};
@@ -1076,6 +1088,10 @@
/* Available with KVM_CAP_DEBUGREGS */
#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
+/*
+ * vcpu version available with KVM_ENABLE_CAP
+ * vm version available with KVM_CAP_ENABLE_CAP_VM
+ */
#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
/* Available with KVM_CAP_XSAVE */
#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d60..29c2a04 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@
lockdep_is_held(&kvm->irqfds.lock));
irqfd_update(kvm, irqfd, irq_rt);
- events = f.file->f_op->poll(f.file, &irqfd->pt);
-
list_add_tail(&irqfd->list, &kvm->irqfds.items);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
+ events = f.file->f_op->poll(f.file, &irqfd->pt);
+
if (events & POLLIN)
schedule_work(&irqfd->inject);
- spin_unlock_irq(&kvm->irqfds.lock);
-
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1539d37..d4b6015 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
#else
#define ioapic_debug(fmt, arg...)
#endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@
return false;
}
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
- bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+ int irq_level, bool line_status)
{
- union kvm_ioapic_redirect_entry *pent;
- int injected = -1;
+ union kvm_ioapic_redirect_entry entry;
+ u32 mask = 1 << irq;
+ u32 old_irr;
+ int edge, ret;
- pent = &ioapic->redirtbl[idx];
+ entry = ioapic->redirtbl[irq];
+ edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
- if (!pent->fields.mask) {
- injected = ioapic_deliver(ioapic, idx, line_status);
- if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
- pent->fields.remote_irr = 1;
+ if (!irq_level) {
+ ioapic->irr &= ~mask;
+ ret = 1;
+ goto out;
}
- return injected;
+ /*
+ * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+ * this only happens if a previous edge has not been delivered due
+ * do masking. For level interrupts, the remote_irr field tells
+ * us if the interrupt is waiting for an EOI.
+ *
+ * RTC is special: it is edge-triggered, but userspace likes to know
+ * if it has been already ack-ed via EOI because coalesced RTC
+ * interrupts lead to time drift in Windows guests. So we track
+ * EOI manually for the RTC interrupt.
+ */
+ if (irq == RTC_GSI && line_status &&
+ rtc_irq_check_coalesced(ioapic)) {
+ ret = 0;
+ goto out;
+ }
+
+ old_irr = ioapic->irr;
+ ioapic->irr |= mask;
+ if ((edge && old_irr == ioapic->irr) ||
+ (!edge && entry.fields.remote_irr)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+ trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+ return ret;
}
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+ u32 idx;
+
+ rtc_irq_eoi_tracking_reset(ioapic);
+ for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+ ioapic_set_irq(ioapic, idx, 1, true);
+
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@
}
}
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
{
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
struct kvm_lapic_irq irqe;
int ret;
+ if (entry->fields.mask)
+ return -1;
+
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@
irqe.level = 1;
irqe.shorthand = 0;
+ if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+ ioapic->irr &= ~(1 << irq);
+
if (irq == RTC_GSI && line_status) {
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,44 +360,24 @@
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+ if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+ entry->fields.remote_irr = 1;
+
return ret;
}
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status)
{
- u32 old_irr;
- u32 mask = 1 << irq;
- union kvm_ioapic_redirect_entry entry;
int ret, irq_level;
BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
spin_lock(&ioapic->lock);
- old_irr = ioapic->irr;
irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
irq_source_id, level);
- entry = ioapic->redirtbl[irq];
- if (!irq_level) {
- ioapic->irr &= ~mask;
- ret = 1;
- } else {
- int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+ ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
- if (irq == RTC_GSI && line_status &&
- rtc_irq_check_coalesced(ioapic)) {
- ret = 0; /* coalesced */
- goto out;
- }
- ioapic->irr |= mask;
- if ((edge && old_irr != ioapic->irr) ||
- (!edge && !entry.fields.remote_irr))
- ret = ioapic_service(ioapic, irq, line_status);
- else
- ret = 0; /* report coalesced interrupt */
- }
-out:
- trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(&ioapic->lock);
return ret;
@@ -393,7 +423,7 @@
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+ if (ioapic->irr & (1 << i))
ioapic_service(ioapic, i, false);
}
}
@@ -594,9 +624,10 @@
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+ ioapic->irr = 0;
update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
- kvm_rtc_eoi_tracking_restore_all(ioapic);
+ kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
return 0;
}