Merge tag 'kvm-s390-20140325' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvm-next

3 fixes
- memory leak on certain SIGP conditions
- wrong size for idle bitmap (always too big)
- clear local interrupts on initial CPU reset

1 performance improvement
- improve performance with many guests on certain workloads
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4714f28..2cb1640 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -586,8 +586,8 @@
 
 4.24 KVM_CREATE_IRQCHIP
 
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM, arm64
+Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+Architectures: x86, ia64, ARM, arm64, s390
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -596,7 +596,10 @@
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
 only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
-created.
+created. On s390, a dummy irq routing table is created.
+
+Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
+before KVM_CREATE_IRQCHIP can be used.
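+
+A minimal sketch of the required ordering from userspace (vm_fd is assumed to
+be an open VM file descriptor; error handling omitted):
+
+	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_IRQCHIP };
+
+	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* enable the capability */
+	ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);	/* then create the irqchip */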
 
 
 4.25 KVM_IRQ_LINE
@@ -932,9 +935,9 @@
 
 4.37 KVM_ENABLE_CAP
 
-Capability: KVM_CAP_ENABLE_CAP
+Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
 Architectures: ppc, s390
-Type: vcpu ioctl
+Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
 Parameters: struct kvm_enable_cap (in)
 Returns: 0 on success; -1 on error
 
@@ -965,6 +968,8 @@
        __u8  pad[64];
 };
 
+The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
+for vm-wide capabilities.
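+
+For example, with a filled-in struct kvm_enable_cap (a sketch; vcpu_fd and
+vm_fd are assumed to be open file descriptors):
+
+	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);	/* vcpu-specific capability */
+	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* vm-wide capability */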
 
 4.38 KVM_GET_MP_STATE
 
@@ -1334,7 +1339,7 @@
 4.52 KVM_SET_GSI_ROUTING
 
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64
+Architectures: x86 ia64 s390
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
@@ -1357,6 +1362,7 @@
 	union {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
+		struct kvm_irq_routing_s390_adapter adapter;
 		__u32 pad[8];
 	} u;
 };
@@ -1364,6 +1370,7 @@
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
 
 No flags are specified so far, the corresponding field must be set to zero.
 
@@ -1379,6 +1386,14 @@
 	__u32 pad;
 };
 
+struct kvm_irq_routing_s390_adapter {
+	__u64 ind_addr;
+	__u64 summary_addr;
+	__u64 ind_offset;
+	__u32 summary_offset;
+	__u32 adapter_id;
+};
+
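+A sketch of installing a single adapter route from userspace (vm_fd, gsi and
+the adapter values are placeholders; needs <stdlib.h>, <sys/ioctl.h> and
+<linux/kvm.h>; error handling omitted):
+
+	struct kvm_irq_routing *routing;
+	struct kvm_irq_routing_entry *e;
+
+	routing = calloc(1, sizeof(*routing) + sizeof(*e));
+	routing->nr = 1;
+	e = &routing->entries[0];
+	e->gsi = gsi;
+	e->type = KVM_IRQ_ROUTING_S390_ADAPTER;
+	e->u.adapter.ind_addr = ind_addr;
+	e->u.adapter.summary_addr = summary_addr;
+	e->u.adapter.ind_offset = ind_bit;
+	e->u.adapter.summary_offset = summary_bit;
+	e->u.adapter.adapter_id = adapter_id;
+	ioctl(vm_fd, KVM_SET_GSI_ROUTING, routing);
+	free(routing);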
 
 4.53 KVM_ASSIGN_SET_MSIX_NR
 
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 410fa67..4ceef53 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -12,6 +12,7 @@
 - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
 - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
 - enable/disable for the guest transparent async page faults
+- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
 
 Groups:
   KVM_DEV_FLIC_ENQUEUE
@@ -44,3 +45,47 @@
     Disables async page faults for the guest and waits until already pending
     async page faults are done. This is necessary to trigger a completion interrupt
     for every init interrupt before migrating the interrupt list.
+
+  KVM_DEV_FLIC_ADAPTER_REGISTER
+    Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
+    describing the adapter to register:
+
+struct kvm_s390_io_adapter {
+	__u32 id;
+	__u8 isc;
+	__u8 maskable;
+	__u8 swap;
+	__u8 pad;
+};
+
+    id contains the unique id for the adapter, isc the I/O interruption subclass
+    to use, maskable whether this adapter may be masked (interrupts turned off)
+    and swap whether the indicators need to be byte swapped.
+
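+    A sketch of registering an adapter from userspace (flic_fd is assumed to
+    be the fd of an already created FLIC device; the field values are
+    illustrative only):
+
+	struct kvm_s390_io_adapter adapter = {
+		.id = 0,
+		.isc = 3,
+		.maskable = 1,
+	};
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_FLIC_ADAPTER_REGISTER,
+		.addr = (__u64)(unsigned long)&adapter,
+	};
+
+	ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);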
+
+  KVM_DEV_FLIC_ADAPTER_MODIFY
+    Modifies attributes of an existing I/O adapter interrupt source. Takes
+    a kvm_s390_io_adapter_req specifying the adapter and the operation:
+
+struct kvm_s390_io_adapter_req {
+	__u32 id;
+	__u8 type;
+	__u8 mask;
+	__u16 pad0;
+	__u64 addr;
+};
+
+    id specifies the adapter and type the operation. The supported operations
+    are:
+
+    KVM_S390_IO_ADAPTER_MASK
+      mask or unmask the adapter, as specified in mask
+
+    KVM_S390_IO_ADAPTER_MAP
+      perform a gmap translation for the guest address provided in addr,
+      pin a userspace page for the translated address and add it to the
+      list of mappings
+
+    KVM_S390_IO_ADAPTER_UNMAP
+      release the userspace page for the translated address specified in
+      addr and remove it from the list of mappings
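+
+    A sketch of masking a previously registered adapter (flic_fd and the id
+    carry over from the registration example above):
+
+	struct kvm_s390_io_adapter_req req = {
+		.id = 0,
+		.type = KVM_S390_IO_ADAPTER_MASK,
+		.mask = 1,
+	};
+	struct kvm_device_attr attr = {
+		.group = KVM_DEV_FLIC_ADAPTER_MODIFY,
+		.addr = (__u64)(unsigned long)&req,
+	};
+
+	ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);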
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a995fce..060aaa6 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -30,16 +30,16 @@
 
 
 /* Special address that contains the comm page, used for reducing # of traps */
-#define KVM_GUEST_COMMPAGE_ADDR     0x0
+#define KVM_GUEST_COMMPAGE_ADDR		0x0
 
 #define KVM_GUEST_KERNEL_MODE(vcpu)	((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \
 					((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0))
 
-#define KVM_GUEST_KUSEG             0x00000000UL
-#define KVM_GUEST_KSEG0             0x40000000UL
-#define KVM_GUEST_KSEG23            0x60000000UL
-#define KVM_GUEST_KSEGX(a)          ((_ACAST32_(a)) & 0x60000000)
-#define KVM_GUEST_CPHYSADDR(a)      ((_ACAST32_(a)) & 0x1fffffff)
+#define KVM_GUEST_KUSEG			0x00000000UL
+#define KVM_GUEST_KSEG0			0x40000000UL
+#define KVM_GUEST_KSEG23		0x60000000UL
+#define KVM_GUEST_KSEGX(a)		((_ACAST32_(a)) & 0x60000000)
+#define KVM_GUEST_CPHYSADDR(a)		((_ACAST32_(a)) & 0x1fffffff)
 
 #define KVM_GUEST_CKSEG0ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0)
 #define KVM_GUEST_CKSEG1ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
@@ -52,17 +52,17 @@
 #define KVM_GUEST_KSEG1ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
 #define KVM_GUEST_KSEG23ADDR(a)		(KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23)
 
-#define KVM_INVALID_PAGE            0xdeadbeef
-#define KVM_INVALID_INST            0xdeadbeef
-#define KVM_INVALID_ADDR            0xdeadbeef
+#define KVM_INVALID_PAGE		0xdeadbeef
+#define KVM_INVALID_INST		0xdeadbeef
+#define KVM_INVALID_ADDR		0xdeadbeef
 
-#define KVM_MALTA_GUEST_RTC_ADDR    0xb8000070UL
+#define KVM_MALTA_GUEST_RTC_ADDR	0xb8000070UL
 
-#define GUEST_TICKS_PER_JIFFY (40000000/HZ)
-#define MS_TO_NS(x) (x * 1E6L)
+#define GUEST_TICKS_PER_JIFFY		(40000000/HZ)
+#define MS_TO_NS(x)			(x * 1E6L)
 
-#define CAUSEB_DC       27
-#define CAUSEF_DC       (_ULCAST_(1)   << 27)
+#define CAUSEB_DC			27
+#define CAUSEF_DC			(_ULCAST_(1) << 27)
 
 struct kvm;
 struct kvm_run;
@@ -126,8 +126,8 @@
 	int commpage_tlb;
 };
 
-#define N_MIPS_COPROC_REGS      32
-#define N_MIPS_COPROC_SEL   	8
+#define N_MIPS_COPROC_REGS	32
+#define N_MIPS_COPROC_SEL	8
 
 struct mips_coproc {
 	unsigned long reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL];
@@ -139,124 +139,124 @@
 /*
  * Coprocessor 0 register names
  */
-#define	MIPS_CP0_TLB_INDEX	    0
-#define	MIPS_CP0_TLB_RANDOM	    1
-#define	MIPS_CP0_TLB_LOW	    2
-#define	MIPS_CP0_TLB_LO0	    2
-#define	MIPS_CP0_TLB_LO1	    3
-#define	MIPS_CP0_TLB_CONTEXT	4
-#define	MIPS_CP0_TLB_PG_MASK	5
-#define	MIPS_CP0_TLB_WIRED	    6
-#define	MIPS_CP0_HWRENA 	    7
-#define	MIPS_CP0_BAD_VADDR	    8
-#define	MIPS_CP0_COUNT	        9
-#define	MIPS_CP0_TLB_HI	        10
-#define	MIPS_CP0_COMPARE	    11
-#define	MIPS_CP0_STATUS	        12
-#define	MIPS_CP0_CAUSE	        13
-#define	MIPS_CP0_EXC_PC	        14
-#define	MIPS_CP0_PRID		    15
-#define	MIPS_CP0_CONFIG	        16
-#define	MIPS_CP0_LLADDR	        17
-#define	MIPS_CP0_WATCH_LO	    18
-#define	MIPS_CP0_WATCH_HI	    19
-#define	MIPS_CP0_TLB_XCONTEXT   20
-#define	MIPS_CP0_ECC		    26
-#define	MIPS_CP0_CACHE_ERR	    27
-#define	MIPS_CP0_TAG_LO	        28
-#define	MIPS_CP0_TAG_HI	        29
-#define	MIPS_CP0_ERROR_PC	    30
-#define	MIPS_CP0_DEBUG	        23
-#define	MIPS_CP0_DEPC		    24
-#define	MIPS_CP0_PERFCNT	    25
-#define	MIPS_CP0_ERRCTL         26
-#define	MIPS_CP0_DATA_LO	    28
-#define	MIPS_CP0_DATA_HI	    29
-#define	MIPS_CP0_DESAVE	        31
+#define MIPS_CP0_TLB_INDEX	0
+#define MIPS_CP0_TLB_RANDOM	1
+#define MIPS_CP0_TLB_LOW	2
+#define MIPS_CP0_TLB_LO0	2
+#define MIPS_CP0_TLB_LO1	3
+#define MIPS_CP0_TLB_CONTEXT	4
+#define MIPS_CP0_TLB_PG_MASK	5
+#define MIPS_CP0_TLB_WIRED	6
+#define MIPS_CP0_HWRENA		7
+#define MIPS_CP0_BAD_VADDR	8
+#define MIPS_CP0_COUNT		9
+#define MIPS_CP0_TLB_HI		10
+#define MIPS_CP0_COMPARE	11
+#define MIPS_CP0_STATUS		12
+#define MIPS_CP0_CAUSE		13
+#define MIPS_CP0_EXC_PC		14
+#define MIPS_CP0_PRID		15
+#define MIPS_CP0_CONFIG		16
+#define MIPS_CP0_LLADDR		17
+#define MIPS_CP0_WATCH_LO	18
+#define MIPS_CP0_WATCH_HI	19
+#define MIPS_CP0_TLB_XCONTEXT	20
+#define MIPS_CP0_ECC		26
+#define MIPS_CP0_CACHE_ERR	27
+#define MIPS_CP0_TAG_LO		28
+#define MIPS_CP0_TAG_HI		29
+#define MIPS_CP0_ERROR_PC	30
+#define MIPS_CP0_DEBUG		23
+#define MIPS_CP0_DEPC		24
+#define MIPS_CP0_PERFCNT	25
+#define MIPS_CP0_ERRCTL		26
+#define MIPS_CP0_DATA_LO	28
+#define MIPS_CP0_DATA_HI	29
+#define MIPS_CP0_DESAVE		31
 
-#define MIPS_CP0_CONFIG_SEL	    0
-#define MIPS_CP0_CONFIG1_SEL    1
-#define MIPS_CP0_CONFIG2_SEL    2
-#define MIPS_CP0_CONFIG3_SEL    3
+#define MIPS_CP0_CONFIG_SEL	0
+#define MIPS_CP0_CONFIG1_SEL	1
+#define MIPS_CP0_CONFIG2_SEL	2
+#define MIPS_CP0_CONFIG3_SEL	3
 
 /* Config0 register bits */
-#define CP0C0_M    31
-#define CP0C0_K23  28
-#define CP0C0_KU   25
-#define CP0C0_MDU  20
-#define CP0C0_MM   17
-#define CP0C0_BM   16
-#define CP0C0_BE   15
-#define CP0C0_AT   13
-#define CP0C0_AR   10
-#define CP0C0_MT   7
-#define CP0C0_VI   3
-#define CP0C0_K0   0
+#define CP0C0_M			31
+#define CP0C0_K23		28
+#define CP0C0_KU		25
+#define CP0C0_MDU		20
+#define CP0C0_MM		17
+#define CP0C0_BM		16
+#define CP0C0_BE		15
+#define CP0C0_AT		13
+#define CP0C0_AR		10
+#define CP0C0_MT		7
+#define CP0C0_VI		3
+#define CP0C0_K0		0
 
 /* Config1 register bits */
-#define CP0C1_M    31
-#define CP0C1_MMU  25
-#define CP0C1_IS   22
-#define CP0C1_IL   19
-#define CP0C1_IA   16
-#define CP0C1_DS   13
-#define CP0C1_DL   10
-#define CP0C1_DA   7
-#define CP0C1_C2   6
-#define CP0C1_MD   5
-#define CP0C1_PC   4
-#define CP0C1_WR   3
-#define CP0C1_CA   2
-#define CP0C1_EP   1
-#define CP0C1_FP   0
+#define CP0C1_M			31
+#define CP0C1_MMU		25
+#define CP0C1_IS		22
+#define CP0C1_IL		19
+#define CP0C1_IA		16
+#define CP0C1_DS		13
+#define CP0C1_DL		10
+#define CP0C1_DA		7
+#define CP0C1_C2		6
+#define CP0C1_MD		5
+#define CP0C1_PC		4
+#define CP0C1_WR		3
+#define CP0C1_CA		2
+#define CP0C1_EP		1
+#define CP0C1_FP		0
 
 /* Config2 Register bits */
-#define CP0C2_M    31
-#define CP0C2_TU   28
-#define CP0C2_TS   24
-#define CP0C2_TL   20
-#define CP0C2_TA   16
-#define CP0C2_SU   12
-#define CP0C2_SS   8
-#define CP0C2_SL   4
-#define CP0C2_SA   0
+#define CP0C2_M			31
+#define CP0C2_TU		28
+#define CP0C2_TS		24
+#define CP0C2_TL		20
+#define CP0C2_TA		16
+#define CP0C2_SU		12
+#define CP0C2_SS		8
+#define CP0C2_SL		4
+#define CP0C2_SA		0
 
 /* Config3 Register bits */
-#define CP0C3_M    31
-#define CP0C3_ISA_ON_EXC 16
-#define CP0C3_ULRI  13
-#define CP0C3_DSPP 10
-#define CP0C3_LPA  7
-#define CP0C3_VEIC 6
-#define CP0C3_VInt 5
-#define CP0C3_SP   4
-#define CP0C3_MT   2
-#define CP0C3_SM   1
-#define CP0C3_TL   0
+#define CP0C3_M			31
+#define CP0C3_ISA_ON_EXC	16
+#define CP0C3_ULRI		13
+#define CP0C3_DSPP		10
+#define CP0C3_LPA		7
+#define CP0C3_VEIC		6
+#define CP0C3_VInt		5
+#define CP0C3_SP		4
+#define CP0C3_MT		2
+#define CP0C3_SM		1
+#define CP0C3_TL		0
 
 /* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0                                              \
+#define MIPS_CONFIG0						\
   ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
 
 /* Have config2, no coprocessor2 attached, no MDMX support attached,
    no performance counters, watch registers present,
    no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1                                              \
-((1 << CP0C1_M) |                                                 \
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |            \
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |            \
+#define MIPS_CONFIG1						\
+((1 << CP0C1_M) |						\
+ (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |		\
+ (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |		\
  (0 << CP0C1_FP))
 
 /* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2                                              \
+#define MIPS_CONFIG2						\
 ((1 << CP0C2_M))
 
 /* No config4, no DSP ASE, no large physaddr (PABITS),
    no external interrupt controller, no vectored interrupts,
    no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3                                              \
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |          \
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |        \
+#define MIPS_CONFIG3						\
+((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |	\
+ (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |	\
  (0 << CP0C3_SM) | (0 << CP0C3_TL))
 
 /* MMU types, the first four entries have the same layout as the
@@ -274,36 +274,36 @@
 /*
  * Trap codes
  */
-#define T_INT           0	/* Interrupt pending */
-#define T_TLB_MOD       1	/* TLB modified fault */
-#define T_TLB_LD_MISS       2	/* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS       3	/* TLB miss on a store */
-#define T_ADDR_ERR_LD       4	/* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST       5	/* Address error on a store */
-#define T_BUS_ERR_IFETCH    6	/* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST     7	/* Bus error on a load or store */
-#define T_SYSCALL       8	/* System call */
-#define T_BREAK         9	/* Breakpoint */
-#define T_RES_INST      10	/* Reserved instruction exception */
-#define T_COP_UNUSABLE      11	/* Coprocessor unusable */
-#define T_OVFLOW        12	/* Arithmetic overflow */
+#define T_INT			0	/* Interrupt pending */
+#define T_TLB_MOD		1	/* TLB modified fault */
+#define T_TLB_LD_MISS		2	/* TLB miss on load or ifetch */
+#define T_TLB_ST_MISS		3	/* TLB miss on a store */
+#define T_ADDR_ERR_LD		4	/* Address error on a load or ifetch */
+#define T_ADDR_ERR_ST		5	/* Address error on a store */
+#define T_BUS_ERR_IFETCH	6	/* Bus error on an ifetch */
+#define T_BUS_ERR_LD_ST		7	/* Bus error on a load or store */
+#define T_SYSCALL		8	/* System call */
+#define T_BREAK			9	/* Breakpoint */
+#define T_RES_INST		10	/* Reserved instruction exception */
+#define T_COP_UNUSABLE		11	/* Coprocessor unusable */
+#define T_OVFLOW		12	/* Arithmetic overflow */
 
 /*
  * Trap definitions added for r4000 port.
  */
-#define T_TRAP          13	/* Trap instruction */
-#define T_VCEI          14	/* Virtual coherency exception */
-#define T_FPE           15	/* Floating point exception */
-#define T_WATCH         23	/* Watch address reference */
-#define T_VCED          31	/* Virtual coherency data */
+#define T_TRAP			13	/* Trap instruction */
+#define T_VCEI			14	/* Virtual coherency exception */
+#define T_FPE			15	/* Floating point exception */
+#define T_WATCH			23	/* Watch address reference */
+#define T_VCED			31	/* Virtual coherency data */
 
 /* Resume Flags */
-#define RESUME_FLAG_DR          (1<<0)	/* Reload guest nonvolatile state? */
-#define RESUME_FLAG_HOST        (1<<1)	/* Resume host? */
+#define RESUME_FLAG_DR		(1<<0)	/* Reload guest nonvolatile state? */
+#define RESUME_FLAG_HOST	(1<<1)	/* Resume host? */
 
-#define RESUME_GUEST            0
-#define RESUME_GUEST_DR         RESUME_FLAG_DR
-#define RESUME_HOST             RESUME_FLAG_HOST
+#define RESUME_GUEST		0
+#define RESUME_GUEST_DR		RESUME_FLAG_DR
+#define RESUME_HOST		RESUME_FLAG_HOST
 
 enum emulation_result {
 	EMULATE_DONE,		/* no further processing */
@@ -313,24 +313,27 @@
 	EMULATE_PRIV_FAIL,
 };
 
-#define MIPS3_PG_G  0x00000001	/* Global; ignore ASID if in lo0 & lo1 */
-#define MIPS3_PG_V  0x00000002	/* Valid */
-#define MIPS3_PG_NV 0x00000000
-#define MIPS3_PG_D  0x00000004	/* Dirty */
+#define MIPS3_PG_G	0x00000001 /* Global; ignore ASID if in lo0 & lo1 */
+#define MIPS3_PG_V	0x00000002 /* Valid */
+#define MIPS3_PG_NV	0x00000000
+#define MIPS3_PG_D	0x00000004 /* Dirty */
 
 #define mips3_paddr_to_tlbpfn(x) \
-    (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
+	(((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
 #define mips3_tlbpfn_to_paddr(x) \
-    ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT)
+	((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT)
 
-#define MIPS3_PG_SHIFT      6
-#define MIPS3_PG_FRAME      0x3fffffc0
+#define MIPS3_PG_SHIFT		6
+#define MIPS3_PG_FRAME		0x3fffffc0
 
-#define VPN2_MASK           0xffffe000
-#define TLB_IS_GLOBAL(x)    (((x).tlb_lo0 & MIPS3_PG_G) && ((x).tlb_lo1 & MIPS3_PG_G))
-#define TLB_VPN2(x)         ((x).tlb_hi & VPN2_MASK)
-#define TLB_ASID(x)         ((x).tlb_hi & ASID_MASK)
-#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) ? ((x).tlb_lo1 & MIPS3_PG_V) : ((x).tlb_lo0 & MIPS3_PG_V))
+#define VPN2_MASK		0xffffe000
+#define TLB_IS_GLOBAL(x)	(((x).tlb_lo0 & MIPS3_PG_G) &&	\
+				 ((x).tlb_lo1 & MIPS3_PG_G))
+#define TLB_VPN2(x)		((x).tlb_hi & VPN2_MASK)
+#define TLB_ASID(x)		((x).tlb_hi & ASID_MASK)
+#define TLB_IS_VALID(x, va)	(((va) & (1 << PAGE_SHIFT))	\
+				 ? ((x).tlb_lo1 & MIPS3_PG_V)	\
+				 : ((x).tlb_lo0 & MIPS3_PG_V))
 
 struct kvm_mips_tlb {
 	long tlb_mask;
@@ -339,7 +342,7 @@
 	long tlb_lo1;
 };
 
-#define KVM_MIPS_GUEST_TLB_SIZE     64
+#define KVM_MIPS_GUEST_TLB_SIZE	64
 struct kvm_vcpu_arch {
 	void *host_ebase, *guest_ebase;
 	unsigned long host_stack;
@@ -400,65 +403,67 @@
 };
 
 
-#define kvm_read_c0_guest_index(cop0)               (cop0->reg[MIPS_CP0_TLB_INDEX][0])
-#define kvm_write_c0_guest_index(cop0, val)         (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
-#define kvm_read_c0_guest_entrylo0(cop0)            (cop0->reg[MIPS_CP0_TLB_LO0][0])
-#define kvm_read_c0_guest_entrylo1(cop0)            (cop0->reg[MIPS_CP0_TLB_LO1][0])
-#define kvm_read_c0_guest_context(cop0)             (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
-#define kvm_write_c0_guest_context(cop0, val)       (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
-#define kvm_read_c0_guest_userlocal(cop0)           (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
-#define kvm_read_c0_guest_pagemask(cop0)            (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
-#define kvm_write_c0_guest_pagemask(cop0, val)      (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
-#define kvm_read_c0_guest_wired(cop0)               (cop0->reg[MIPS_CP0_TLB_WIRED][0])
-#define kvm_write_c0_guest_wired(cop0, val)         (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
-#define kvm_read_c0_guest_badvaddr(cop0)            (cop0->reg[MIPS_CP0_BAD_VADDR][0])
-#define kvm_write_c0_guest_badvaddr(cop0, val)      (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
-#define kvm_read_c0_guest_count(cop0)               (cop0->reg[MIPS_CP0_COUNT][0])
-#define kvm_write_c0_guest_count(cop0, val)         (cop0->reg[MIPS_CP0_COUNT][0] = (val))
-#define kvm_read_c0_guest_entryhi(cop0)             (cop0->reg[MIPS_CP0_TLB_HI][0])
-#define kvm_write_c0_guest_entryhi(cop0, val)       (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
-#define kvm_read_c0_guest_compare(cop0)             (cop0->reg[MIPS_CP0_COMPARE][0])
-#define kvm_write_c0_guest_compare(cop0, val)       (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
-#define kvm_read_c0_guest_status(cop0)              (cop0->reg[MIPS_CP0_STATUS][0])
-#define kvm_write_c0_guest_status(cop0, val)        (cop0->reg[MIPS_CP0_STATUS][0] = (val))
-#define kvm_read_c0_guest_intctl(cop0)              (cop0->reg[MIPS_CP0_STATUS][1])
-#define kvm_write_c0_guest_intctl(cop0, val)        (cop0->reg[MIPS_CP0_STATUS][1] = (val))
-#define kvm_read_c0_guest_cause(cop0)               (cop0->reg[MIPS_CP0_CAUSE][0])
-#define kvm_write_c0_guest_cause(cop0, val)         (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
-#define kvm_read_c0_guest_epc(cop0)                 (cop0->reg[MIPS_CP0_EXC_PC][0])
-#define kvm_write_c0_guest_epc(cop0, val)           (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
-#define kvm_read_c0_guest_prid(cop0)                (cop0->reg[MIPS_CP0_PRID][0])
-#define kvm_write_c0_guest_prid(cop0, val)          (cop0->reg[MIPS_CP0_PRID][0] = (val))
-#define kvm_read_c0_guest_ebase(cop0)               (cop0->reg[MIPS_CP0_PRID][1])
-#define kvm_write_c0_guest_ebase(cop0, val)         (cop0->reg[MIPS_CP0_PRID][1] = (val))
-#define kvm_read_c0_guest_config(cop0)              (cop0->reg[MIPS_CP0_CONFIG][0])
-#define kvm_read_c0_guest_config1(cop0)             (cop0->reg[MIPS_CP0_CONFIG][1])
-#define kvm_read_c0_guest_config2(cop0)             (cop0->reg[MIPS_CP0_CONFIG][2])
-#define kvm_read_c0_guest_config3(cop0)             (cop0->reg[MIPS_CP0_CONFIG][3])
-#define kvm_read_c0_guest_config7(cop0)             (cop0->reg[MIPS_CP0_CONFIG][7])
-#define kvm_write_c0_guest_config(cop0, val)        (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
-#define kvm_write_c0_guest_config1(cop0, val)       (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
-#define kvm_write_c0_guest_config2(cop0, val)       (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
-#define kvm_write_c0_guest_config3(cop0, val)       (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
-#define kvm_write_c0_guest_config7(cop0, val)       (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
-#define kvm_read_c0_guest_errorepc(cop0)            (cop0->reg[MIPS_CP0_ERROR_PC][0])
-#define kvm_write_c0_guest_errorepc(cop0, val)      (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
+#define kvm_read_c0_guest_index(cop0)		(cop0->reg[MIPS_CP0_TLB_INDEX][0])
+#define kvm_write_c0_guest_index(cop0, val)	(cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
+#define kvm_read_c0_guest_entrylo0(cop0)	(cop0->reg[MIPS_CP0_TLB_LO0][0])
+#define kvm_read_c0_guest_entrylo1(cop0)	(cop0->reg[MIPS_CP0_TLB_LO1][0])
+#define kvm_read_c0_guest_context(cop0)		(cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
+#define kvm_write_c0_guest_context(cop0, val)	(cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
+#define kvm_read_c0_guest_userlocal(cop0)	(cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
+#define kvm_read_c0_guest_pagemask(cop0)	(cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
+#define kvm_write_c0_guest_pagemask(cop0, val)	(cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
+#define kvm_read_c0_guest_wired(cop0)		(cop0->reg[MIPS_CP0_TLB_WIRED][0])
+#define kvm_write_c0_guest_wired(cop0, val)	(cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
+#define kvm_read_c0_guest_hwrena(cop0)		(cop0->reg[MIPS_CP0_HWRENA][0])
+#define kvm_write_c0_guest_hwrena(cop0, val)	(cop0->reg[MIPS_CP0_HWRENA][0] = (val))
+#define kvm_read_c0_guest_badvaddr(cop0)	(cop0->reg[MIPS_CP0_BAD_VADDR][0])
+#define kvm_write_c0_guest_badvaddr(cop0, val)	(cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
+#define kvm_read_c0_guest_count(cop0)		(cop0->reg[MIPS_CP0_COUNT][0])
+#define kvm_write_c0_guest_count(cop0, val)	(cop0->reg[MIPS_CP0_COUNT][0] = (val))
+#define kvm_read_c0_guest_entryhi(cop0)		(cop0->reg[MIPS_CP0_TLB_HI][0])
+#define kvm_write_c0_guest_entryhi(cop0, val)	(cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
+#define kvm_read_c0_guest_compare(cop0)		(cop0->reg[MIPS_CP0_COMPARE][0])
+#define kvm_write_c0_guest_compare(cop0, val)	(cop0->reg[MIPS_CP0_COMPARE][0] = (val))
+#define kvm_read_c0_guest_status(cop0)		(cop0->reg[MIPS_CP0_STATUS][0])
+#define kvm_write_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] = (val))
+#define kvm_read_c0_guest_intctl(cop0)		(cop0->reg[MIPS_CP0_STATUS][1])
+#define kvm_write_c0_guest_intctl(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][1] = (val))
+#define kvm_read_c0_guest_cause(cop0)		(cop0->reg[MIPS_CP0_CAUSE][0])
+#define kvm_write_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] = (val))
+#define kvm_read_c0_guest_epc(cop0)		(cop0->reg[MIPS_CP0_EXC_PC][0])
+#define kvm_write_c0_guest_epc(cop0, val)	(cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
+#define kvm_read_c0_guest_prid(cop0)		(cop0->reg[MIPS_CP0_PRID][0])
+#define kvm_write_c0_guest_prid(cop0, val)	(cop0->reg[MIPS_CP0_PRID][0] = (val))
+#define kvm_read_c0_guest_ebase(cop0)		(cop0->reg[MIPS_CP0_PRID][1])
+#define kvm_write_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] = (val))
+#define kvm_read_c0_guest_config(cop0)		(cop0->reg[MIPS_CP0_CONFIG][0])
+#define kvm_read_c0_guest_config1(cop0)		(cop0->reg[MIPS_CP0_CONFIG][1])
+#define kvm_read_c0_guest_config2(cop0)		(cop0->reg[MIPS_CP0_CONFIG][2])
+#define kvm_read_c0_guest_config3(cop0)		(cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config7(cop0)		(cop0->reg[MIPS_CP0_CONFIG][7])
+#define kvm_write_c0_guest_config(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][0] = (val))
+#define kvm_write_c0_guest_config1(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][1] = (val))
+#define kvm_write_c0_guest_config2(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][2] = (val))
+#define kvm_write_c0_guest_config3(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config7(cop0, val)	(cop0->reg[MIPS_CP0_CONFIG][7] = (val))
+#define kvm_read_c0_guest_errorepc(cop0)	(cop0->reg[MIPS_CP0_ERROR_PC][0])
+#define kvm_write_c0_guest_errorepc(cop0, val)	(cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
 
-#define kvm_set_c0_guest_status(cop0, val)          (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
-#define kvm_clear_c0_guest_status(cop0, val)        (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val)           (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val)         (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
-#define kvm_change_c0_guest_cause(cop0, change, val)  \
-{                                                     \
-    kvm_clear_c0_guest_cause(cop0, change);           \
-    kvm_set_c0_guest_cause(cop0, ((val) & (change))); \
+#define kvm_set_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] |= (val))
+#define kvm_clear_c0_guest_status(cop0, val)	(cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
+#define kvm_set_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
+#define kvm_clear_c0_guest_cause(cop0, val)	(cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+#define kvm_change_c0_guest_cause(cop0, change, val)			\
+{									\
+	kvm_clear_c0_guest_cause(cop0, change);				\
+	kvm_set_c0_guest_cause(cop0, ((val) & (change)));		\
 }
-#define kvm_set_c0_guest_ebase(cop0, val)           (cop0->reg[MIPS_CP0_PRID][1] |= (val))
-#define kvm_clear_c0_guest_ebase(cop0, val)         (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
-#define kvm_change_c0_guest_ebase(cop0, change, val)  \
-{                                                     \
-    kvm_clear_c0_guest_ebase(cop0, change);           \
-    kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
+#define kvm_set_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] |= (val))
+#define kvm_clear_c0_guest_ebase(cop0, val)	(cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
+#define kvm_change_c0_guest_ebase(cop0, change, val)			\
+{									\
+	kvm_clear_c0_guest_ebase(cop0, change);				\
+	kvm_set_c0_guest_ebase(cop0, ((val) & (change)));		\
 }
 
 
diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c
index 4b6274b..e3fec99 100644
--- a/arch/mips/kvm/kvm_mips_emul.c
+++ b/arch/mips/kvm/kvm_mips_emul.c
@@ -436,13 +436,6 @@
 	sel = inst & 0x7;
 	co_bit = (inst >> 25) & 1;
 
-	/* Verify that the register is valid */
-	if (rd > MIPS_CP0_DESAVE) {
-		printk("Invalid rd: %d\n", rd);
-		er = EMULATE_FAIL;
-		goto done;
-	}
-
 	if (co_bit) {
 		op = (inst) & 0xff;
 
@@ -1542,8 +1535,15 @@
 	}
 
 	if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) {
+		int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
 		int rd = (inst & RD) >> 11;
 		int rt = (inst & RT) >> 16;
+		/* If usermode, check RDHWR rd is allowed by guest HWREna */
+		if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) {
+			kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n",
+				  rd, opc);
+			goto emulate_ri;
+		}
 		switch (rd) {
 		case 0:	/* CPU number */
 			arch->gprs[rt] = 0;
@@ -1567,31 +1567,27 @@
 			}
 			break;
 		case 29:
-#if 1
 			arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0);
-#else
-			/* UserLocal not implemented */
-			er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
-#endif
 			break;
 
 		default:
-			printk("RDHWR not supported\n");
-			er = EMULATE_FAIL;
-			break;
+			kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc);
+			goto emulate_ri;
 		}
 	} else {
-		printk("Emulate RI not supported @ %p: %#x\n", opc, inst);
-		er = EMULATE_FAIL;
+		kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst);
+		goto emulate_ri;
 	}
 
+	return EMULATE_DONE;
+
+emulate_ri:
 	/*
-	 * Rollback PC only if emulation was unsuccessful
+	 * Rollback PC (if in branch delay slot then the PC already points to
+	 * branch target), and pass the RI exception to the guest OS.
 	 */
-	if (er == EMULATE_FAIL) {
-		vcpu->arch.pc = curr_pc;
-	}
-	return er;
+	vcpu->arch.pc = curr_pc;
+	return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
 }
 
 enum emulation_result
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c36cd35..68897fc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -19,10 +19,19 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
 #define KVM_USER_MEM_SLOTS 32
 
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
 struct sca_entry {
 	atomic_t scn;
 	__u32	reserved;
@@ -244,6 +253,27 @@
 struct kvm_arch_memory_slot {
 };
 
+struct s390_map_info {
+	struct list_head list;
+	__u64 guest_addr;
+	__u64 addr;
+	struct page *page;
+};
+
+struct s390_io_adapter {
+	unsigned int id;
+	int isc;
+	bool maskable;
+	bool masked;
+	bool swap;
+	struct rw_semaphore maps_lock;
+	struct list_head maps;
+	atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
 struct kvm_arch{
 	struct sca_block *sca;
 	debug_info_t *dbf;
@@ -251,6 +281,8 @@
 	struct kvm_device *flic;
 	struct gmap *gmap;
 	int css_support;
+	int use_irqchip;
+	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 2f0ade2..c003c6a 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -22,6 +22,8 @@
 #define KVM_DEV_FLIC_CLEAR_IRQS		3
 #define KVM_DEV_FLIC_APF_ENABLE		4
 #define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER	6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY	7
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -32,6 +34,26 @@
 #define KVM_S390_MAX_FLOAT_IRQS	266250
 #define KVM_S390_FLIC_MAX_BUFFER	0x2000000
 
+struct kvm_s390_io_adapter {
+	__u32 id;
+	__u8 isc;
+	__u8 maskable;
+	__u8 swap;
+	__u8 pad;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+	__u32 id;
+	__u8 type;
+	__u8 mask;
+	__u16 pad0;
+	__u64 addr;
+};
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index c8bacbc..10d529a 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -25,6 +25,8 @@
 	select HAVE_KVM_EVENTFD
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index a47d2c3..d3adb37 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 05bffd7..200a8f9 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2014
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <asm/asm-offsets.h>
@@ -1068,6 +1069,171 @@
 	return r;
 }
 
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+	if (id >= MAX_S390_IO_ADAPTERS)
+		return NULL;
+	return kvm->arch.adapters[id];
+}
+
+static int register_io_adapter(struct kvm_device *dev,
+			       struct kvm_device_attr *attr)
+{
+	struct s390_io_adapter *adapter;
+	struct kvm_s390_io_adapter adapter_info;
+
+	if (copy_from_user(&adapter_info,
+			   (void __user *)attr->addr, sizeof(adapter_info)))
+		return -EFAULT;
+
+	if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
+	    (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+		return -EINVAL;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&adapter->maps);
+	init_rwsem(&adapter->maps_lock);
+	atomic_set(&adapter->nr_maps, 0);
+	adapter->id = adapter_info.id;
+	adapter->isc = adapter_info.isc;
+	adapter->maskable = adapter_info.maskable;
+	adapter->masked = false;
+	adapter->swap = adapter_info.swap;
+	dev->kvm->arch.adapters[adapter->id] = adapter;
+
+	return 0;
+}
+
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+	int ret;
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+	if (!adapter || !adapter->maskable)
+		return -EINVAL;
+	ret = adapter->masked;
+	adapter->masked = masked;
+	return ret;
+}
+
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *map;
+	int ret;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	INIT_LIST_HEAD(&map->list);
+	map->guest_addr = addr;
+	map->addr = gmap_translate(addr, kvm->arch.gmap);
+	if (map->addr == -EFAULT) {
+		ret = -EFAULT;
+		goto out;
+	}
+	ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret != 1);
+	down_write(&adapter->maps_lock);
+	if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+		list_add_tail(&map->list, &adapter->maps);
+		ret = 0;
+	} else {
+		put_page(map->page);
+		ret = -EINVAL;
+	}
+	up_write(&adapter->maps_lock);
+out:
+	if (ret)
+		kfree(map);
+	return ret;
+}
+
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *map, *tmp;
+	int found = 0;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	down_write(&adapter->maps_lock);
+	list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+		if (map->guest_addr == addr) {
+			found = 1;
+			atomic_dec(&adapter->nr_maps);
+			list_del(&map->list);
+			put_page(map->page);
+			kfree(map);
+			break;
+		}
+	}
+	up_write(&adapter->maps_lock);
+
+	return found ? 0 : -EINVAL;
+}
+
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+	int i;
+	struct s390_map_info *map, *tmp;
+
+	for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+		if (!kvm->arch.adapters[i])
+			continue;
+		list_for_each_entry_safe(map, tmp,
+					 &kvm->arch.adapters[i]->maps, list) {
+			list_del(&map->list);
+			put_page(map->page);
+			kfree(map);
+		}
+		kfree(kvm->arch.adapters[i]);
+	}
+}
+
+static int modify_io_adapter(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter_req req;
+	struct s390_io_adapter *adapter;
+	int ret;
+
+	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+		return -EFAULT;
+
+	adapter = get_io_adapter(dev->kvm, req.id);
+	if (!adapter)
+		return -EINVAL;
+	switch (req.type) {
+	case KVM_S390_IO_ADAPTER_MASK:
+		ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+		if (ret > 0)
+			ret = 0;
+		break;
+	case KVM_S390_IO_ADAPTER_MAP:
+		ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+		break;
+	case KVM_S390_IO_ADAPTER_UNMAP:
+		ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
 static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r = 0;
@@ -1096,6 +1262,12 @@
 		kvm_for_each_vcpu(i, vcpu, dev->kvm)
 			kvm_clear_async_pf_completion_queue(vcpu);
 		break;
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+		r = register_io_adapter(dev, attr);
+		break;
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+		r = modify_io_adapter(dev, attr);
+		break;
 	default:
 		r = -EINVAL;
 	}
@@ -1127,3 +1299,123 @@
 	.create = flic_create,
 	.destroy = flic_destroy,
 };
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+	unsigned long bit;
+
+	bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
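+	/*
+	 * The indicators are a bit array within the pinned page; bit_nr is
+	 * relative to addr.  If the guest uses the opposite bit numbering
+	 * (swap), mirror the bit position within each BITS_PER_LONG-sized
+	 * word; the XOR leaves the word index itself untouched.
+	 */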
+	return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+					  u64 addr)
+{
+	struct s390_map_info *map;
+
+	if (!adapter)
+		return NULL;
+
+	list_for_each_entry(map, &adapter->maps, list) {
+		if (map->guest_addr == addr)
+			return map;
+	}
+	return NULL;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+				  struct s390_io_adapter *adapter,
+				  struct kvm_s390_adapter_int *adapter_int)
+{
+	unsigned long bit;
+	int summary_set, idx;
+	struct s390_map_info *info;
+	void *map;
+
+	info = get_map_info(adapter, adapter_int->ind_addr);
+	if (!info)
+		return -1;
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+	set_bit(bit, map);
+	idx = srcu_read_lock(&kvm->srcu);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	info = get_map_info(adapter, adapter_int->summary_addr);
+	if (!info) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -1;
+	}
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+			  adapter->swap);
+	summary_set = test_and_set_bit(bit, map);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	srcu_read_unlock(&kvm->srcu, idx);
+	return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	int ret;
+	struct s390_io_adapter *adapter;
+
+	/* We're only interested in the 0->1 transition. */
+	if (!level)
+		return 0;
+	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+	if (!adapter)
+		return -1;
+	down_read(&adapter->maps_lock);
+	ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+	up_read(&adapter->maps_lock);
+	if ((ret > 0) && !adapter->masked) {
+		struct kvm_s390_interrupt s390int = {
+			.type = KVM_S390_INT_IO(1, 0, 0, 0),
+			.parm = 0,
+			.parm64 = (adapter->isc << 27) | 0x80000000,
+		};
+		ret = kvm_s390_inject_vm(kvm, &s390int);
+		if (ret == 0)
+			ret = 1;
+	}
+	return ret;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int ret;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_S390_ADAPTER:
+		e->set = set_adapter_int;
+		e->adapter.summary_addr = ue->u.adapter.summary_addr;
+		e->adapter.ind_addr = ue->u.adapter.ind_addr;
+		e->adapter.summary_offset = ue->u.adapter.summary_offset;
+		e->adapter.ind_offset = ue->u.adapter.ind_offset;
+		e->adapter.adapter_id = ue->u.adapter.adapter_id;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+		int irq_source_id, int level, bool line_status)
+{
+	return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 0000000..d98e415
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 83b7944..6e1b990 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -159,6 +159,7 @@
 	case KVM_CAP_S390_CSS_SUPPORT:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_ENABLE_CAP_VM:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -187,6 +188,25 @@
 	return 0;
 }
 
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_S390_IRQCHIP:
+		kvm->arch.use_irqchip = 1;
+		r = 0;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -204,6 +224,26 @@
 		r = kvm_s390_inject_vm(kvm, &s390int);
 		break;
 	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
+	case KVM_CREATE_IRQCHIP: {
+		struct kvm_irq_routing_entry routing;
+
+		r = -EINVAL;
+		if (kvm->arch.use_irqchip) {
+			/* Set up dummy routing. */
+			memset(&routing, 0, sizeof(routing));
+			kvm_set_irq_routing(kvm, &routing, 0, 0);
+			r = 0;
+		}
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -265,6 +305,7 @@
 	}
 
 	kvm->arch.css_support = 0;
+	kvm->arch.use_irqchip = 0;
 
 	return 0;
 out_nogmap:
@@ -324,6 +365,7 @@
 	debug_unregister(kvm->arch.dbf);
 	if (!kvm_is_ucontrol(kvm))
 		gmap_free(kvm->arch.gmap);
+	kvm_s390_destroy_adapters(kvm);
 }
 
 /* Section: vcpu related */
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6311170..660e79f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -137,6 +137,7 @@
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in priv.c */
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -163,5 +164,6 @@
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
 
 #endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ddc8a7e..64fae65 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -43,6 +43,16 @@
 	return ret;
 }
 
+u64 kvm_supported_xcr0(void)
+{
+	u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+	if (!kvm_x86_ops->mpx_supported())
+		xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+	return xcr0;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -73,7 +83,7 @@
 	} else {
 		vcpu->arch.guest_supported_xcr0 =
 			(best->eax | ((u64)best->edx << 32)) &
-			host_xcr0 & KVM_SUPPORTED_XCR0;
+			kvm_supported_xcr0();
 		vcpu->arch.guest_xstate_size = best->ebx =
 			xstate_required_size(vcpu->arch.xcr0);
 	}
@@ -210,13 +220,6 @@
 	entry->flags = 0;
 }
 
-static bool supported_xcr0_bit(unsigned bit)
-{
-	u64 mask = ((u64)1 << bit);
-
-	return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
 #define F(x) bit(X86_FEATURE_##x)
 
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,8 +259,7 @@
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
-	unsigned f_mpx = kvm_x86_ops->mpx_supported ?
-			 (kvm_x86_ops->mpx_supported() ? F(MPX) : 0) : 0;
+	unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -439,16 +441,18 @@
 	}
 	case 0xd: {
 		int idx, i;
+		u64 supported = kvm_supported_xcr0();
 
-		entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
-		entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+		entry->eax &= supported;
+		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (idx = 1, i = 1; idx < 64; ++idx) {
+			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+			if (entry[i].eax == 0 || !(supported & mask))
 				continue;
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a449c3d..2136cb6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4089,6 +4089,11 @@
 	return false;
 }
 
+static bool svm_mpx_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4371,6 +4376,7 @@
 
 	.rdtscp_supported = svm_rdtscp_supported,
 	.invpcid_supported = svm_invpcid_supported,
+	.mpx_supported = svm_mpx_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f4e5aed..1320e0f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -206,6 +206,7 @@
 	u64 guest_pdptr1;
 	u64 guest_pdptr2;
 	u64 guest_pdptr3;
+	u64 guest_bndcfgs;
 	u64 host_ia32_pat;
 	u64 host_ia32_efer;
 	u64 host_ia32_perf_global_ctrl;
@@ -541,6 +542,7 @@
 	GUEST_CS_LIMIT,
 	GUEST_CS_BASE,
 	GUEST_ES_BASE,
+	GUEST_BNDCFGS,
 	CR0_GUEST_HOST_MASK,
 	CR0_READ_SHADOW,
 	CR4_READ_SHADOW,
@@ -596,6 +598,7 @@
 	FIELD64(GUEST_PDPTR1, guest_pdptr1),
 	FIELD64(GUEST_PDPTR2, guest_pdptr2),
 	FIELD64(GUEST_PDPTR3, guest_pdptr3),
+	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
 	FIELD64(HOST_IA32_PAT, host_ia32_pat),
 	FIELD64(HOST_IA32_EFER, host_ia32_efer),
 	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -726,6 +729,7 @@
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -736,6 +740,7 @@
 static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static bool vmx_mpx_supported(void);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2287,6 +2292,8 @@
 	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+	if (vmx_mpx_supported())
+		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2300,6 +2307,8 @@
 		VM_ENTRY_LOAD_IA32_PAT;
 	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
 				       VM_ENTRY_LOAD_IA32_EFER);
+	if (vmx_mpx_supported())
+		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2493,6 +2502,8 @@
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	case MSR_IA32_BNDCFGS:
+		if (!vmx_mpx_supported())
+			return 1;
 		data = vmcs_read64(GUEST_BNDCFGS);
 		break;
 	case MSR_IA32_FEATURE_CONTROL:
@@ -2564,6 +2575,8 @@
 		vmcs_writel(GUEST_SYSENTER_ESP, data);
 		break;
 	case MSR_IA32_BNDCFGS:
+		if (!vmx_mpx_supported())
+			return 1;
 		vmcs_write64(GUEST_BNDCFGS, data);
 		break;
 	case MSR_IA32_TSC:
@@ -7866,6 +7879,9 @@
 
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
 			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8351,6 +8367,8 @@
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+	if (vmx_mpx_supported())
+		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
 
 	/* update exit information fields: */
 
@@ -8460,6 +8478,10 @@
 	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
 	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
 
+	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
+	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, 0);
+
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
 		vcpu->arch.pat = vmcs12->host_ia32_pat;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a37da6b..aa98695 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3084,9 +3084,7 @@
 		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
 		 * with old userspace.
 		 */
-		if (xstate_bv & ~KVM_SUPPORTED_XCR0)
-			return -EINVAL;
-		if (xstate_bv & ~host_xcr0)
+		if (xstate_bv & ~kvm_supported_xcr0())
 			return -EINVAL;
 		memcpy(&vcpu->arch.guest_fpu.state->xsave,
 			guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3939,6 +3937,23 @@
 	for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
 		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
 			continue;
+
+		/*
+		 * Even MSRs that are valid in the host may not be exposed
+		 * to the guests in some cases.  We could work around this
+		 * in VMX with the generic MSR save/load machinery, but it
+		 * is not really worthwhile since it will only
+		 * happen with nested virtualization.
+		 */
+		switch (msrs_to_save[i]) {
+		case MSR_IA32_BNDCFGS:
+			if (!kvm_x86_ops->mpx_supported())
+				continue;
+			break;
+		default:
+			break;
+		}
+
 		if (j < i)
 			msrs_to_save[j] = msrs_to_save[i];
 		j++;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 392ecbf..8c97bac9 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -126,6 +126,8 @@
 				| XSTATE_BNDREGS | XSTATE_BNDCSR)
 extern u64 host_xcr0;
 
+extern u64 kvm_supported_xcr0(void);
+
 extern unsigned int min_timer_period_us;
 
 extern struct static_key kvm_no_apic_vcpu;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9816b68..7d21cf9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -297,6 +297,14 @@
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 }
 
+struct kvm_s390_adapter_int {
+	u64 ind_addr;
+	u64 summary_addr;
+	u64 ind_offset;
+	u32 summary_offset;
+	u32 adapter_id;
+};
+
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
@@ -309,6 +317,7 @@
 			unsigned pin;
 		} irqchip;
 		struct msi_msg msi;
+		struct kvm_s390_adapter_int adapter;
 	};
 	struct hlist_node link;
 };
@@ -913,7 +922,11 @@
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 
+#ifdef CONFIG_S390
+#define KVM_MAX_IRQ_ROUTES 4096 /* FIXME: we can have more than that... */
+#else
 #define KVM_MAX_IRQ_ROUTES 1024
+#endif
 
 int kvm_setup_default_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7518be..a8f4ee5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -741,6 +741,8 @@
 #define KVM_CAP_EXT_EMUL_CPUID 95
 #define KVM_CAP_HYPERV_TIME 96
 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
+#define KVM_CAP_ENABLE_CAP_VM 98
+#define KVM_CAP_S390_IRQCHIP 99
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -756,9 +758,18 @@
 	__u32 pad;
 };
 
+struct kvm_irq_routing_s390_adapter {
+	__u64 ind_addr;
+	__u64 summary_addr;
+	__u64 ind_offset;
+	__u32 summary_offset;
+	__u32 adapter_id;
+};
+
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -768,6 +779,7 @@
 	union {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
+		struct kvm_irq_routing_s390_adapter adapter;
 		__u32 pad[8];
 	} u;
 };
@@ -1076,6 +1088,10 @@
 /* Available with KVM_CAP_DEBUGREGS */
 #define KVM_GET_DEBUGREGS         _IOR(KVMIO,  0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS         _IOW(KVMIO,  0xa2, struct kvm_debugregs)
+/*
+ * vcpu version available with KVM_CAP_ENABLE_CAP
+ * vm version available with KVM_CAP_ENABLE_CAP_VM
+ */
 #define KVM_ENABLE_CAP            _IOW(KVMIO,  0xa3, struct kvm_enable_cap)
 /* Available with KVM_CAP_XSAVE */
 #define KVM_GET_XSAVE		  _IOR(KVMIO,  0xa4, struct kvm_xsave)
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d60..29c2a04 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@
 					   lockdep_is_held(&kvm->irqfds.lock));
 	irqfd_update(kvm, irqfd, irq_rt);
 
-	events = f.file->f_op->poll(f.file, &irqfd->pt);
-
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
+	spin_unlock_irq(&kvm->irqfds.lock);
+
 	/*
 	 * Check if there was an event already pending on the eventfd
 	 * before we registered, and trigger it as if we didn't miss it.
 	 */
+	events = f.file->f_op->poll(f.file, &irqfd->pt);
+
 	if (events & POLLIN)
 		schedule_work(&irqfd->inject);
 
-	spin_unlock_irq(&kvm->irqfds.lock);
-
 	/*
 	 * do not drop the file until the irqfd is fully initialized, otherwise
 	 * we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1539d37..d4b6015 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@
 	return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-		bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+		int irq_level, bool line_status)
 {
-	union kvm_ioapic_redirect_entry *pent;
-	int injected = -1;
+	union kvm_ioapic_redirect_entry entry;
+	u32 mask = 1 << irq;
+	u32 old_irr;
+	int edge, ret;
 
-	pent = &ioapic->redirtbl[idx];
+	entry = ioapic->redirtbl[irq];
+	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 
-	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx, line_status);
-		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-			pent->fields.remote_irr = 1;
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+		goto out;
 	}
 
-	return injected;
+	/*
+	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+	 * this only happens if a previous edge has not been delivered due
+	 * to masking.  For level interrupts, the remote_irr field tells
+	 * us if the interrupt is waiting for an EOI.
+	 *
+	 * RTC is special: it is edge-triggered, but userspace likes to know
+	 * if it has been already ack-ed via EOI because coalesced RTC
+	 * interrupts lead to time drift in Windows guests.  So we track
+	 * EOI manually for the RTC interrupt.
+	 */
+	if (irq == RTC_GSI && line_status &&
+		rtc_irq_check_coalesced(ioapic)) {
+		ret = 0;
+		goto out;
+	}
+
+	old_irr = ioapic->irr;
+	ioapic->irr |= mask;
+	if ((edge && old_irr == ioapic->irr) ||
+	    (!edge && entry.fields.remote_irr)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	return ret;
 }
 
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+	u32 idx;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+		ioapic_set_irq(ioapic, idx, 1, true);
+
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
 	int ret;
 
+	if (entry->fields.mask)
+		return -1;
+
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
 		     entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
+	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+		ioapic->irr &= ~(1 << irq);
+
 	if (irq == RTC_GSI && line_status) {
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,44 +360,24 @@
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
+	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+		entry->fields.remote_irr = 1;
+
 	return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status)
 {
-	u32 old_irr;
-	u32 mask = 1 << irq;
-	union kvm_ioapic_redirect_entry entry;
 	int ret, irq_level;
 
 	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
-	old_irr = ioapic->irr;
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
-	entry = ioapic->redirtbl[irq];
-	if (!irq_level) {
-		ioapic->irr &= ~mask;
-		ret = 1;
-	} else {
-		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-		if (irq == RTC_GSI && line_status &&
-			rtc_irq_check_coalesced(ioapic)) {
-			ret = 0; /* coalesced */
-			goto out;
-		}
-		ioapic->irr |= mask;
-		if ((edge && old_irr != ioapic->irr) ||
-		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq, line_status);
-		else
-			ret = 0; /* report coalesced interrupt */
-	}
-out:
-	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
@@ -393,7 +423,7 @@
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+		if (ioapic->irr & (1 << i))
 			ioapic_service(ioapic, i, false);
 	}
 }
@@ -594,9 +624,10 @@
 
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	ioapic->irr = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
-	kvm_rtc_eoi_tracking_restore_all(ioapic);
+	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }