MMIO guard, pKVM flavoured

Plumb the MMIO guard hypercalls into pKVM: a protected guest can enroll
into the MMIO guard and then register (and unregister) individual IPA
pages as MMIO. The hypervisor tracks registered pages with a stage-2
annotation, and a data abort is only reported to the host as an MMIO
exit if the faulting page carries that annotation.

If installing the annotation runs out of memcache pages, the HVC is
rewound and forwarded to the host for page donation, and the donated
pages are handed to the shadow vcpu before the guest retries the call.

Signed-off-by: Marc Zyngier <maz@kernel.org>
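---
For illustration, this is roughly how a protected guest ends up driving
the new hypercalls (a sketch only: the two helpers below are made-up
names, while the FUNC_IDs, arm_smccc_1_1_invoke() and SMCCC_RET_SUCCESS
are the pieces this patch actually relies on):

	#include <linux/arm-smccc.h>

	/* One-off, VM-wide enrollment into the MMIO guard */
	static int pkvm_mmioguard_enroll(void)
	{
		struct arm_smccc_res res;

		arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID,
				     &res);
		return res.a0 == SMCCC_RET_SUCCESS ? 0 : -EINVAL;
	}

	/* Register one guest page as MMIO before it is first accessed */
	static int pkvm_mmioguard_map(u64 ipa)
	{
		struct arm_smccc_res res;

		arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID,
				     ipa, 0, 0, &res);
		return res.a0 == SMCCC_RET_SUCCESS ? 0 : -EINVAL;
	}

Once enrolled, data aborts on pages that were not registered this way
are no longer reported to the host as MMIO exits.
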
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 3694567..e2716f3 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -67,6 +67,9 @@
 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu);
 int __pkvm_guest_share_host(struct kvm_vcpu *vcpu, u64 ipa);
 int __pkvm_guest_unshare_host(struct kvm_vcpu *vcpu, u64 ipa);
+int __pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa);
+int __pkvm_remove_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa);
+bool __pkvm_check_ioguard_page(struct kvm_vcpu *vcpu);
 
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 1ebf975..28c29ec 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -84,6 +84,13 @@
 	u32 fn = smccc_get_function(shadow_vcpu);
 
 	switch (fn) {
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
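+		/*
+		 * Move the pages the host just donated to the shadow vcpu,
+		 * so that the retried MAP hypercall can allocate its annotation.
+		 */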
+		pkvm_refill_memcache(shadow_vcpu, host_vcpu);
+		break;
 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
 		fallthrough;
 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
@@ -271,10 +278,12 @@
 		return 0;
 
 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
-		fallthrough;
 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
 		return 3;
 
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
+		return 2;
+
 	/* The rest are either blocked or handled by hyp. */
 	default:
 		return -1;
@@ -304,8 +313,8 @@
 
 static void handle_pvm_exit_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu)
 {
-	/* FIXME: Revisit once MMIO-guard is available */
-	shadow_vcpu->mmio_needed = true;
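+	/* With the MMIO guard enrolled, only forward accesses to registered pages */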
+	shadow_vcpu->mmio_needed = __pkvm_check_ioguard_page(shadow_vcpu);
 
 	if (shadow_vcpu->mmio_needed) {
 		/* r0 as transfer register between the guest and the host. */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 6248435..367ca16 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -1771,3 +1771,107 @@
 
 	return ret;
 }
+
+/* Replace this with something more structured one day */
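+/* The shift keeps bit 0 clear, i.e. the annotation is an invalid stage-2 PTE */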
+#define MMIO_NOTE	(('M' << 24 | 'M' << 16 | 'I' << 8 | 'O') << 1)
+
+static bool __check_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa)
+{
+	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
+	kvm_pte_t pte;
+	u32 level;
+	int ret;
+
+	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
+	if (ret)
+		return false;
+
+	/* Must be a PAGE_SIZE mapping with our annotation */
+	return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE &&
+		pte == MMIO_NOTE);
+}
+
+int __pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa)
+{
+	struct kvm_shadow_vm *vm;
+	kvm_pte_t pte;
+	u32 level;
+	int ret;
+
+	vm = vcpu->arch.pkvm.shadow_vm;
+
+	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags))
+		return -EINVAL;
+
+	if (ipa & ~PAGE_MASK)
+		return -EINVAL;
+
+	guest_lock_component(vcpu);
+
+	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
+	if (ret)
+		goto unlock;
+
+	if (pte && BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE) {
+		/*
+		 * If the page is already flagged as MMIO, accept it;
+		 * anything else mapped at this address is a failure.
+		 */
+		if (pte != MMIO_NOTE)
+			ret = -EBUSY;
+
+		goto unlock;
+	}
+
+	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE,
+					  &vcpu->arch.pkvm_memcache,
+					  MMIO_NOTE);
+
+unlock:
+	guest_unlock_component(vcpu);
+	return ret;
+}
+
+int __pkvm_remove_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa)
+{
+	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
+
+	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags))
+		return -EINVAL;
+
+	guest_lock_component(vcpu);
+
+	if (__check_ioguard_page(vcpu, ipa)) {
+		kvm_pgtable_stage2_unmap(&vm->pgt,
+					 ALIGN_DOWN(ipa, PAGE_SIZE), PAGE_SIZE);
+	}
+
+	guest_unlock_component(vcpu);
+	return 0;
+}
+
+bool __pkvm_check_ioguard_page(struct kvm_vcpu *vcpu)
+{
+	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
+	u64 ipa, end;
+	bool ret;
+
+	if (!kvm_vcpu_dabt_isvalid(vcpu))
+		return false;
+
+	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags))
+		return true;
+
+	ipa  = kvm_vcpu_get_fault_ipa(vcpu);
+	ipa |= kvm_vcpu_get_hfar(vcpu) & FAR_MASK;
+	end = ipa + kvm_vcpu_dabt_get_as(vcpu) - 1;
+
+	guest_lock_component(vcpu);
+	ret = __check_ioguard_page(vcpu, ipa);
+	if ((end & PAGE_MASK) != (ipa & PAGE_MASK))
+		ret &= __check_ioguard_page(vcpu, end);
+	guest_unlock_component(vcpu);
+
+	return ret;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 15612349..7055bcb 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -1022,6 +1022,31 @@
 	return true;
 }
 
+static bool pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	u32 retval = SMCCC_RET_SUCCESS;
+	u64 ipa = smccc_get_arg1(vcpu);
+	int ret;
+
+	ret = __pkvm_install_ioguard_page(vcpu, ipa);
+	if (ret == -ENOMEM) {
+		/*
+		 * We ran out of memcache, let's ask for more. Cancel
+		 * the effects of the HVC that took us here, and
+		 * forward the hypercall to the host for page donation
+		 * purposes.
+		 */
+		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
+		return false;
+	}
+
+	if (ret)
+		retval = SMCCC_RET_INVALID_PARAMETER;
+
+	smccc_set_retval(vcpu, retval, 0, 0, 0);
+	return true;
+}
+
 /*
  * Handler for protected VM HVC calls.
  *
@@ -1049,7 +1074,22 @@
 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
+		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO);
+		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL);
+		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP);
+		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP);
 		break;
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID:
+		set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->arch.pkvm.shadow_vm->arch.flags);
+		val[0] = SMCCC_RET_SUCCESS;
+		break;
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
+		return pkvm_install_ioguard_page(vcpu, exit_code);
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID:
+		if (!__pkvm_remove_ioguard_page(vcpu, vcpu_get_reg(vcpu, 1)))
+			val[0] = SMCCC_RET_SUCCESS;
+		break;
+	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID:
 	case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
 		if (smccc_get_arg1(vcpu) ||
 		    smccc_get_arg2(vcpu) ||
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 003ba32..291d414 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1450,6 +1450,18 @@
 	struct kvm *kvm = vcpu->kvm;
 	int ret, idx;
 
+	/*
+	 * In protected mode, this is just about donating pages to the
+	 * greedy hypervisor, and everything is handled on the other
+	 * side.
+	 */
+	if (is_protected_kvm_enabled()) {
+		pr_warn("topping up for %llx\n", ipa);
+		ret = topup_hyp_memcache(&vcpu->arch.pkvm_memcache,
+					 kvm_mmu_cache_min_pages(kvm));
+		return !ret;
+	}
+
 	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &kvm->arch.flags))
 		return false;
 
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 0e10b78..7c09ca4 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -65,7 +65,7 @@
 		return;
 
 	arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID,
-			     &res);
+			     0, 0, 0, &res);
 	if (res.a0 != PAGE_SIZE)
 		return;