ANDROID: KVM: On guest exit, ask TrustZone to relinquish the borrowed pages

Iterate over the list of handles that identify the shared memory
regions and ask TrustZone to relinquish them. Let the host call into
the scheduler before retrying the hypercall that asks the hypervisor
to reclaim TrustZone memory.

[ qperret: Move the FF-A reclaim logic into its own hypercall ]

Bug: 269285339
Bug: 278749606
Change-Id: I5e5d40376415a249b3c0baf48b5d52e5a0d0befc
Signed-off-by: Sebastian Ene <sebastianene@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index dff9f2c..1fad502 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -98,6 +98,7 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___pkvm_start_teardown_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_finalize_teardown_vm,
 	__KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_page,
+	__KVM_HOST_SMCCC_FUNC___pkvm_reclaim_dying_guest_ffa_resources,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
index d3b707d..fd00654 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h
@@ -26,5 +26,6 @@ int hyp_ffa_init(void *pages);
 bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
 bool kvm_guest_ffa_handler(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code);
 struct ffa_mem_transfer *find_transfer_by_handle(u64 ffa_handle, struct kvm_ffa_buffers *buf);
+int kvm_dying_guest_reclaim_ffa_resources(struct pkvm_hyp_vm *vm);
 
 #endif /* __KVM_HYP_FFA_H */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 529bd0a..a2422e9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -123,6 +123,7 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu);
 int __pkvm_start_teardown_vm(pkvm_handle_t handle);
 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle);
 int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 gfn, u8 order);
+int __pkvm_reclaim_dying_guest_ffa_resources(pkvm_handle_t handle);
 
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 					 unsigned int vcpu_idx);
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index f39b0de..79e00a6 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -1268,6 +1268,59 @@ bool kvm_guest_ffa_handler(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
 	goto back_to_guest;
 }
 
+static void kvm_guest_try_reclaim_transfer(struct ffa_mem_transfer *transfer, struct pkvm_hyp_vm *vm)
+{
+	struct ffa_translation *translation, *tmp;
+	struct arm_smccc_res res;
+
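+	/* Ask TrustZone to relinquish the region identified by this FF-A handle */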
+	ffa_mem_reclaim(&res, HANDLE_LOW(transfer->ffa_handle),
+			HANDLE_HIGH(transfer->ffa_handle), 0);
+	if (res.a0 != FFA_SUCCESS)
+		return;
+
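+	/* Reclaim succeeded: undo the guest's FF-A page shares and free the bookkeeping */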
+	list_for_each_entry_safe(translation, tmp, &transfer->translations, node) {
+		WARN_ON(__pkvm_guest_unshare_ffa_page(vm->vcpus[0], translation->ipa));
+		list_del(&translation->node);
+		hyp_free(translation);
+	}
+
+	list_del(&transfer->node);
+	hyp_free(transfer);
+}
+
+int kvm_dying_guest_reclaim_ffa_resources(struct pkvm_hyp_vm *vm)
+{
+	struct kvm_ffa_buffers *ffa_buf = &vm->ffa_buf;
+	struct ffa_mem_transfer *transfer;
+	int ret = 0;
+
+	hyp_spin_lock(&kvm_ffa_hyp_lock);
+	if (!ffa_buf->tx && !ffa_buf->rx)
+		goto unlock;
+
+	if (list_empty(&ffa_buf->xfer_list)) {
+		/* XXX - does this need an explicit rxtx unmap call? */
+		if (ffa_buf->tx) {
+			WARN_ON(__pkvm_guest_unshare_hyp_page(vm->vcpus[0], ffa_buf->tx_ipa));
+			ffa_buf->tx = NULL;
+		}
+		if (ffa_buf->rx) {
+			WARN_ON(__pkvm_guest_unshare_hyp_page(vm->vcpus[0], ffa_buf->rx_ipa));
+			ffa_buf->rx = NULL;
+		}
+		goto unlock;
+	}
+
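+	/*
+	 * Reclaim one transfer per call and return -EAGAIN so the host can
+	 * reschedule before retrying the hypercall.
+	 */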
+	transfer = list_first_entry(&ffa_buf->xfer_list, typeof(*transfer), node);
+	kvm_guest_try_reclaim_transfer(transfer, vm);
+	ret = -EAGAIN;
+
+unlock:
+	hyp_spin_unlock(&kvm_ffa_hyp_lock);
+
+	return ret;
+}
+
 int hyp_ffa_init(void *pages)
 {
 	struct arm_smccc_res res;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index cdac60a..642a8a8c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1473,6 +1473,13 @@ static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_
 		__pkvm_reclaim_dying_guest_page(handle, pfn, gfn, order);
 }
 
+static void handle___pkvm_reclaim_dying_guest_ffa_resources(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_ffa_resources(handle);
+}
+
 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
 {
 	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
@@ -1922,6 +1929,7 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__pkvm_start_teardown_vm),
 	HANDLE_FUNC(__pkvm_finalize_teardown_vm),
 	HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
+	HANDLE_FUNC(__pkvm_reclaim_dying_guest_ffa_resources),
 	HANDLE_FUNC(__pkvm_vcpu_load),
 	HANDLE_FUNC(__pkvm_vcpu_put),
 	HANDLE_FUNC(__pkvm_vcpu_sync_state),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index ac4e4a2..34e3abc 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -2380,12 +2380,10 @@ int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa, u8 order)
 		goto unlock;
 	}
 
-	/* We could avoid TLB inval, it is done per VMID on the finalize path */
-	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, page_size));
-
 	switch ((int)guest_get_page_state(pte, ipa)) {
 	case PKVM_PAGE_OWNED:
 		WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_NOPAGE));
+		/* No vCPU of the guest can run, so doing this before the stage-2 unmap is OK */
 		hyp_poison_page(phys);
 		psci_mem_protect_dec(1 << order);
 		break;
@@ -2394,12 +2392,18 @@ int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa, u8 order)
 		WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_OWNED));
 		break;
 	case PKVM_PAGE_SHARED_OWNED:
-		WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_BORROWED));
+		if (__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_BORROWED)) {
+			/* Presumably a page shared via FF-A; it will be handled separately */
+			ret = -EBUSY;
+			goto unlock;
+		}
 		break;
 	default:
 		BUG_ON(1);
 	}
 
+	/* We could avoid the TLB invalidation here; it is done per VMID on the finalize path */
+	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, page_size));
 	WARN_ON(host_stage2_set_owner_locked(phys, page_size, PKVM_ID_HOST));
 
 unlock:
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 08e48f8..3414095 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -15,6 +15,7 @@
 #include <hyp/adjust_pc.h>
 
 #include <nvhe/alloc.h>
+#include <nvhe/ffa.h>
 #include <nvhe/mem_protect.h>
 #include <nvhe/memory.h>
 #include <nvhe/mm.h>
@@ -372,6 +373,20 @@ int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 gfn, u8 o
 	return ret;
 }
 
+int __pkvm_reclaim_dying_guest_ffa_resources(pkvm_handle_t handle)
+{
+	struct pkvm_hyp_vm *hyp_vm;
+	int ret = -EINVAL;
+
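+	/* FF-A resources may only be reclaimed once the guest teardown has started */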
+	hyp_read_lock(&vm_table_lock);
+	hyp_vm = get_vm_by_handle(handle);
+	if (hyp_vm && hyp_vm->is_dying)
+		ret = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
+	hyp_read_unlock(&vm_table_lock);
+
+	return ret;
+}
+
 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
 					 unsigned int vcpu_idx)
 {
@@ -974,6 +989,12 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
 	remove_vm_table_entry(handle);
 	hyp_write_unlock(&vm_table_lock);
 
+	/* A well-behaved host will have reclaimed all FF-A resources already */
+	do {
+		err = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
+	} while (err == -EAGAIN);
+	WARN_ON(err);
+
 	pkvm_devices_teardown(hyp_vm);
 
 	/*
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 379a93e..1c82fbe 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -366,23 +366,33 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 	struct mm_struct *mm = current->mm;
 	struct kvm_pinned_page *ppage;
 	struct kvm_vcpu *host_vcpu;
-	unsigned long pages = 0;
+	unsigned long nr_busy;
+	unsigned long pages;
 	unsigned long idx;
+	int ret;
 
 	if (!pkvm_is_hyp_created(host_kvm))
 		goto out_free;
 
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));
 
+retry:
+	pages = 0;
+	nr_busy = 0;
 	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages, 0, ~(0UL));
 	while (ppage) {
 		struct kvm_pinned_page *next;
 		u16 pins = ppage->pins;
 
-		WARN_ON(pkvm_call_hyp_nvhe_ppage(ppage,
+		ret = pkvm_call_hyp_nvhe_ppage(ppage,
 						 __reclaim_dying_guest_page_call,
-						 host_kvm, true));
+						 host_kvm, true);
 		cond_resched();
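+		/* Page still shared via FF-A; skip it and retry after the FF-A reclaim below */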
+		if (ret == -EBUSY) {
+			nr_busy++;
+			ppage = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
+			continue;
+		}
+		WARN_ON(ret);
 
 		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
 		next = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
@@ -394,6 +404,16 @@ static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 
 	account_locked_vm(mm, pages, false);
 
+	if (nr_busy) {
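+		/*
+		 * Some pages are still shared with TrustZone via FF-A: ask the
+		 * hypervisor to reclaim them, rescheduling between attempts,
+		 * then retry the pinned-pages walk above.
+		 */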
+		do {
+			ret = kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_ffa_resources,
+						host_kvm->arch.pkvm.handle);
+			WARN_ON(ret && ret != -EAGAIN);
+			cond_resched();
+		} while (ret == -EAGAIN);
+		goto retry;
+	}
+
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
 
 out_free: