KVM: arm64: Introduce a new spinlock for the hypervisor VM 'vcpus[]' array
The vCPU pointers in the hypervisor VM structure are claimed to be
protected by 'vm_table_lock', however:
 1. This doesn't appear to be the case on the PSCI paths, where the
    array is walked locklessly when looking up the target vCPU
 2. 'vm_table_lock' serialises across all VMs, so it is overkill for
    protecting per-VM data
Introduce a new per-VM spinlock for the vCPU array and ensure that it is
held whilst accessing the data structure.
Change-Id: I316f6710af5f3147195d4b13e39b7cf5964e8589
Signed-off-by: Will Deacon <will@kernel.org>
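---
For reference, below is a minimal userspace sketch (not part of the patch) of
the locking pattern introduced here: the per-VM 'vcpus_lock' is taken only
around accesses to the 'vcpus[]' array, so publication of a new vCPU and the
full-array walks on the PSCI paths serialise against each other without
involving the global 'vm_table_lock'. pthread_spinlock_t stands in for
hyp_spinlock_t, the structures are heavily simplified, and all names in the
sketch are illustrative only.

/*
 * Sketch of the per-VM vcpus[] locking rule. pthread_spinlock_t is a
 * stand-in for hyp_spinlock_t; nothing here is hypervisor code.
 */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_VCPUS	8

struct sketch_vcpu {
	unsigned long mpidr;
};

struct sketch_vm {
	pthread_spinlock_t vcpus_lock;	/* analogue of hyp_vm->vcpus_lock */
	int created_vcpus;
	struct sketch_vcpu *vcpus[MAX_VCPUS];
};

/* Publish a vCPU under the lock, like the tail of __pkvm_init_vcpu(). */
static int sketch_add_vcpu(struct sketch_vm *vm, int idx,
			   struct sketch_vcpu *vcpu)
{
	int ret = 0;

	pthread_spin_lock(&vm->vcpus_lock);
	if (idx >= vm->created_vcpus || vm->vcpus[idx])
		ret = -1;
	else
		vm->vcpus[idx] = vcpu;
	pthread_spin_unlock(&vm->vcpus_lock);

	return ret;
}

/* Walk the array under the lock, like pkvm_mpidr_to_hyp_vcpu(). */
static struct sketch_vcpu *sketch_mpidr_to_vcpu(struct sketch_vm *vm,
						unsigned long mpidr)
{
	struct sketch_vcpu *found = NULL;
	int i;

	pthread_spin_lock(&vm->vcpus_lock);
	for (i = 0; i < vm->created_vcpus; i++) {
		if (vm->vcpus[i] && vm->vcpus[i]->mpidr == mpidr) {
			found = vm->vcpus[i];
			break;
		}
	}
	pthread_spin_unlock(&vm->vcpus_lock);

	return found;
}

int main(void)
{
	struct sketch_vm vm = { .created_vcpus = 2 };
	struct sketch_vcpu vcpu0 = { .mpidr = 0x0 };

	pthread_spin_init(&vm.vcpus_lock, PTHREAD_PROCESS_PRIVATE);
	sketch_add_vcpu(&vm, 0, &vcpu0);
	printf("lookup -> %p\n", (void *)sketch_mpidr_to_vcpu(&vm, 0x0));
	pthread_spin_destroy(&vm.vcpus_lock);
	return 0;
}

Note that the smp_store_release() publishing the new entry is kept, as the
vCPU-load path in pkvm_load_hyp_vcpu() still reads 'hyp_vm->vcpus[]' without
taking the new lock.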
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 362302a..18f5217 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -61,6 +61,7 @@ struct pkvm_hyp_vm {
* reclaimed by the host.
*/
bool is_dying;
+ hyp_spinlock_t vcpus_lock;
/* Array of the hyp vCPU structures for this VM. */
struct pkvm_hyp_vcpu *vcpus[];
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index c92cb97..d99ba64 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -246,7 +246,8 @@ struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
hyp_read_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
- if (!hyp_vm || hyp_vm->is_dying || hyp_vm->kvm.created_vcpus <= vcpu_idx)
+ if (!hyp_vm || READ_ONCE(hyp_vm->is_dying) ||
+ READ_ONCE(hyp_vm->kvm.created_vcpus) <= vcpu_idx)
goto unlock;
/*
@@ -453,6 +454,7 @@ static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
hyp_vm->kvm.arch.flags = 0;
pkvm_init_features_from_host(hyp_vm, host_kvm);
+ hyp_spin_lock_init(&hyp_vm->vcpus_lock);
}
static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
@@ -759,27 +761,28 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
if (!hyp_vcpu)
return -ENOMEM;
- hyp_write_lock(&vm_table_lock);
+ hyp_read_lock(&vm_table_lock);
hyp_vm = get_vm_by_handle(handle);
if (!hyp_vm) {
ret = -ENOENT;
- goto unlock;
+ goto unlock_vm;
}
+ hyp_spin_lock(&hyp_vm->vcpus_lock);
ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu);
if (ret)
- goto unlock;
+ goto unlock_vcpus;
idx = hyp_vcpu->vcpu.vcpu_idx;
- if (idx >= hyp_vm->kvm.created_vcpus) {
+ if (idx >= READ_ONCE(hyp_vm->kvm.created_vcpus)) {
ret = -EINVAL;
- goto unlock;
+ goto unlock_vcpus;
}
if (hyp_vm->vcpus[idx]) {
ret = -EINVAL;
- goto unlock;
+ goto unlock_vcpus;
}
/*
@@ -787,8 +790,11 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
* the vCPU-load path via 'hyp_vm->vcpus[]'.
*/
smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu);
-unlock:
- hyp_write_unlock(&vm_table_lock);
+
+unlock_vcpus:
+ hyp_spin_unlock(&hyp_vm->vcpus_lock);
+unlock_vm:
+ hyp_read_unlock(&vm_table_lock);
if (ret) {
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
@@ -863,6 +869,7 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
struct kvm_hyp_memcache *mc;
struct pkvm_hyp_vm *hyp_vm;
struct kvm *host_kvm;
+ int created_vcpus;
unsigned int idx;
int err;
@@ -877,6 +884,7 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
}
host_kvm = hyp_vm->host_kvm;
+ created_vcpus = READ_ONCE(hyp_vm->kvm.created_vcpus);
/* Ensure the VMID is clean before it can be reallocated */
__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
@@ -892,10 +900,10 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
mc = &host_kvm->arch.pkvm.teardown_mc;
destroy_hyp_vm_pgt(hyp_vm);
drain_hyp_pool(hyp_vm, mc);
- unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);
+ unpin_host_vcpus(hyp_vm->vcpus, created_vcpus);
/* Push the metadata pages to the teardown memcache */
- for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) {
+ for (idx = 0; idx < created_vcpus; ++idx) {
struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
struct kvm_hyp_memcache *vcpu_mc;
@@ -922,7 +930,7 @@ int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
teardown_donated_memory(mc, (__force void *)last_vcpu_ran,
last_ran_size);
- vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
+ vm_size = pkvm_get_hyp_vm_size(created_vcpus);
teardown_donated_memory(mc, hyp_vm, vm_size);
hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
return 0;
@@ -965,21 +973,25 @@ void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm,
u64 mpidr)
{
+ struct pkvm_hyp_vcpu *hyp_vcpu;
int i;
mpidr &= MPIDR_HWID_BITMASK;
- for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
- struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[i];
+ hyp_spin_lock(&hyp_vm->vcpus_lock);
+ for (i = 0; i < READ_ONCE(hyp_vm->kvm.created_vcpus); i++) {
+ hyp_vcpu = hyp_vm->vcpus[i];
if (!hyp_vcpu)
continue;
if (mpidr == kvm_vcpu_get_mpidr_aff(&hyp_vcpu->vcpu))
- return hyp_vcpu;
+ goto unlock;
}
-
- return NULL;
+ hyp_vcpu = NULL;
+unlock:
+ hyp_spin_unlock(&hyp_vm->vcpus_lock);
+ return hyp_vcpu;
}
/*
@@ -1078,7 +1090,8 @@ static bool pvm_psci_vcpu_affinity_info(struct pkvm_hyp_vcpu *hyp_vcpu)
* then if at least one is PENDING_ON then return PENDING_ON.
* Otherwise, return OFF.
*/
- for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
+ hyp_spin_lock(&hyp_vm->vcpus_lock);
+ for (i = 0; i < READ_ONCE(hyp_vm->kvm.created_vcpus); i++) {
struct pkvm_hyp_vcpu *target = hyp_vm->vcpus[i];
if (!target)
@@ -1097,19 +1110,20 @@ static bool pvm_psci_vcpu_affinity_info(struct pkvm_hyp_vcpu *hyp_vcpu)
break;
case PSCI_0_2_AFFINITY_LEVEL_ON:
ret = PSCI_0_2_AFFINITY_LEVEL_ON;
- goto done;
+ goto unlock;
case PSCI_0_2_AFFINITY_LEVEL_OFF:
break;
default:
ret = PSCI_RET_INTERNAL_FAILURE;
- goto done;
+ goto unlock;
}
}
}
if (!matching_cpus)
ret = PSCI_RET_INVALID_PARAMS;
-
+unlock:
+ hyp_spin_unlock(&hyp_vm->vcpus_lock);
done:
/* Nothing to be handled by the host. Go back to the guest. */
smccc_set_retval(vcpu, ret, 0, 0, 0);