KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space
It is not currently possible to create the full number of possible
VCPUs (KVM_MAX_VCPUS) on Power9 with KVM-HV when the guest uses fewer
threads per core than its core stride (or "VSMT mode"). This is
because the VCORE ID and XIVE offsets grow beyond KVM_MAX_VCPUS
even though the VCPU ID is less than KVM_MAX_VCPU_ID.
To address this, "pack" the VCORE ID and XIVE offsets by using
knowledge of the way the VCPU IDs will be used when there are fewer
guest threads per core than the core stride. The primary thread of
each core will always be used first. Then, if the guest uses more than
one thread per core, these secondary threads will sequentially follow
the primary in each core.
So, the only way an ID above KVM_MAX_VCPUS can be seen, is if the
VCPUs are being spaced apart, so at least half of each core is empty,
and IDs between KVM_MAX_VCPUS and (KVM_MAX_VCPUS * 2) can be mapped
into the second half of each core (4..7, in an 8-thread core).
Similarly, if IDs above KVM_MAX_VCPUS * 2 are seen, at least 3/4 of
each core is being left empty, and we can map down into the second and
third quarters of each core (2, 3 and 5, 6 in an 8-thread core).
Lastly, if IDs above KVM_MAX_VCPUS * 4 are seen, only the primary
threads are being used and 7/8 of the core is empty, allowing use of
the 1, 5, 3 and 7 thread slots.
(Strides less than 8 are handled similarly.)
This allows the VCORE ID or offset to be calculated quickly from the
VCPU ID or XIVE server numbers, without access to the VCPU structure.
[paulus@ozlabs.org - tidied up comment a little, changed some WARN_ONCE
to pr_devel, wrapped line, fixed id check.]
Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d73b29b6..785245e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1816,7 +1816,7 @@ static int threads_per_vcore(struct kvm *kvm)
return threads_per_subcore;
}
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{
struct kvmppc_vcore *vcore;
@@ -1830,7 +1830,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * kvm->arch.smt_mode;
+ vcore->first_vcpuid = id;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1989,10 +1989,16 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err;
+ int err = -EINVAL;
int core;
struct kvmppc_vcore *vcore;
+ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode) &&
+ cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pr_devel("DNCI: VCPU ID too high\n");
+ goto out;
+ }
+
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@@ -2048,12 +2054,21 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_lock(&kvm->lock);
vcore = NULL;
err = -EINVAL;
- core = id / kvm->arch.smt_mode;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ BUG_ON(kvm->arch.smt_mode != 1);
+ core = kvmppc_pack_vcpu_id(kvm, id);
+ } else {
+ core = id / kvm->arch.smt_mode;
+ }
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
- if (!vcore) {
+ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pr_devel("KVM: collision on id %u", id);
+ vcore = NULL;
+ } else if (!vcore) {
err = -ENOMEM;
- vcore = kvmppc_vcore_create(kvm, core);
+ vcore = kvmppc_vcore_create(kvm,
+ id & ~(kvm->arch.smt_mode - 1));
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}