KVM: MMU: Push clean gpte write protection out of gpte_access()
gpte_access() computes the access permissions of a guest pte and also
write-protects clean gptes. This is wrong when we are servicing a
write fault (since we'll be setting the dirty bit momentarily) but
correct when instantiating a speculative spte, or when servicing a
read fault (since we'll want to trap a following write in order to
set the dirty bit).
It doesn't seem to hurt in practice, but in order to make the code
readable, push the write protection out of gpte_access() and into
a new protect_clean_gpte() which is called explicitly when needed.
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf8c42b..bf7b4ff 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -101,14 +101,11 @@
return (ret != orig_pte);
}
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
- bool last)
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
unsigned access;
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
- if (last && !is_dirty_gpte(gpte))
- access &= ~ACC_WRITE_MASK;
#if PTTYPE == 64
if (vcpu->arch.mmu.nx)
@@ -222,8 +219,7 @@
last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
if (last_gpte) {
- pte_access = pt_access &
- FNAME(gpte_access)(vcpu, pte, true);
+ pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
/* check if the kernel is fetching from user page */
if (unlikely(pte_access & PT_USER_MASK) &&
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
@@ -274,7 +270,7 @@
break;
}
- pt_access &= FNAME(gpte_access)(vcpu, pte, false);
+ pt_access &= FNAME(gpte_access)(vcpu, pte);
--walker->level;
}
@@ -283,7 +279,9 @@
goto error;
}
- if (write_fault && unlikely(!is_dirty_gpte(pte))) {
+ if (!write_fault)
+ protect_clean_gpte(&pte_access, pte);
+ else if (unlikely(!is_dirty_gpte(pte))) {
int ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
@@ -368,7 +366,8 @@
return;
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ protect_clean_gpte(&pte_access, gpte);
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
if (mmu_invalid_pfn(pfn))
return;
@@ -441,8 +440,8 @@
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
continue;
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
- true);
+ pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ protect_clean_gpte(&pte_access, gpte);
gfn = gpte_to_gfn(gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
pte_access & ACC_WRITE_MASK);
@@ -794,7 +793,8 @@
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
- pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+ pte_access &= FNAME(gpte_access)(vcpu, gpte);
+ protect_clean_gpte(&pte_access, gpte);
if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
continue;