ANDROID: KVM: arm64: Eagerly coalesce host page tables
Rather than zapping tables whose refcount is not raised, and leaving
it to the demand-fault path to install a block mapping in their place,
install the block mapping immediately.
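In the host stage2, default memory and MMIO mappings are not
refcounted, so a table whose refcount is not raised (page_count == 1,
i.e. only the parent's reference) and which the map walker has just
fully populated is equivalent to a single block mapping. The map
walker therefore now also visits table entries on the way back up
(KVM_PGTABLE_WALK_TABLE_POST, moved over from the annotate walker)
and, once protected mode is initialised, collapses such tables in
place. In outline, simplified from the hunk below:

    if (!kvm_phys_is_valid(data->phys) ||
        !kvm_level_supports_block_mapping(ctx->level) ||
        mm_ops->page_count(childp) != 1)
            return 0;

    /*
     * Break-before-make: unlink the table and flush its TLB
     * entries, then install the equivalent block PTE and free
     * the now-unlinked table page.
     */
    stage2_try_break_pte(ctx, data->mmu);
    stage2_make_pte(ctx, kvm_init_valid_leaf_pte(addr, data->attr,
                                                 ctx->level));
    mm_ops->put_page(childp);

The table-zapping logic previously stubbed out in
stage2_reclaim_leaf_walker() is filled in at the same time, so that
unreferenced tables can still be reclaimed outside the map path.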
Bug: 278749606
Bug: 308373293
Bug: 311571169
Bug: 357781595
Change-Id: Icaad23a65b21399b8d75014677bf6359a6d90f17
Signed-off-by: Keir Fraser <keirf@google.com>
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index b6378a6..e3f6e47 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1047,37 +1047,70 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
return 0;
}
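+/*
+ * Sanity check, compiled in only with CONFIG_NVHE_EL2_DEBUG: verify that
+ * the table being coalesced is exactly equivalent to the block mapping
+ * that will replace it, i.e. every entry is a valid leaf mapping the
+ * expected physical address with the attributes recorded in @data.
+ */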
+static void debug_check_table_before_coalescing(
+ const struct kvm_pgtable_visit_ctx *ctx,
+ struct stage2_map_data *data,
+ kvm_pte_t *ptep, u64 pa)
+{
+#ifdef CONFIG_NVHE_EL2_DEBUG
+ u64 granule = kvm_granule_size(ctx->level + 1);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pa += granule) {
+ kvm_pte_t pte = kvm_init_valid_leaf_pte(
+ pa, data->attr, ctx->level + 1);
+ WARN_ON(pte != *ptep);
+ }
+#endif
+}
+
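+/*
+ * Called on the TABLE_POST leg of the map walk, i.e. after all children
+ * of the table entry at @ctx have been visited, at which point the table
+ * can safely be collapsed into a block mapping if nothing else holds a
+ * reference to it.
+ */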
static int stage2_coalesce_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
- kvm_pte_t *childp = kvm_pte_follow(*ctx->ptep, mm_ops);
+ kvm_pte_t new, *childp = kvm_pte_follow(ctx->old, mm_ops);
+ u64 size, addr;
/*
- * Decrement the refcount only on the set ownership path to avoid a
- * loop situation when the following happens:
- * 1. We take a host stage2 fault and we create a small mapping which
- * has default attributes (is not refcounted).
- * 2. On the way back we execute the post handler and we zap the
- * table that holds our mapping.
+ * We don't want to coalesce during pkvm initialisation, before the
+ * overall structure of the host S2 table is created.
*/
- if (kvm_phys_is_valid(data->phys) ||
- !kvm_level_supports_block_mapping(ctx->level))
+ if (!static_branch_likely(&kvm_protected_mode_initialized))
return 0;
/*
- * Free a page that is not referenced anymore and drop the reference
- * of the page table page.
+ * If we installed a non-refcounted valid mapping, and the table has no
+ * other raised references, then we can immediately collapse to a block
+ * mapping.
*/
- if (mm_ops->page_count(childp) == 1) {
- u64 size = kvm_granule_size(ctx->level);
- u64 addr = ALIGN_DOWN(ctx->addr, size);
+ if (!kvm_phys_is_valid(data->phys) ||
+ !kvm_level_supports_block_mapping(ctx->level) ||
+ (mm_ops->page_count(childp) != 1))
+ return 0;
- kvm_clear_pte(ctx->ptep);
- kvm_tlb_flush_vmid_range(data->mmu, addr, size);
- mm_ops->put_page(ctx->ptep);
- mm_ops->put_page(childp);
- }
+ /*
+ * This should apply only to the host S2, which does not refcount its
+ * default memory and mmio mappings.
+ */
+ WARN_ON(!(data->mmu->pgt->flags & KVM_PGTABLE_S2_IDMAP));
+
+ size = kvm_granule_size(ctx->level);
+ addr = ALIGN_DOWN(ctx->addr, size);
+
+ debug_check_table_before_coalescing(ctx, data, childp, addr);
+
+ new = kvm_init_valid_leaf_pte(addr, data->attr, ctx->level);
+
+ /* Breaking must succeed, as this is not a shared walk. */
+ WARN_ON(!stage2_try_break_pte(ctx, data->mmu));
+
+ /* Host doesn't require CMOs. */
+ WARN_ON(mm_ops->dcache_clean_inval_poc || mm_ops->icache_inval_pou);
+
+ stage2_make_pte(ctx, new);
+
+ /* Finally, free the unlinked table. */
+ mm_ops->put_page(childp);
return 0;
}
@@ -1125,7 +1158,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
.cb = stage2_map_walker,
.flags = flags |
KVM_PGTABLE_WALK_TABLE_PRE |
- KVM_PGTABLE_WALK_LEAF,
+ KVM_PGTABLE_WALK_LEAF |
+ KVM_PGTABLE_WALK_TABLE_POST,
.arg = &map_data,
};
@@ -1158,8 +1192,7 @@ int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
.flags = KVM_PGTABLE_WALK_TABLE_PRE |
- KVM_PGTABLE_WALK_LEAF |
- KVM_PGTABLE_WALK_TABLE_POST,
+ KVM_PGTABLE_WALK_LEAF,
.arg = &map_data,
};
@@ -1249,11 +1282,29 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
static int stage2_reclaim_leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- /* XXX: Once coalescing is ready!
- * struct stage2_map_data *data = ctx->arg;
- *
- * stage2_coalesce_walk_table_post(ctx, data);
+ struct stage2_map_data *data = ctx->arg;
+ struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+ kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
+ u64 size, addr;
+
+ /*
+ * If this table's refcount is not raised, we can safely discard it.
+ * Any mappings that it contains can be re-created on demand.
*/
+ if (!kvm_level_supports_block_mapping(ctx->level) ||
+ (mm_ops->page_count(childp) != 1))
+ return 0;
+
+ size = kvm_granule_size(ctx->level);
+ addr = ALIGN_DOWN(ctx->addr, size);
+
+ /* Unlink the table and flush TLBs. */
+ kvm_clear_pte(ctx->ptep);
+ kvm_tlb_flush_vmid_range(data->mmu, addr, size);
+
+ /* Free the unlinked table, and drop its reference in the parent. */
+ mm_ops->put_page(ctx->ptep);
+ mm_ops->put_page(childp);
return 0;
}