| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Support KVM guest page tracking |
| * |
| * This feature allows us to track page accesses in the guest. Currently, |
| * only write accesses are tracked. |
| * |
| * Copyright(C) 2015 Intel Corporation. |
| * |
| * Author: |
| * Xiao Guangrong <guangrong.xiao@linux.intel.com> |
| */ |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| |
| #include <linux/lockdep.h> |
| #include <linux/kvm_host.h> |
| #include <linux/rculist.h> |
| |
| #include "mmu.h" |
| #include "mmu_internal.h" |
| #include "page_track.h" |
| |
| static bool kvm_external_write_tracking_enabled(struct kvm *kvm) |
| { |
| #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING |
| /* |
| * Read external_write_tracking_enabled before related pointers. Pairs |
| * with the smp_store_release in kvm_enable_external_write_tracking(). |
| */ |
| return smp_load_acquire(&kvm->arch.external_write_tracking_enabled); |
| #else |
| return false; |
| #endif |
| } |
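| |
| /* |
| * Ordering sketch (illustrative, not part of the original file): why the |
| * acquire above pairs with the release in |
| * kvm_enable_external_write_tracking(). |
| * |
| * Writer: |
| * slot->arch.gfn_write_track = __vcalloc(...); // publish the arrays |
| * smp_store_release(&kvm->arch.external_write_tracking_enabled, true); |
| * |
| * Reader: |
| * if (smp_load_acquire(&kvm->arch.external_write_tracking_enabled)) |
| * use(slot->arch.gfn_write_track); // guaranteed visible here |
| */ |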
| |
| bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) |
| { |
| return kvm_external_write_tracking_enabled(kvm) || |
| kvm_shadow_root_allocated(kvm) || !tdp_enabled; |
| } |
| |
| void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) |
| { |
| vfree(slot->arch.gfn_write_track); |
| slot->arch.gfn_write_track = NULL; |
| } |
| |
| static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot, |
| unsigned long npages) |
| { |
| const size_t size = sizeof(*slot->arch.gfn_write_track); |
| |
| if (!slot->arch.gfn_write_track) |
| slot->arch.gfn_write_track = __vcalloc(npages, size, |
| GFP_KERNEL_ACCOUNT); |
| |
| return slot->arch.gfn_write_track ? 0 : -ENOMEM; |
| } |
| |
| int kvm_page_track_create_memslot(struct kvm *kvm, |
| struct kvm_memory_slot *slot, |
| unsigned long npages) |
| { |
| if (!kvm_page_track_write_tracking_enabled(kvm)) |
| return 0; |
| |
| return __kvm_page_track_write_tracking_alloc(slot, npages); |
| } |
| |
| int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) |
| { |
| return __kvm_page_track_write_tracking_alloc(slot, slot->npages); |
| } |
| |
| static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn, |
| short count) |
| { |
| int index, val; |
| |
| index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); |
| |
| val = slot->arch.gfn_write_track[index]; |
| |
| if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX)) |
| return; |
| |
| slot->arch.gfn_write_track[index] += count; |
| } |
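| |
| /* |
| * Worked example (illustrative; the values are made up): at PG_LEVEL_4K, |
| * gfn_to_index() reduces to (gfn - slot->base_gfn), so each 4KiB guest |
| * page owns one counter and a page stays tracked until every tracker |
| * has dropped its reference. |
| * |
| * slot->base_gfn = 0x100; // slot starts at gfn 0x100 |
| * update_gfn_write_track(slot, 0x105, 1); // gfn_write_track[5] == 1 |
| * update_gfn_write_track(slot, 0x105, 1); // second tracker, == 2 |
| * update_gfn_write_track(slot, 0x105, -1); // still tracked, == 1 |
| */ |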
| |
| void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, |
| gfn_t gfn) |
| { |
| lockdep_assert_held_write(&kvm->mmu_lock); |
| |
| lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || |
| srcu_read_lock_held(&kvm->srcu)); |
| |
| if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) |
| return; |
| |
| update_gfn_write_track(slot, gfn, 1); |
| |
| /* |
| * A new tracker disallows large page mappings for the |
| * tracked page. |
| */ |
| kvm_mmu_gfn_disallow_lpage(slot, gfn); |
| |
| if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K)) |
| kvm_flush_remote_tlbs(kvm); |
| } |
| |
| void __kvm_write_track_remove_gfn(struct kvm *kvm, |
| struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| lockdep_assert_held_write(&kvm->mmu_lock); |
| |
| lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) || |
| srcu_read_lock_held(&kvm->srcu)); |
| |
| if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm)) |
| return; |
| |
| update_gfn_write_track(slot, gfn, -1); |
| |
| /* |
| * Re-allow large page mappings for the tracked page once |
| * the last tracker is gone. |
| */ |
| kvm_mmu_gfn_allow_lpage(slot, gfn); |
| } |
| |
| /* |
| * Check whether write accesses to the specified guest page are tracked. |
| */ |
| bool kvm_gfn_is_write_tracked(struct kvm *kvm, |
| const struct kvm_memory_slot *slot, gfn_t gfn) |
| { |
| int index; |
| |
| if (!slot) |
| return false; |
| |
| if (!kvm_page_track_write_tracking_enabled(kvm)) |
| return false; |
| |
| index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); |
| return !!READ_ONCE(slot->arch.gfn_write_track[index]); |
| } |
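| |
| /* |
| * Caller sketch (illustrative; the "my_*" name is hypothetical): a fault |
| * handler can use this predicate to force emulation of writes to |
| * tracked pages instead of installing a writable mapping. |
| * |
| * static bool my_reject_write_fault(struct kvm *kvm, |
| * const struct kvm_memory_slot *slot, gfn_t gfn) |
| * { |
| * // Writes to tracked gfns must be emulated, not mapped writable. |
| * return kvm_gfn_is_write_tracked(kvm, slot, gfn); |
| * } |
| */ |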
| |
| #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING |
| void kvm_page_track_cleanup(struct kvm *kvm) |
| { |
| struct kvm_page_track_notifier_head *head; |
| |
| head = &kvm->arch.track_notifier_head; |
| cleanup_srcu_struct(&head->track_srcu); |
| } |
| |
| int kvm_page_track_init(struct kvm *kvm) |
| { |
| struct kvm_page_track_notifier_head *head; |
| |
| head = &kvm->arch.track_notifier_head; |
| INIT_HLIST_HEAD(&head->track_notifier_list); |
| return init_srcu_struct(&head->track_srcu); |
| } |
| |
| static int kvm_enable_external_write_tracking(struct kvm *kvm) |
| { |
| struct kvm_memslots *slots; |
| struct kvm_memory_slot *slot; |
| int r = 0, i, bkt; |
| |
| mutex_lock(&kvm->slots_arch_lock); |
| |
| /* |
| * Check for *any* write tracking user (not just external users) under |
| * lock. This avoids unnecessary work, e.g. if KVM itself is using |
| * write tracking, or if two external users raced when registering. |
| */ |
| if (kvm_page_track_write_tracking_enabled(kvm)) |
| goto out_success; |
| |
| for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { |
| slots = __kvm_memslots(kvm, i); |
| kvm_for_each_memslot(slot, bkt, slots) { |
| /* |
| * Intentionally do NOT free allocations on failure to |
| * avoid having to track which allocations were made |
| * now versus when the memslot was created. The |
| * metadata is guaranteed to be freed when the slot is |
| * freed, and will be kept/used if userspace retries |
| * the failed ioctl() instead of killing the VM. |
| */ |
| r = kvm_page_track_write_tracking_alloc(slot); |
| if (r) |
| goto out_unlock; |
| } |
| } |
| |
| out_success: |
| /* |
| * Ensure that external_write_tracking_enabled becomes true strictly |
| * after all the related pointers are set. Pairs with the |
| * smp_load_acquire in kvm_external_write_tracking_enabled(). |
| */ |
| smp_store_release(&kvm->arch.external_write_tracking_enabled, true); |
| out_unlock: |
| mutex_unlock(&kvm->slots_arch_lock); |
| return r; |
| } |
| |
| /* |
| * Register the notifier so that events for the tracked guest |
| * pages are delivered to it. |
| */ |
| int kvm_page_track_register_notifier(struct kvm *kvm, |
| struct kvm_page_track_notifier_node *n) |
| { |
| struct kvm_page_track_notifier_head *head; |
| int r; |
| |
| if (!kvm || kvm->mm != current->mm) |
| return -ESRCH; |
| |
| if (!kvm_external_write_tracking_enabled(kvm)) { |
| r = kvm_enable_external_write_tracking(kvm); |
| if (r) |
| return r; |
| } |
| |
| kvm_get_kvm(kvm); |
| |
| head = &kvm->arch.track_notifier_head; |
| |
| write_lock(&kvm->mmu_lock); |
| hlist_add_head_rcu(&n->node, &head->track_notifier_list); |
| write_unlock(&kvm->mmu_lock); |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier); |
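| |
| /* |
| * Usage sketch (illustrative; the "my_*" names are hypothetical): an |
| * external user embeds a notifier node in its own state, fills in the |
| * callbacks it cares about, and registers the node. Registration pins |
| * the VM via kvm_get_kvm() until the node is unregistered. |
| * |
| * static struct kvm_page_track_notifier_node my_node = { |
| * .track_write = my_track_write, |
| * .track_remove_region = my_track_remove_region, |
| * }; |
| * |
| * static int my_attach(struct kvm *kvm) |
| * { |
| * return kvm_page_track_register_notifier(kvm, &my_node); |
| * } |
| */ |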
| |
| /* |
| * Stop receiving tracked-page events. This is the inverse operation of |
| * kvm_page_track_register_notifier(). |
| */ |
| void kvm_page_track_unregister_notifier(struct kvm *kvm, |
| struct kvm_page_track_notifier_node *n) |
| { |
| struct kvm_page_track_notifier_head *head; |
| |
| head = &kvm->arch.track_notifier_head; |
| |
| write_lock(&kvm->mmu_lock); |
| hlist_del_rcu(&n->node); |
| write_unlock(&kvm->mmu_lock); |
| synchronize_srcu(&head->track_srcu); |
| |
| kvm_put_kvm(kvm); |
| } |
| EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); |
| |
| /* |
| * Notify registered nodes that a write access has been intercepted and |
| * the write emulation has finished. |
| * |
| * Each node must determine for itself whether the written page is one |
| * it is interested in. |
| */ |
| void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes) |
| { |
| struct kvm_page_track_notifier_head *head; |
| struct kvm_page_track_notifier_node *n; |
| int idx; |
| |
| head = &kvm->arch.track_notifier_head; |
| |
| if (hlist_empty(&head->track_notifier_list)) |
| return; |
| |
| idx = srcu_read_lock(&head->track_srcu); |
| hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, |
| srcu_read_lock_held(&head->track_srcu)) |
| if (n->track_write) |
| n->track_write(gpa, new, bytes, n); |
| srcu_read_unlock(&head->track_srcu, idx); |
| } |
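| |
| /* |
| * Callback sketch (illustrative; the "my_*" names are hypothetical): |
| * every registered node sees every emulated write, so a callback must |
| * filter for the pages it actually tracks before doing any work. |
| * |
| * static void my_track_write(gpa_t gpa, const u8 *new, int bytes, |
| * struct kvm_page_track_notifier_node *node) |
| * { |
| * if (!my_gfn_is_tracked(gpa_to_gfn(gpa))) |
| * return; |
| * my_update_shadow_copy(gpa, new, bytes); |
| * } |
| */ |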
| |
| /* |
| * Notify external page track nodes that a memory region is being removed from |
| * the VM, e.g. so that users can free any associated metadata. |
| */ |
| void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot) |
| { |
| struct kvm_page_track_notifier_head *head; |
| struct kvm_page_track_notifier_node *n; |
| int idx; |
| |
| head = &kvm->arch.track_notifier_head; |
| |
| if (hlist_empty(&head->track_notifier_list)) |
| return; |
| |
| idx = srcu_read_lock(&head->track_srcu); |
| hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, |
| srcu_read_lock_held(&head->track_srcu)) |
| if (n->track_remove_region) |
| n->track_remove_region(slot->base_gfn, slot->npages, n); |
| srcu_read_unlock(&head->track_srcu, idx); |
| } |
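| |
| /* |
| * Callback sketch (illustrative; the "my_*" names are hypothetical): |
| * free any per-page metadata covering the range of the vanishing slot. |
| * |
| * static void my_track_remove_region(gfn_t gfn, unsigned long nr_pages, |
| * struct kvm_page_track_notifier_node *node) |
| * { |
| * my_forget_range(gfn, nr_pages); |
| * } |
| */ |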
| |
| /* |
| * Add a guest page to the write-tracking pool so that writes to that |
| * page will be intercepted. |
| * |
| * @kvm: the guest instance we are interested in. |
| * @gfn: the guest page. |
| */ |
| int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn) |
| { |
| struct kvm_memory_slot *slot; |
| int idx; |
| |
| idx = srcu_read_lock(&kvm->srcu); |
| |
| slot = gfn_to_memslot(kvm, gfn); |
| if (!slot) { |
| srcu_read_unlock(&kvm->srcu, idx); |
| return -EINVAL; |
| } |
| |
| write_lock(&kvm->mmu_lock); |
| __kvm_write_track_add_gfn(kvm, slot, gfn); |
| write_unlock(&kvm->mmu_lock); |
| |
| srcu_read_unlock(&kvm->srcu, idx); |
| |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn); |
| |
| /* |
| * Remove a guest page from the write-tracking pool, which stops the |
| * interception of writes to that page. |
| * |
| * @kvm: the guest instance we are interested in. |
| * @gfn: the guest page. |
| */ |
| int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn) |
| { |
| struct kvm_memory_slot *slot; |
| int idx; |
| |
| idx = srcu_read_lock(&kvm->srcu); |
| |
| slot = gfn_to_memslot(kvm, gfn); |
| if (!slot) { |
| srcu_read_unlock(&kvm->srcu, idx); |
| return -EINVAL; |
| } |
| |
| write_lock(&kvm->mmu_lock); |
| __kvm_write_track_remove_gfn(kvm, slot, gfn); |
| write_unlock(&kvm->mmu_lock); |
| |
| srcu_read_unlock(&kvm->srcu, idx); |
| |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn); |
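| |
| /* |
| * Lifecycle sketch (illustrative; the "my_*" names are hypothetical): |
| * the per-gfn tracking state is a counter, so every successful |
| * kvm_write_track_add_gfn() must eventually be balanced by a matching |
| * kvm_write_track_remove_gfn(). |
| * |
| * static int my_shadow_page(struct kvm *kvm, gfn_t gfn) |
| * { |
| * return kvm_write_track_add_gfn(kvm, gfn); |
| * } |
| * |
| * static void my_unshadow_page(struct kvm *kvm, gfn_t gfn) |
| * { |
| * kvm_write_track_remove_gfn(kvm, gfn); |
| * } |
| */ |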
| #endif |