| // SPDX-License-Identifier: MIT |
| /* |
| * Copyright © 2021 Intel Corporation |
| */ |
| |
| #include <linux/interval_tree_generic.h> |
| #include <linux/sched/mm.h> |
| |
| #include "i915_sw_fence.h" |
| #include "i915_vma_resource.h" |
| #include "i915_drv.h" |
| #include "intel_memory_region.h" |
| |
| #include "gt/intel_gtt.h" |
| |
| /* Slab cache backing every struct i915_vma_resource; created in i915_vma_resource_module_init(). */ |
| static struct kmem_cache *slab_vma_resources; |
| |
| /** |
| * DOC: |
| * We use a per-vm interval tree to keep track of vma_resources |
| * scheduled for unbind but not yet unbound. The tree is protected by |
| * the vm mutex, and nodes are removed just after the unbind fence signals. |
| * The removal takes the vm mutex from a kernel thread which we need to |
| * keep in mind so that we don't grab the mutex and try to wait for all |
| * pending unbinds to complete, because that will temporarily block many |
| * of the workqueue threads, and people will get angry. |
| * |
| * We should consider using a single ordered fence per VM instead but that |
| * requires ordering the unbinds and might introduce unnecessary waiting |
| * for unrelated unbinds. Amount of code will probably be roughly the same |
| * due to the simplicity of using the interval tree interface. |
| * |
| * Another drawback of this interval tree is that the complexity of insertion |
| * and removal of fences increases as O(ln(pending_unbinds)) instead of |
| * O(1) for a single fence without interval tree. |
| */ |
| /* |
| * Interval occupied by a vma resource in the per-vm tree: the range |
| * starting at ->start and spanning ->node_size bytes, widened by ->guard |
| * on both sides. VMA_RES_LAST is the last address inside the interval |
| * (inclusive), hence the "- 1". |
| */ |
| #define VMA_RES_START(_node) ((_node)->start - (_node)->guard) |
| #define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1) |
| /* Instantiates the static vma_res_itree_*() helpers used in this file. */ |
| INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, |
| u64, __subtree_last, |
| VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); |
| |
| /* Callbacks for the unbind dma-fence. */ |
| |
| /** |
| * i915_vma_resource_alloc - Allocate a vma resource |
| * |
| * Return: A pointer to a cleared struct i915_vma_resource or |
| * a -ENOMEM error pointer if allocation fails. |
| */ |
| struct i915_vma_resource *i915_vma_resource_alloc(void) |
| { |
| struct i915_vma_resource *vma_res = |
| kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); |
| |
| return vma_res ? vma_res : ERR_PTR(-ENOMEM); |
| } |
| |
| /** |
| * i915_vma_resource_free - Free a vma resource |
| * @vma_res: The vma resource to free. |
| */ |
| void i915_vma_resource_free(struct i915_vma_resource *vma_res) |
| { |
| if (vma_res) |
| kmem_cache_free(slab_vma_resources, vma_res); |
| } |
| |
/* .get_driver_name callback of unbind_fence_ops: fixed label, @fence is not examined. */
static const char *get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "vma unbind fence";

	return driver_name;
}
| |
/* .get_timeline_name callback of unbind_fence_ops: fixed label, @fence is not examined. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "unbound";

	return timeline_name;
}
| |
| static void unbind_fence_free_rcu(struct rcu_head *head) |
| { |
| struct i915_vma_resource *vma_res = |
| container_of(head, typeof(*vma_res), unbind_fence.rcu); |
| |
| i915_vma_resource_free(vma_res); |
| } |
| |
| static void unbind_fence_release(struct dma_fence *fence) |
| { |
| struct i915_vma_resource *vma_res = |
| container_of(fence, typeof(*vma_res), unbind_fence); |
| |
| i915_sw_fence_fini(&vma_res->chain); |
| |
| call_rcu(&fence->rcu, unbind_fence_free_rcu); |
| } |
| |
| static struct dma_fence_ops unbind_fence_ops = { |
| .get_driver_name = get_driver_name, |
| .get_timeline_name = get_timeline_name, |
| .release = unbind_fence_release, |
| }; |
| |
| /* |
| * Drop one hold on @vma_res. Only when the hold count reaches zero is the |
| * unbind fence signalled and the following released: the runtime-pm |
| * wakeref (if one is recorded), the entry in the vm's pending_unbind tree |
| * (if the node was inserted) and the refcounted sg-table (if set). |
| */ |
| static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) |
| { |
| struct i915_address_space *vm; |
| |
| /* Nothing more to do unless this was the final hold. */ |
| if (!refcount_dec_and_test(&vma_res->hold_count)) |
| return; |
| |
| dma_fence_signal(&vma_res->unbind_fence); |
| |
| /* Keep a local copy: vma_res->vm is cleared below but vm is still used. */ |
| vm = vma_res->vm; |
| if (vma_res->wakeref) |
| intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); |
| |
| vma_res->vm = NULL; |
| /* |
| * i915_vma_resource_unbind() clears the rb node instead of inserting it |
| * for immediate unbinds; those skip the tree removal and the vm mutex. |
| */ |
| if (!RB_EMPTY_NODE(&vma_res->rb)) { |
| mutex_lock(&vm->mutex); |
| vma_res_itree_remove(vma_res, &vm->pending_unbind); |
| mutex_unlock(&vm->mutex); |
| } |
| |
| if (vma_res->bi.pages_rsgt) |
| i915_refct_sgt_put(vma_res->bi.pages_rsgt); |
| } |
| |
| /** |
| * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind |
| * fence. |
| * @vma_res: The vma resource. |
| * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. |
| * |
| * The function may leave a dma_fence critical section. |
| */ |
| void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, |
| bool lockdep_cookie) |
| { |
| dma_fence_end_signalling(lockdep_cookie); |
| |
| if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { |
| unsigned long irq_flags; |
| |
| /* Inefficient open-coded might_lock_irqsave() */ |
| spin_lock_irqsave(&vma_res->lock, irq_flags); |
| spin_unlock_irqrestore(&vma_res->lock, irq_flags); |
| } |
| |
| __i915_vma_resource_unhold(vma_res); |
| } |
| |
| /** |
| * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. |
| * @vma_res: The vma resource. |
| * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should |
| * be given as an argument to the pairing i915_vma_resource_unhold. |
| * |
| * If returning true, the function enters a dma_fence signalling critical |
| * section if not in one already. |
| * |
| * Return: true if holding successful, false if not. |
| */ |
| bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, |
| bool *lockdep_cookie) |
| { |
| bool held = refcount_inc_not_zero(&vma_res->hold_count); |
| |
| if (held) |
| *lockdep_cookie = dma_fence_begin_signalling(); |
| |
| return held; |
| } |
| |
| static void i915_vma_resource_unbind_work(struct work_struct *work) |
| { |
| struct i915_vma_resource *vma_res = |
| container_of(work, typeof(*vma_res), work); |
| struct i915_address_space *vm = vma_res->vm; |
| bool lockdep_cookie; |
| |
| lockdep_cookie = dma_fence_begin_signalling(); |
| if (likely(!vma_res->skip_pte_rewrite)) |
| vma_res->ops->unbind_vma(vm, vma_res); |
| |
| dma_fence_end_signalling(lockdep_cookie); |
| __i915_vma_resource_unhold(vma_res); |
| i915_vma_resource_put(vma_res); |
| } |
| |
| static int |
| i915_vma_resource_fence_notify(struct i915_sw_fence *fence, |
| enum i915_sw_fence_notify state) |
| { |
| struct i915_vma_resource *vma_res = |
| container_of(fence, typeof(*vma_res), chain); |
| struct dma_fence *unbind_fence = |
| &vma_res->unbind_fence; |
| |
| switch (state) { |
| case FENCE_COMPLETE: |
| dma_fence_get(unbind_fence); |
| if (vma_res->immediate_unbind) { |
| i915_vma_resource_unbind_work(&vma_res->work); |
| } else { |
| INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); |
| queue_work(system_unbound_wq, &vma_res->work); |
| } |
| break; |
| case FENCE_FREE: |
| i915_vma_resource_put(vma_res); |
| break; |
| } |
| |
| return NOTIFY_DONE; |
| } |
| |
| /** |
| * i915_vma_resource_unbind - Unbind a vma resource |
| * @vma_res: The vma resource to unbind. |
| * @tlb: pointer to vma->obj->mm.tlb associated with the resource |
| * to be stored at vma_res->tlb. When not-NULL, it will be used |
| * to do TLB cache invalidation before freeing a VMA resource. |
| * Used only for async unbind. |
| * |
| * At this point this function does little more than publish a fence that |
| * signals immediately unless signaling is held back. |
| * |
| * Return: A refcounted pointer to a dma-fence that signals when unbinding is |
| * complete. |
| */ |
| struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, |
| u32 *tlb) |
| { |
| struct i915_address_space *vm = vma_res->vm; |
| |
| vma_res->tlb = tlb; |
| |
| /* Reference for the sw fence */ |
| i915_vma_resource_get(vma_res); |
| |
| /* Caller must already have a wakeref in this case. */ |
| if (vma_res->needs_wakeref) |
| vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); |
| |
| if (atomic_read(&vma_res->chain.pending) <= 1) { |
| RB_CLEAR_NODE(&vma_res->rb); |
| vma_res->immediate_unbind = 1; |
| } else { |
| vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); |
| } |
| |
| i915_sw_fence_commit(&vma_res->chain); |
| |
| return &vma_res->unbind_fence; |
| } |
| |
| /** |
| * __i915_vma_resource_init - Initialize a vma resource. |
| * @vma_res: The vma resource to initialize |
| * |
| * Initializes the private members of a vma resource. |
| */ |
| void __i915_vma_resource_init(struct i915_vma_resource *vma_res) |
| { |
| spin_lock_init(&vma_res->lock); |
| dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, |
| &vma_res->lock, 0, 0); |
| refcount_set(&vma_res->hold_count, 1); |
| i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); |
| } |
| |
| static void |
| i915_vma_resource_color_adjust_range(struct i915_address_space *vm, |
| u64 *start, |
| u64 *end) |
| { |
| if (i915_vm_has_cache_coloring(vm)) { |
| if (*start) |
| *start -= I915_GTT_PAGE_SIZE; |
| *end += I915_GTT_PAGE_SIZE; |
| } |
| } |
| |
| /** |
| * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a |
| * certain vm range. |
| * @vm: The vm to look at. |
| * @offset: The range start. |
| * @size: The range size. |
| * @intr: Whether to wait interrubtible. |
| * |
| * The function needs to be called with the vm lock held. |
| * |
| * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
| */ |
| int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, |
| u64 offset, |
| u64 size, |
| bool intr) |
| { |
| struct i915_vma_resource *node; |
| u64 last = offset + size - 1; |
| |
| lockdep_assert_held(&vm->mutex); |
| might_sleep(); |
| |
| i915_vma_resource_color_adjust_range(vm, &offset, &last); |
| node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); |
| while (node) { |
| int ret = dma_fence_wait(&node->unbind_fence, intr); |
| |
| if (ret) |
| return ret; |
| |
| node = vma_res_itree_iter_next(node, offset, last); |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, |
| * releasing the vm lock while waiting. |
| * @vm: The vm to look at. |
| * |
| * The function may not be called with the vm lock held. |
| * Typically this is called at vm destruction to finish any pending |
| * unbind operations. The vm mutex is released while waiting to avoid |
| * stalling kernel workqueues trying to grab the mutex. |
| */ |
| void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) |
| { |
| struct i915_vma_resource *node; |
| struct dma_fence *fence; |
| |
| do { |
| fence = NULL; |
| mutex_lock(&vm->mutex); |
| node = vma_res_itree_iter_first(&vm->pending_unbind, 0, |
| U64_MAX); |
| if (node) |
| fence = dma_fence_get_rcu(&node->unbind_fence); |
| mutex_unlock(&vm->mutex); |
| |
| if (fence) { |
| /* |
| * The wait makes sure the node eventually removes |
| * itself from the tree. |
| */ |
| dma_fence_wait(fence, false); |
| dma_fence_put(fence); |
| } |
| } while (node); |
| } |
| |
| /** |
| * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all |
| * pending unbinds in a certain range of a vm. |
| * @vm: The vm to look at. |
| * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. |
| * @offset: The range start. |
| * @size: The range size. |
| * @intr: Whether to wait interrubtible. |
| * @gfp: Allocation mode for memory allocations. |
| * |
| * The function makes @sw_fence await all pending unbinds in a certain |
| * vm range before calling the complete notifier. To be able to await |
| * each individual unbind, the function needs to allocate memory using |
| * the @gpf allocation mode. If that fails, the function will instead |
| * wait for the unbind fence to signal, using @intr to judge whether to |
| * wait interruptible or not. Note that @gfp should ideally be selected so |
| * as to avoid any expensive memory allocation stalls and rather fail and |
| * synchronize itself. For now the vm mutex is required when calling this |
| * function with means that @gfp can't call into direct reclaim. In reality |
| * this means that during heavy memory pressure, we will sync in this |
| * function. |
| * |
| * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
| */ |
| int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, |
| struct i915_sw_fence *sw_fence, |
| u64 offset, |
| u64 size, |
| bool intr, |
| gfp_t gfp) |
| { |
| struct i915_vma_resource *node; |
| u64 last = offset + size - 1; |
| |
| lockdep_assert_held(&vm->mutex); |
| might_alloc(gfp); |
| might_sleep(); |
| |
| i915_vma_resource_color_adjust_range(vm, &offset, &last); |
| node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); |
| while (node) { |
| int ret; |
| |
| ret = i915_sw_fence_await_dma_fence(sw_fence, |
| &node->unbind_fence, |
| 0, gfp); |
| if (ret < 0) { |
| ret = dma_fence_wait(&node->unbind_fence, intr); |
| if (ret) |
| return ret; |
| } |
| |
| node = vma_res_itree_iter_next(node, offset, last); |
| } |
| |
| return 0; |
| } |
| |
| /* Destroy the vma resource slab cache set up by i915_vma_resource_module_init(). */ |
| void i915_vma_resource_module_exit(void) |
| { |
| kmem_cache_destroy(slab_vma_resources); |
| } |
| |
| int __init i915_vma_resource_module_init(void) |
| { |
| slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); |
| if (!slab_vma_resources) |
| return -ENOMEM; |
| |
| return 0; |
| } |