| // SPDX-License-Identifier: GPL-2.0 or MIT |
| /* Copyright 2023 Collabora ltd. */ |
| |
| #include <linux/iosys-map.h> |
| #include <linux/rwsem.h> |
| |
| #include <drm/panthor_drm.h> |
| |
| #include "panthor_device.h" |
| #include "panthor_gem.h" |
| #include "panthor_heap.h" |
| #include "panthor_mmu.h" |
| #include "panthor_regs.h" |
| |
| /* |
| * The GPU heap context is an opaque structure used by the GPU to track the |
| * heap allocations. The driver should only touch it to initialize it (zero |
| * all fields). Because both the CPU and the GPU can access this structure, |
| * it is required to be GPU cache line aligned; the per-context stride is |
| * derived from this constraint in panthor_heap_ctx_stride(). |
| */ |
| #define HEAP_CONTEXT_SIZE 32 |
| |
| /** |
| * struct panthor_heap_chunk_header - Heap chunk header |
| */ |
| struct panthor_heap_chunk_header { |
| /** |
| * @next: Next heap chunk in the list. |
| * |
| * This is a GPU VA. |
| */ |
| u64 next; |
| |
| /** @unknown: Reserved fields, must be zero (MBZ). */ |
| u32 unknown[14]; |
| }; |
| |
| /** |
| * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. |
| */ |
| struct panthor_heap_chunk { |
| /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ |
| struct list_head node; |
| |
| /** @bo: Buffer object backing the heap chunk. */ |
| struct panthor_kernel_bo *bo; |
| }; |
| |
| /** |
| * struct panthor_heap - Structure used to manage tiler heap contexts. |
| */ |
| struct panthor_heap { |
| /** @chunks: List containing all heap chunks allocated so far. */ |
| struct list_head chunks; |
| |
| /** @lock: Lock protecting insertion in the chunks list. */ |
| struct mutex lock; |
| |
| /** @chunk_size: Size of each chunk. */ |
| u32 chunk_size; |
| |
| /** @max_chunks: Maximum number of chunks. */ |
| u32 max_chunks; |
| |
| /** |
| * @target_in_flight: Number of in-flight render passes after which we'd let |
| * the FW wait for fragment jobs to finish instead of allocating new chunks. |
| */ |
| u32 target_in_flight; |
| |
| /** @chunk_count: Number of heap chunks currently allocated. */ |
| u32 chunk_count; |
| }; |
| |
| #define MAX_HEAPS_PER_POOL 128 |
| |
| /** |
| * struct panthor_heap_pool - Pool of heap contexts |
| * |
| * The pool is attached to a panthor_file and can't be shared across processes. |
| */ |
| struct panthor_heap_pool { |
| /** @refcount: Reference count. */ |
| struct kref refcount; |
| |
| /** @ptdev: Device. */ |
| struct panthor_device *ptdev; |
| |
| /** @vm: VM this pool is bound to. */ |
| struct panthor_vm *vm; |
| |
| /** @lock: Lock protecting access to @xa. */ |
| struct rw_semaphore lock; |
| |
| /** @xa: Array storing panthor_heap objects. */ |
| struct xarray xa; |
| |
| /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ |
| struct panthor_kernel_bo *gpu_contexts; |
| }; |
| |
| static int panthor_heap_ctx_stride(struct panthor_device *ptdev) |
| { |
| u32 l2_features = ptdev->gpu_info.l2_features; |
| u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); |
| |
| return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); |
| } |
| |
| static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) |
| { |
| return panthor_heap_ctx_stride(pool->ptdev) * id; |
| } |
| |
| static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) |
| { |
| return pool->gpu_contexts->kmap + |
| panthor_get_heap_ctx_offset(pool, id); |
| } |
| |
| static void panthor_free_heap_chunk(struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| struct panthor_heap_chunk *chunk) |
| { |
| mutex_lock(&heap->lock); |
| list_del(&chunk->node); |
| heap->chunk_count--; |
| mutex_unlock(&heap->lock); |
| |
| panthor_kernel_bo_destroy(chunk->bo); |
| kfree(chunk); |
| } |
| |
| static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, |
| struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| bool initial_chunk) |
| { |
| struct panthor_heap_chunk *chunk; |
| struct panthor_heap_chunk_header *hdr; |
| int ret; |
| |
| chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); |
| if (!chunk) |
| return -ENOMEM; |
| |
| chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, |
| DRM_PANTHOR_BO_NO_MMAP, |
| DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, |
| PANTHOR_VM_KERNEL_AUTO_VA); |
| if (IS_ERR(chunk->bo)) { |
| ret = PTR_ERR(chunk->bo); |
| goto err_free_chunk; |
| } |
| |
| ret = panthor_kernel_bo_vmap(chunk->bo); |
| if (ret) |
| goto err_destroy_bo; |
| |
| hdr = chunk->bo->kmap; |
| memset(hdr, 0, sizeof(*hdr)); |
| |
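| /* When allocating the initial chunks, the driver chains them together: the |
| * new chunk's header points to the previous head of the list, with the |
| * 4k-aligned GPU VA of that chunk in bits [63:12] and the chunk size in |
| * 4KiB units in bits [11:0]. The very first chunk keeps a zeroed header. |
| * Chunks allocated through panthor_heap_grow() are presumably linked by |
| * the FW once it picks up the new chunk, so they are left zeroed here too. |
| */ |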
| if (initial_chunk && !list_empty(&heap->chunks)) { |
| struct panthor_heap_chunk *prev_chunk; |
| u64 prev_gpuva; |
| |
| prev_chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| |
| prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); |
| hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | |
| (heap->chunk_size >> 12); |
| } |
| |
| panthor_kernel_bo_vunmap(chunk->bo); |
| |
| mutex_lock(&heap->lock); |
| list_add(&chunk->node, &heap->chunks); |
| heap->chunk_count++; |
| mutex_unlock(&heap->lock); |
| |
| return 0; |
| |
| err_destroy_bo: |
| panthor_kernel_bo_destroy(chunk->bo); |
| |
| err_free_chunk: |
| kfree(chunk); |
| |
| return ret; |
| } |
| |
| static void panthor_free_heap_chunks(struct panthor_vm *vm, |
| struct panthor_heap *heap) |
| { |
| struct panthor_heap_chunk *chunk, *tmp; |
| |
| list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) |
| panthor_free_heap_chunk(vm, heap, chunk); |
| } |
| |
| static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, |
| struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| u32 chunk_count) |
| { |
| int ret; |
| u32 i; |
| |
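| /* These are all initial chunks, so the driver links each new chunk to the |
| * previously allocated one (see panthor_alloc_heap_chunk()). |
| */ |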
| for (i = 0; i < chunk_count; i++) { |
| ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); |
| if (ret) |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| static int |
| panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) |
| { |
| struct panthor_heap *heap; |
| |
| heap = xa_erase(&pool->xa, handle); |
| if (!heap) |
| return -EINVAL; |
| |
| panthor_free_heap_chunks(pool->vm, heap); |
| mutex_destroy(&heap->lock); |
| kfree(heap); |
| return 0; |
| } |
| |
| /** |
| * panthor_heap_destroy() - Destroy a heap context |
| * @pool: Pool this context belongs to. |
| * @handle: Handle returned by panthor_heap_create(). |
| */ |
| int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) |
| { |
| int ret; |
| |
| down_write(&pool->lock); |
| ret = panthor_heap_destroy_locked(pool, handle); |
| up_write(&pool->lock); |
| |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_create() - Create a heap context |
| * @pool: Pool to instantiate the heap context from. |
| * @initial_chunk_count: Number of chunks allocated at initialization time. |
| * Must be at least 1. |
| * @chunk_size: The size of each chunk. Must be page-aligned and lie in the |
| * [128k:8M] range. |
| * @max_chunks: Maximum number of chunks that can be allocated. |
| * @target_in_flight: Maximum number of in-flight render passes. |
| * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap |
| * context. |
| * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk |
| * assigned to the heap context. |
| * |
| * Return: a non-negative heap handle on success, a negative error code |
| * otherwise. |
| */ |
| int panthor_heap_create(struct panthor_heap_pool *pool, |
| u32 initial_chunk_count, |
| u32 chunk_size, |
| u32 max_chunks, |
| u32 target_in_flight, |
| u64 *heap_ctx_gpu_va, |
| u64 *first_chunk_gpu_va) |
| { |
| struct panthor_heap *heap; |
| struct panthor_heap_chunk *first_chunk; |
| struct panthor_vm *vm; |
| int ret = 0; |
| u32 id; |
| |
| if (initial_chunk_count == 0) |
| return -EINVAL; |
| |
| if (initial_chunk_count > max_chunks) |
| return -EINVAL; |
| |
| if (!IS_ALIGNED(chunk_size, PAGE_SIZE) || |
| chunk_size < SZ_128K || chunk_size > SZ_8M) |
| return -EINVAL; |
| |
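| /* pool->vm is a weak reference that gets cleared when the pool is |
| * destroyed (see panthor_heap_pool_destroy()), so grab a VM reference |
| * under the pool lock to keep the VM alive while the initial chunks are |
| * being allocated. |
| */ |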
| down_read(&pool->lock); |
| vm = panthor_vm_get(pool->vm); |
| up_read(&pool->lock); |
| |
| /* The pool has been destroyed; we can't create a new heap. */ |
| if (!vm) |
| return -EINVAL; |
| |
| heap = kzalloc(sizeof(*heap), GFP_KERNEL); |
| if (!heap) { |
| ret = -ENOMEM; |
| goto err_put_vm; |
| } |
| |
| mutex_init(&heap->lock); |
| INIT_LIST_HEAD(&heap->chunks); |
| heap->chunk_size = chunk_size; |
| heap->max_chunks = max_chunks; |
| heap->target_in_flight = target_in_flight; |
| |
| ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, |
| initial_chunk_count); |
| if (ret) |
| goto err_free_heap; |
| |
| first_chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); |
| |
| down_write(&pool->lock); |
| /* The pool has been destroyed; we can't create a new heap. */ |
| if (!pool->vm) { |
| ret = -EINVAL; |
| } else { |
| ret = xa_alloc(&pool->xa, &id, heap, |
| XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL); |
| if (!ret) { |
| void *gpu_ctx = panthor_get_heap_ctx(pool, id); |
| |
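| /* The GPU/FW owns the heap context layout; the driver only has to |
| * zero-initialize it (see the HEAP_CONTEXT_SIZE comment). |
| */ |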
| memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); |
| *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + |
| panthor_get_heap_ctx_offset(pool, id); |
| } |
| } |
| up_write(&pool->lock); |
| |
| if (ret) |
| goto err_free_heap; |
| |
| panthor_vm_put(vm); |
| return id; |
| |
| err_free_heap: |
| panthor_free_heap_chunks(pool->vm, heap); |
| mutex_destroy(&heap->lock); |
| kfree(heap); |
| |
| err_put_vm: |
| panthor_vm_put(vm); |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_return_chunk() - Return an unused heap chunk |
| * @pool: The pool this heap belongs to. |
| * @heap_gpu_va: The GPU address of the heap context. |
| * @chunk_gpu_va: The chunk VA to return. |
| * |
| * This function is used when a chunk allocated with panthor_heap_grow() |
| * couldn't be linked to the heap context through the FW interface because |
| * the group requesting the allocation was scheduled out in the meantime. |
| */ |
| int panthor_heap_return_chunk(struct panthor_heap_pool *pool, |
| u64 heap_gpu_va, |
| u64 chunk_gpu_va) |
| { |
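| /* Heap contexts are laid out contiguously in pool->gpu_contexts, so the |
| * heap ID can be recovered by dividing the context's offset in that |
| * buffer by the per-context stride. |
| */ |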
| u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); |
| u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); |
| struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; |
| struct panthor_heap *heap; |
| int ret; |
| |
| if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) |
| return -EINVAL; |
| |
| down_read(&pool->lock); |
| heap = xa_load(&pool->xa, heap_id); |
| if (!heap) { |
| ret = -EINVAL; |
| goto out_unlock; |
| } |
| |
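| /* chunk_gpu_va may still carry the chunk size packed in its 12 LSBs by |
| * panthor_heap_grow(); strip it before comparing against the chunk BOs' |
| * GPU VAs. |
| */ |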
| chunk_gpu_va &= GENMASK_ULL(63, 12); |
| |
| mutex_lock(&heap->lock); |
| list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { |
| if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { |
| removed = chunk; |
| list_del(&chunk->node); |
| heap->chunk_count--; |
| break; |
| } |
| } |
| mutex_unlock(&heap->lock); |
| |
| if (removed) { |
| panthor_kernel_bo_destroy(removed->bo); |
| kfree(removed); |
| ret = 0; |
| } else { |
| ret = -EINVAL; |
| } |
| |
| out_unlock: |
| up_read(&pool->lock); |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_grow() - Make a heap context grow. |
| * @pool: The pool this heap belongs to. |
| * @heap_gpu_va: The GPU address of the heap context. |
| * @renderpasses_in_flight: Number of render passes currently in-flight. |
| * @pending_frag_count: Number of fragment jobs waiting for execution/completion. |
| * @new_chunk_gpu_va: Pointer used to return the chunk VA. |
| * |
| * Return: |
| * - 0 if a new chunk was allocated |
| * - -ENOMEM if the tiler context reached the maximum number of chunks |
| * or if too many render passes are in-flight |
| * or if the allocation failed |
| * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid |
| */ |
| int panthor_heap_grow(struct panthor_heap_pool *pool, |
| u64 heap_gpu_va, |
| u32 renderpasses_in_flight, |
| u32 pending_frag_count, |
| u64 *new_chunk_gpu_va) |
| { |
| u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); |
| u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); |
| struct panthor_heap_chunk *chunk; |
| struct panthor_heap *heap; |
| int ret; |
| |
| if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) |
| return -EINVAL; |
| |
| down_read(&pool->lock); |
| heap = xa_load(&pool->xa, heap_id); |
| if (!heap) { |
| ret = -EINVAL; |
| goto out_unlock; |
| } |
| |
| /* If we reached the target number of in-flight render passes, or if we |
| * reached the maximum number of chunks, let the FW figure out another way |
| * to find some memory (wait for render passes to finish, or call the |
| * exception handler provided by the userspace driver, if any). |
| */ |
| if (renderpasses_in_flight > heap->target_in_flight || |
| heap->chunk_count >= heap->max_chunks) { |
| ret = -ENOMEM; |
| goto out_unlock; |
| } |
| |
| /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, |
| * which goes through the blocking allocation path. Ultimately, we |
| * want a non-blocking allocation, so we can immediately report to the |
| * FW when the system is running out of memory. In that case, the FW |
| * can call a user-provided exception handler, which might try to free |
| * some tiler memory by issuing an intermediate fragment job. If the |
| * exception handler can't do anything, it will flag the queue as |
| * faulty so the job that triggered this tiler chunk allocation and all |
| * further jobs in this queue fail immediately instead of having to |
| * wait for the job timeout. |
| */ |
| ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); |
| if (ret) |
| goto out_unlock; |
| |
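| /* The chunk we just allocated sits at the head of the list (chunks are |
| * added with list_add()). Hand its GPU VA back to the FW with the chunk |
| * size, in 4KiB units, packed in the 12 LSBs. |
| */ |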
| chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | |
| (heap->chunk_size >> 12); |
| ret = 0; |
| |
| out_unlock: |
| up_read(&pool->lock); |
| return ret; |
| } |
| |
| static void panthor_heap_pool_release(struct kref *refcount) |
| { |
| struct panthor_heap_pool *pool = |
| container_of(refcount, struct panthor_heap_pool, refcount); |
| |
| xa_destroy(&pool->xa); |
| kfree(pool); |
| } |
| |
| /** |
| * panthor_heap_pool_put() - Release a heap pool reference |
| * @pool: Pool to release the reference on. Can be NULL. |
| */ |
| void panthor_heap_pool_put(struct panthor_heap_pool *pool) |
| { |
| if (pool) |
| kref_put(&pool->refcount, panthor_heap_pool_release); |
| } |
| |
| /** |
| * panthor_heap_pool_get() - Get a heap pool reference |
| * @pool: Pool to get the reference on. Can be NULL. |
| * |
| * Return: @pool. |
| */ |
| struct panthor_heap_pool * |
| panthor_heap_pool_get(struct panthor_heap_pool *pool) |
| { |
| if (pool) |
| kref_get(&pool->refcount); |
| |
| return pool; |
| } |
| |
| /** |
| * panthor_heap_pool_create() - Create a heap pool |
| * @ptdev: Device. |
| * @vm: The VM this heap pool will be attached to. |
| * |
| * A heap pool can contain up to 128 heap contexts (MAX_HEAPS_PER_POOL) and |
| * is per-VM. |
| * |
| * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise. |
| */ |
| struct panthor_heap_pool * |
| panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) |
| { |
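| /* Reserve one heap context slot per possible heap, each padded to the GPU |
| * cache line stride, with the whole buffer rounded up to a 4k page. |
| */ |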
| size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * |
| panthor_heap_ctx_stride(ptdev), |
| 4096); |
| struct panthor_heap_pool *pool; |
| int ret = 0; |
| |
| pool = kzalloc(sizeof(*pool), GFP_KERNEL); |
| if (!pool) |
| return ERR_PTR(-ENOMEM); |
| |
| /* We want a weak ref here: the heap pool belongs to the VM, so we're |
| * sure that, as long as the heap pool exists, the VM exists too. |
| */ |
| pool->vm = vm; |
| pool->ptdev = ptdev; |
| init_rwsem(&pool->lock); |
| xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); |
| kref_init(&pool->refcount); |
| |
| pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, |
| DRM_PANTHOR_BO_NO_MMAP, |
| DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, |
| PANTHOR_VM_KERNEL_AUTO_VA); |
| if (IS_ERR(pool->gpu_contexts)) { |
| ret = PTR_ERR(pool->gpu_contexts); |
| goto err_destroy_pool; |
| } |
| |
| ret = panthor_kernel_bo_vmap(pool->gpu_contexts); |
| if (ret) |
| goto err_destroy_pool; |
| |
| return pool; |
| |
| err_destroy_pool: |
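| /* panthor_heap_pool_destroy() handles a partially initialized pool |
| * (pool->gpu_contexts may be an ERR_PTR here) and drops the reference |
| * taken by kref_init(), which frees the pool. |
| */ |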
| panthor_heap_pool_destroy(pool); |
| return ERR_PTR(ret); |
| } |
| |
| /** |
| * panthor_heap_pool_destroy() - Destroy a heap pool. |
| * @pool: Pool to destroy. |
| * |
| * This function destroys all heap contexts and their resources, thus |
| * preventing any further use of the heap contexts or the chunks attached |
| * to them after that point. |
| * |
| * If the GPU still has access to some heap contexts, a fault should be |
| * triggered, which should flag the command stream groups using these |
| * contexts as faulty. |
| * |
| * The heap pool object is only released when all references to this pool |
| * are released. |
| */ |
| void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) |
| { |
| struct panthor_heap *heap; |
| unsigned long i; |
| |
| if (!pool) |
| return; |
| |
| down_write(&pool->lock); |
| xa_for_each(&pool->xa, i, heap) |
| drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); |
| |
| if (!IS_ERR_OR_NULL(pool->gpu_contexts)) |
| panthor_kernel_bo_destroy(pool->gpu_contexts); |
| |
| /* Reflects the fact that the pool has been destroyed. */ |
| pool->vm = NULL; |
| up_write(&pool->lock); |
| |
| panthor_heap_pool_put(pool); |
| } |