| // SPDX-License-Identifier: GPL-2.0 or MIT |
| /* Copyright 2023 Collabora ltd. */ |
| |
| #include <linux/iosys-map.h> |
| #include <linux/rwsem.h> |
| |
| #include <drm/panthor_drm.h> |
| |
| #include "panthor_device.h" |
| #include "panthor_gem.h" |
| #include "panthor_heap.h" |
| #include "panthor_mmu.h" |
| #include "panthor_regs.h" |
| |
| /* |
| * The GPU heap context is an opaque structure used by the GPU to track the |
| * heap allocations. The driver should only touch it to initialize it (zero |
| * all fields). Because both the CPU and the GPU can access this structure, |
| * it is required to be GPU cache line aligned; the per-context stride is |
| * derived from this constraint in panthor_heap_ctx_stride(). |
| */ |
| #define HEAP_CONTEXT_SIZE 32 |
| |
| /** |
| * struct panthor_heap_chunk_header - Heap chunk header |
| */ |
| struct panthor_heap_chunk_header { |
| /** |
| * @next: Next heap chunk in the list. |
| * |
| * This is a GPU VA. |
| */ |
| u64 next; |
| |
| /** @unknown: Reserved fields, must be zero (MBZ). */ |
| u32 unknown[14]; |
| }; |
| |
| /** |
| * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. |
| */ |
| struct panthor_heap_chunk { |
| /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ |
| struct list_head node; |
| |
| /** @bo: Buffer object backing the heap chunk. */ |
| struct panthor_kernel_bo *bo; |
| }; |
| |
| /** |
| * struct panthor_heap - Structure used to manage tiler heap contexts. |
| */ |
| struct panthor_heap { |
| /** @chunks: List containing all heap chunks allocated so far. */ |
| struct list_head chunks; |
| |
| /** @lock: Lock protecting insertion in the chunks list. */ |
| struct mutex lock; |
| |
| /** @chunk_size: Size of each chunk. */ |
| u32 chunk_size; |
| |
| /** @max_chunks: Maximum number of chunks. */ |
| u32 max_chunks; |
| |
| /** |
| * @target_in_flight: Number of in-flight render passes after which we'd let |
| * the FW wait for fragment jobs to finish instead of allocating new chunks. |
| */ |
| u32 target_in_flight; |
| |
| /** @chunk_count: Number of heap chunks currently allocated. */ |
| u32 chunk_count; |
| }; |
| |
| #define MAX_HEAPS_PER_POOL 128 |
| |
| /** |
| * struct panthor_heap_pool - Pool of heap contexts |
| * |
| * The pool is attached to a panthor_file and can't be shared across processes. |
| */ |
| struct panthor_heap_pool { |
| /** @refcount: Reference count. */ |
| struct kref refcount; |
| |
| /** @ptdev: Device. */ |
| struct panthor_device *ptdev; |
| |
| /** @vm: VM this pool is bound to. */ |
| struct panthor_vm *vm; |
| |
| /** @lock: Lock protecting access to @xa. */ |
| struct rw_semaphore lock; |
| |
| /** @xa: Array storing panthor_heap objects. */ |
| struct xarray xa; |
| |
| /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ |
| struct panthor_kernel_bo *gpu_contexts; |
| }; |
| |
| static int panthor_heap_ctx_stride(struct panthor_device *ptdev) |
| { |
| u32 l2_features = ptdev->gpu_info.l2_features; |
| u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); |
| |
| return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); |
| } |
| |
| static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) |
| { |
| return panthor_heap_ctx_stride(pool->ptdev) * id; |
| } |
| |
| static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) |
| { |
| return pool->gpu_contexts->kmap + |
| panthor_get_heap_ctx_offset(pool, id); |
| } |
| |
| static void panthor_free_heap_chunk(struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| struct panthor_heap_chunk *chunk) |
| { |
| mutex_lock(&heap->lock); |
| list_del(&chunk->node); |
| heap->chunk_count--; |
| mutex_unlock(&heap->lock); |
| |
| panthor_kernel_bo_destroy(chunk->bo); |
| kfree(chunk); |
| } |
| |
| static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, |
| struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| bool initial_chunk) |
| { |
| struct panthor_heap_chunk *chunk; |
| struct panthor_heap_chunk_header *hdr; |
| int ret; |
| |
| chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); |
| if (!chunk) |
| return -ENOMEM; |
| |
| chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, |
| DRM_PANTHOR_BO_NO_MMAP, |
| DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, |
| PANTHOR_VM_KERNEL_AUTO_VA); |
| if (IS_ERR(chunk->bo)) { |
| ret = PTR_ERR(chunk->bo); |
| goto err_free_chunk; |
| } |
| |
| ret = panthor_kernel_bo_vmap(chunk->bo); |
| if (ret) |
| goto err_destroy_bo; |
| |
| hdr = chunk->bo->kmap; |
| memset(hdr, 0, sizeof(*hdr)); |
| |
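| /* When allocating the initial chunks, the driver chains them together: the |
| * new chunk's header points to the previous head of the list, with the |
| * 4k-aligned GPU VA of that chunk in bits [63:12] and the chunk size in |
| * 4KiB units in bits [11:0]. The very first chunk keeps a zeroed header. |
| * Chunks allocated through panthor_heap_grow() are presumably linked by |
| * the FW once it picks up the new chunk, so they are left zeroed here too. |
| */ |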
| if (initial_chunk && !list_empty(&heap->chunks)) { |
| struct panthor_heap_chunk *prev_chunk; |
| u64 prev_gpuva; |
| |
| prev_chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| |
| prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); |
| hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | |
| (heap->chunk_size >> 12); |
| } |
| |
| panthor_kernel_bo_vunmap(chunk->bo); |
| |
| mutex_lock(&heap->lock); |
| list_add(&chunk->node, &heap->chunks); |
| heap->chunk_count++; |
| mutex_unlock(&heap->lock); |
| |
| return 0; |
| |
| err_destroy_bo: |
| panthor_kernel_bo_destroy(chunk->bo); |
| |
| err_free_chunk: |
| kfree(chunk); |
| |
| return ret; |
| } |
| |
| static void panthor_free_heap_chunks(struct panthor_vm *vm, |
| struct panthor_heap *heap) |
| { |
| struct panthor_heap_chunk *chunk, *tmp; |
| |
| list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) |
| panthor_free_heap_chunk(vm, heap, chunk); |
| } |
| |
| static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, |
| struct panthor_vm *vm, |
| struct panthor_heap *heap, |
| u32 chunk_count) |
| { |
| int ret; |
| u32 i; |
| |
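| /* These are all initial chunks, so the driver links each new chunk to the |
| * previously allocated one (see panthor_alloc_heap_chunk()). |
| */ |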
| for (i = 0; i < chunk_count; i++) { |
| ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); |
| if (ret) |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| static int |
| panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) |
| { |
| struct panthor_heap *heap; |
| |
| heap = xa_erase(&pool->xa, handle); |
| if (!heap) |
| return -EINVAL; |
| |
| panthor_free_heap_chunks(pool->vm, heap); |
| mutex_destroy(&heap->lock); |
| kfree(heap); |
| return 0; |
| } |
| |
| /** |
| * panthor_heap_destroy() - Destroy a heap context |
| * @pool: Pool this context belongs to. |
| * @handle: Handle returned by panthor_heap_create(). |
| */ |
| int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) |
| { |
| int ret; |
| |
| down_write(&pool->lock); |
| ret = panthor_heap_destroy_locked(pool, handle); |
| up_write(&pool->lock); |
| |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_create() - Create a heap context |
| * @pool: Pool to instantiate the heap context from. |
| * @initial_chunk_count: Number of chunks allocated at initialization time. |
| * Must be at least 1. |
| * @chunk_size: The size of each chunk. Must be page-aligned and lie in the |
| * [128k:8M] range. |
| * @max_chunks: Maximum number of chunks that can be allocated. |
| * @target_in_flight: Maximum number of in-flight render passes. |
| * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap |
| * context. |
| * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk |
| * assigned to the heap context. |
| * |
| * Return: a non-negative heap handle on success, a negative error code |
| * otherwise. |
| */ |
| int panthor_heap_create(struct panthor_heap_pool *pool, |
| u32 initial_chunk_count, |
| u32 chunk_size, |
| u32 max_chunks, |
| u32 target_in_flight, |
| u64 *heap_ctx_gpu_va, |
| u64 *first_chunk_gpu_va) |
| { |
| struct panthor_heap *heap; |
| struct panthor_heap_chunk *first_chunk; |
| struct panthor_vm *vm; |
| int ret = 0; |
| u32 id; |
| |
| if (initial_chunk_count == 0) |
| return -EINVAL; |
| |
| if (initial_chunk_count > max_chunks) |
| return -EINVAL; |
| |
| if (!IS_ALIGNED(chunk_size, PAGE_SIZE) || |
| chunk_size < SZ_128K || chunk_size > SZ_8M) |
| return -EINVAL; |
| |
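| /* pool->vm is a weak reference that gets cleared when the pool is |
| * destroyed (see panthor_heap_pool_destroy()), so grab a VM reference |
| * under the pool lock to keep the VM alive while the initial chunks are |
| * being allocated. |
| */ |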
| down_read(&pool->lock); |
| vm = panthor_vm_get(pool->vm); |
| up_read(&pool->lock); |
| |
| /* The pool has been destroyed; we can't create a new heap. */ |
| if (!vm) |
| return -EINVAL; |
| |
| heap = kzalloc(sizeof(*heap), GFP_KERNEL); |
| if (!heap) { |
| ret = -ENOMEM; |
| goto err_put_vm; |
| } |
| |
| mutex_init(&heap->lock); |
| INIT_LIST_HEAD(&heap->chunks); |
| heap->chunk_size = chunk_size; |
| heap->max_chunks = max_chunks; |
| heap->target_in_flight = target_in_flight; |
| |
| ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, |
| initial_chunk_count); |
| if (ret) |
| goto err_free_heap; |
| |
| first_chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); |
| |
| down_write(&pool->lock); |
| /* The pool has been destroyed; we can't create a new heap. */ |
| if (!pool->vm) { |
| ret = -EINVAL; |
| } else { |
| ret = xa_alloc(&pool->xa, &id, heap, |
| XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL); |
| if (!ret) { |
| void *gpu_ctx = panthor_get_heap_ctx(pool, id); |
| |
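| /* The GPU/FW owns the heap context layout; the driver only has to |
| * zero-initialize it (see the HEAP_CONTEXT_SIZE comment). |
| */ |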
| memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); |
| *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + |
| panthor_get_heap_ctx_offset(pool, id); |
| } |
| } |
| up_write(&pool->lock); |
| |
| if (ret) |
| goto err_free_heap; |
| |
| panthor_vm_put(vm); |
| return id; |
| |
| err_free_heap: |
| panthor_free_heap_chunks(pool->vm, heap); |
| mutex_destroy(&heap->lock); |
| kfree(heap); |
| |
| err_put_vm: |
| panthor_vm_put(vm); |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_return_chunk() - Return an unused heap chunk |
| * @pool: The pool this heap belongs to. |
| * @heap_gpu_va: The GPU address of the heap context. |
| * @chunk_gpu_va: The chunk VA to return. |
| * |
| * This function is used when a chunk allocated with panthor_heap_grow() |
| * couldn't be linked to the heap context through the FW interface because |
| * the group requesting the allocation was scheduled out in the meantime. |
| */ |
| int panthor_heap_return_chunk(struct panthor_heap_pool *pool, |
| u64 heap_gpu_va, |
| u64 chunk_gpu_va) |
| { |
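| /* Heap contexts are laid out contiguously in pool->gpu_contexts, so the |
| * heap ID can be recovered by dividing the context's offset in that |
| * buffer by the per-context stride. |
| */ |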
| u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); |
| u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); |
| struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; |
| struct panthor_heap *heap; |
| int ret; |
| |
| if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) |
| return -EINVAL; |
| |
| down_read(&pool->lock); |
| heap = xa_load(&pool->xa, heap_id); |
| if (!heap) { |
| ret = -EINVAL; |
| goto out_unlock; |
| } |
| |
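| /* chunk_gpu_va may still carry the chunk size packed in its 12 LSBs by |
| * panthor_heap_grow(); strip it before comparing against the chunk BOs' |
| * GPU VAs. |
| */ |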
| chunk_gpu_va &= GENMASK_ULL(63, 12); |
| |
| mutex_lock(&heap->lock); |
| list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { |
| if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { |
| removed = chunk; |
| list_del(&chunk->node); |
| heap->chunk_count--; |
| break; |
| } |
| } |
| mutex_unlock(&heap->lock); |
| |
| if (removed) { |
| panthor_kernel_bo_destroy(removed->bo); |
| kfree(removed); |
| ret = 0; |
| } else { |
| ret = -EINVAL; |
| } |
| |
| out_unlock: |
| up_read(&pool->lock); |
| return ret; |
| } |
| |
| /** |
| * panthor_heap_grow() - Make a heap context grow. |
| * @pool: The pool this heap belongs to. |
| * @heap_gpu_va: The GPU address of the heap context. |
| * @renderpasses_in_flight: Number of render passes currently in-flight. |
| * @pending_frag_count: Number of fragment jobs waiting for execution/completion. |
| * @new_chunk_gpu_va: Pointer used to return the chunk VA. |
| * |
| * Return: |
| * - 0 if a new chunk was allocated |
| * - -ENOMEM if the tiler context reached the maximum number of chunks |
| * or if too many render passes are in-flight |
| * or if the allocation failed |
| * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid |
| */ |
| int panthor_heap_grow(struct panthor_heap_pool *pool, |
| u64 heap_gpu_va, |
| u32 renderpasses_in_flight, |
| u32 pending_frag_count, |
| u64 *new_chunk_gpu_va) |
| { |
| u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); |
| u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); |
| struct panthor_heap_chunk *chunk; |
| struct panthor_heap *heap; |
| int ret; |
| |
| if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) |
| return -EINVAL; |
| |
| down_read(&pool->lock); |
| heap = xa_load(&pool->xa, heap_id); |
| if (!heap) { |
| ret = -EINVAL; |
| goto out_unlock; |
| } |
| |
| /* If we reached the target number of in-flight render passes, or if we |
| * reached the maximum number of chunks, let the FW figure out another way |
| * to find some memory (wait for render passes to finish, or call the |
| * exception handler provided by the userspace driver, if any). |
| */ |
| if (renderpasses_in_flight > heap->target_in_flight || |
| heap->chunk_count >= heap->max_chunks) { |
| ret = -ENOMEM; |
| goto out_unlock; |
| } |
| |
| /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, |
| * which goes through the blocking allocation path. Ultimately, we |
| * want a non-blocking allocation, so we can immediately report to the |
| * FW when the system is running out of memory. In that case, the FW |
| * can call a user-provided exception handler, which might try to free |
| * some tiler memory by issuing an intermediate fragment job. If the |
| * exception handler can't do anything, it will flag the queue as |
| * faulty so the job that triggered this tiler chunk allocation and all |
| * further jobs in this queue fail immediately instead of having to |
| * wait for the job timeout. |
| */ |
| ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); |
| if (ret) |
| goto out_unlock; |
| |
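| /* The chunk we just allocated sits at the head of the list (chunks are |
| * added with list_add()). Hand its GPU VA back to the FW with the chunk |
| * size, in 4KiB units, packed in the 12 LSBs. |
| */ |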
| chunk = list_first_entry(&heap->chunks, |
| struct panthor_heap_chunk, |
| node); |
| *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | |
| (heap->chunk_size >> 12); |
| ret = 0; |
| |
| out_unlock: |
| up_read(&pool->lock); |
| return ret; |
| } |
| |
| static void panthor_heap_pool_release(struct kref *refcount) |
| { |
| struct panthor_heap_pool *pool = |
| container_of(refcount, struct panthor_heap_pool, refcount); |
| |
| xa_destroy(&pool->xa); |
| kfree(pool); |
| } |
| |
| /** |
| * panthor_heap_pool_put() - Release a heap pool reference |
| * @pool: Pool to release the reference on. Can be NULL. |
| */ |
| void panthor_heap_pool_put(struct panthor_heap_pool *pool) |
| { |
| if (pool) |
| kref_put(&pool->refcount, panthor_heap_pool_release); |
| } |
| |
| /** |
| * panthor_heap_pool_get() - Get a heap pool reference |
| * @pool: Pool to get the reference on. Can be NULL. |
| * |
| * Return: @pool. |
| */ |
| struct panthor_heap_pool * |
| panthor_heap_pool_get(struct panthor_heap_pool *pool) |
| { |
| if (pool) |
| kref_get(&pool->refcount); |
| |
| return pool; |
| } |
| |
| /** |
| * panthor_heap_pool_create() - Create a heap pool |
| * @ptdev: Device. |
| * @vm: The VM this heap pool will be attached to. |
| * |
| * A heap pool can contain up to 128 heap contexts (MAX_HEAPS_PER_POOL) and |
| * is per-VM. |
| * |
| * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise. |
| */ |
| struct panthor_heap_pool * |
| panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) |
| { |
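| /* Reserve one heap context slot per possible heap, each padded to the GPU |
| * cache line stride, with the whole buffer rounded up to a 4k page. |
| */ |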
| size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * |
| panthor_heap_ctx_stride(ptdev), |
| 4096); |
| struct panthor_heap_pool *pool; |
| int ret = 0; |
| |
| pool = kzalloc(sizeof(*pool), GFP_KERNEL); |
| if (!pool) |
| return ERR_PTR(-ENOMEM); |
| |
| /* We want a weak ref here: the heap pool belongs to the VM, so we're |
| * sure that, as long as the heap pool exists, the VM exists too. |
| */ |
| pool->vm = vm; |
| pool->ptdev = ptdev; |
| init_rwsem(&pool->lock); |
| xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); |
| kref_init(&pool->refcount); |
| |
| pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, |
| DRM_PANTHOR_BO_NO_MMAP, |
| DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, |
| PANTHOR_VM_KERNEL_AUTO_VA); |
| if (IS_ERR(pool->gpu_contexts)) { |
| ret = PTR_ERR(pool->gpu_contexts); |
| goto err_destroy_pool; |
| } |
| |
| ret = panthor_kernel_bo_vmap(pool->gpu_contexts); |
| if (ret) |
| goto err_destroy_pool; |
| |
| return pool; |
| |
| err_destroy_pool: |
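| /* panthor_heap_pool_destroy() handles a partially initialized pool |
| * (pool->gpu_contexts may be an ERR_PTR here) and drops the reference |
| * taken by kref_init(), which frees the pool. |
| */ |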
| panthor_heap_pool_destroy(pool); |
| return ERR_PTR(ret); |
| } |
| |
| /** |
| * panthor_heap_pool_destroy() - Destroy a heap pool. |
| * @pool: Pool to destroy. |
| * |
| * This function destroys all heap contexts and their resources, thus |
| * preventing any further use of the heap contexts or the chunks attached |
| * to them after that point. |
| * |
| * If the GPU still has access to some heap contexts, a fault should be |
| * triggered, which should flag the command stream groups using these |
| * contexts as faulty. |
| * |
| * The heap pool object is only released when all references to this pool |
| * are released. |
| */ |
| void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) |
| { |
| struct panthor_heap *heap; |
| unsigned long i; |
| |
| if (!pool) |
| return; |
| |
| down_write(&pool->lock); |
| xa_for_each(&pool->xa, i, heap) |
| drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); |
| |
| if (!IS_ERR_OR_NULL(pool->gpu_contexts)) |
| panthor_kernel_bo_destroy(pool->gpu_contexts); |
| |
| /* Reflects the fact that the pool has been destroyed. */ |
| pool->vm = NULL; |
| up_write(&pool->lock); |
| |
| panthor_heap_pool_put(pool); |
| } |