/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/mman.h>
#include <linux/sizes.h>

#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_trace.h"
#include "i915_vma.h"

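/*
 * Check that @vma is the mapping we just created with vm_mmap(): it must be
 * backed by the same file and span exactly [addr, addr + PAGE_ALIGN(size)).
 */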
static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
              unsigned long addr, unsigned long size)
{
        if (vma->vm_file != filp)
                return false;

        return vma->vm_start == addr &&
               (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 * it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example of how to do GEM
 * mmap support: please don't implement mmap support like this. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on;
 * hiding the mmap call in a driver-private ioctl breaks that. The i915 driver
 * only does cpu mmaps this way because we didn't know better.
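 *
 * For illustration only, the legacy userspace pattern served by this ioctl is
 * roughly the following ('fd' is assumed to be an open DRM fd, 'handle' a
 * valid GEM handle and 'size' the number of bytes to map; error handling
 * omitted, and .flags may be set to I915_MMAP_WC for a write-combining
 * mapping):
 *
 *        struct drm_i915_gem_mmap arg = {
 *                .handle = handle,
 *                .size = size,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
 *        ptr = (void *)(uintptr_t)arg.addr_ptr;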
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_mmap *args = data;
        struct drm_i915_gem_object *obj;
        unsigned long addr;

        if (args->flags & ~(I915_MMAP_WC))
                return -EINVAL;

        if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
                return -ENODEV;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /* prime objects have no backing filp to GEM mmap
         * pages from.
         */
        if (!obj->base.filp) {
                addr = -ENXIO;
                goto err;
        }

        if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
                addr = -EINVAL;
                goto err;
        }

        addr = vm_mmap(obj->base.filp, 0, args->size,
                       PROT_READ | PROT_WRITE, MAP_SHARED,
                       args->offset);
        if (IS_ERR_VALUE(addr))
                goto err;

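        /*
         * To honour I915_MMAP_WC we have to upgrade the page protection of
         * the mapping we just created. Retake mmap_sem and confirm via
         * __vma_matches() that the vma returned by find_vma() is still
         * exactly the mapping vm_mmap() set up above, as another thread
         * could have unmapped or replaced it in the meantime.
         */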
        if (args->flags & I915_MMAP_WC) {
                struct mm_struct *mm = current->mm;
                struct vm_area_struct *vma;

                if (down_write_killable(&mm->mmap_sem)) {
                        addr = -EINTR;
                        goto err;
                }
                vma = find_vma(mm, addr);
                if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
                        vma->vm_page_prot =
                                pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
                else
                        addr = -ENOMEM;
                up_write(&mm->mmap_sem);
                if (IS_ERR_VALUE(addr))
                        goto err;
        }
        i915_gem_object_put(obj);

        args->addr_ptr = (u64)addr;
        return 0;

err:
        i915_gem_object_put(obj);
        return addr;
}

static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
{
        return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     was the page-fault-of-doom, where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
 *
 * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
 *     pagefault; swapin remains transparent.
 *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. Doing so can cause
 *    machine hangs on some architectures, corruption on others. An attempt
 *    to service a GTT page fault from a snoopable object will generate a
 *    SIGBUS.
 *
 *  * the object must be able to fit into RAM (physical memory, though not
 *    limited to the mappable aperture).
 *
 *
 * Caveats:
 *
 *  * a new GTT page fault will synchronize rendering from the GPU and flush
 *    all data to system memory. Subsequent access will not be synchronized.
 *
 *  * all mappings are revoked on runtime device suspend.
 *
 *  * there are only 8, 16 or 32 fence registers to share between all users
 *    (older machines also require a fence register for display and blitter
 *    access). Contention of the fence registers will cause the previous
 *    users to be unmapped and any new access will generate new page faults.
 *
 *  * running out of memory while servicing a fault may generate a SIGBUS,
 *    rather than the expected SIGSEGV.
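 *
 * For illustration only, userspace can query the value reported here via the
 * GETPARAM ioctl, roughly as follows ('fd' is assumed to be an open DRM fd;
 * error handling omitted):
 *
 *        int version = 0;
 *        struct drm_i915_getparam gp = {
 *                .param = I915_PARAM_MMAP_GTT_VERSION,
 *                .value = &version,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);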
 */
int i915_gem_mmap_gtt_version(void)
{
        return 3;
}

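/*
 * Choose the GGTT view used to service a fault. As a worked example with
 * 4KiB pages and chunk == MIN_CHUNK_PAGES (256): a fault at page 1000 of an
 * untiled 2048-page object produces a partial view covering pages [768, 1024).
 * For tiled objects the chunk is first rounded up to whole tile rows, and if
 * the chunk spans the entire object a normal (full) view is used instead.
 */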
static inline struct i915_ggtt_view
compute_partial_view(const struct drm_i915_gem_object *obj,
                     pgoff_t page_offset,
                     unsigned int chunk)
{
        struct i915_ggtt_view view;

        if (i915_gem_object_is_tiled(obj))
                chunk = roundup(chunk, tile_row_pages(obj));

        view.type = I915_GGTT_VIEW_PARTIAL;
        view.partial.offset = rounddown(page_offset, chunk);
        view.partial.size =
                min_t(unsigned int, chunk,
                      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);

        /* If the partial covers the entire object, just create a normal VMA. */
        if (chunk >= obj->base.size >> PAGE_SHIFT)
                view.type = I915_GGTT_VIEW_NORMAL;

        return view;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 *
 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
 */
vm_fault_t i915_gem_fault(struct vm_fault *vmf)
{
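/* Minimum chunk of the object, in pages, mapped when falling back to a
 * partial view (1 MiB worth of pages).
 */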
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
        struct vm_area_struct *area = vmf->vma;
        struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *i915 = to_i915(dev);
        struct intel_runtime_pm *rpm = &i915->runtime_pm;
        struct i915_ggtt *ggtt = &i915->ggtt;
        bool write = area->vm_flags & VM_WRITE;
        intel_wakeref_t wakeref;
        struct i915_vma *vma;
        pgoff_t page_offset;
        int srcu;
        int ret;

        /* Sanity check that we allow writing into this object */
        if (i915_gem_object_is_readonly(obj) && write)
                return VM_FAULT_SIGBUS;

        /* We don't use vmf->pgoff since that has the fake offset */
        page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;

        trace_i915_gem_object_fault(obj, page_offset, true, write);

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err;

        wakeref = intel_runtime_pm_get(rpm);

        srcu = intel_gt_reset_trylock(ggtt->vm.gt);
        if (srcu < 0) {
                ret = srcu;
                goto err_rpm;
        }

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto err_reset;

        /* Access to snoopable pages through the GTT is incoherent. */
        if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
                ret = -EFAULT;
                goto err_unlock;
        }

        /* Now pin it into the GTT as needed */
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       PIN_MAPPABLE |
                                       PIN_NONBLOCK /* NOWARN */ |
                                       PIN_NOEVICT);
        if (IS_ERR(vma)) {
                /* Use a partial view if it is bigger than available space */
                struct i915_ggtt_view view =
                        compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
                unsigned int flags;

                flags = PIN_MAPPABLE | PIN_NOSEARCH;
                if (view.type == I915_GGTT_VIEW_NORMAL)
                        flags |= PIN_NONBLOCK; /* avoid warnings for pinned */

                /*
                 * Userspace is now writing through an untracked VMA, abandon
                 * all hope that the hardware is able to track future writes.
                 */

                vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
                if (IS_ERR(vma)) {
                        flags = PIN_MAPPABLE;
                        view.type = I915_GGTT_VIEW_PARTIAL;
                        vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
                }
        }
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto err_unlock;
        }

        ret = i915_vma_pin_fence(vma);
        if (ret)
                goto err_unpin;

        /* Finally, remap it using the new GTT offset */
        ret = remap_io_mapping(area,
                               area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
                               (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
                               min_t(u64, vma->size, area->vm_end - area->vm_start),
                               &ggtt->iomap);
        if (ret)
                goto err_fence;

        assert_rpm_wakelock_held(rpm);

        /* Mark as being mmapped into userspace for later revocation */
        mutex_lock(&i915->ggtt.vm.mutex);
        if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
                list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
        mutex_unlock(&i915->ggtt.vm.mutex);

        if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
                intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
                                   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));

        i915_vma_set_ggtt_write(vma);

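        /*
         * Success: ret == 0. We deliberately fall through the unwind labels
         * below to drop the temporary pins and locks; the error switch at
         * the end then translates 0 into VM_FAULT_NOPAGE.
         */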
err_fence:
        i915_vma_unpin_fence(vma);
err_unpin:
        __i915_vma_unpin(vma);
err_unlock:
        mutex_unlock(&dev->struct_mutex);
err_reset:
        intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
        intel_runtime_pm_put(rpm, wakeref);
        i915_gem_object_unpin_pages(obj);
err:
        switch (ret) {
        case -EIO:
                /*
                 * We eat errors when the gpu is terminally wedged to avoid
                 * userspace unduly crashing (gl has no provisions for mmaps to
                 * fail). But any other -EIO isn't ours (e.g. swap in failure)
                 * and so needs to be reported.
                 */
                if (!intel_gt_is_wedged(ggtt->vm.gt))
                        return VM_FAULT_SIGBUS;
                /* else, fall through */
        case -EAGAIN:
                /*
                 * EAGAIN means the gpu is hung and we'll wait for the error
                 * handler to reset everything when re-faulting in
                 * i915_mutex_lock_interruptible.
                 */
        case 0:
        case -ERESTARTSYS:
        case -EINTR:
        case -EBUSY:
                /*
                 * EBUSY is ok: this just means that another thread
                 * already did the job.
                 */
                return VM_FAULT_NOPAGE;
        case -ENOMEM:
                return VM_FAULT_OOM;
        case -ENOSPC:
        case -EFAULT:
                return VM_FAULT_SIGBUS;
        default:
                WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
                return VM_FAULT_SIGBUS;
        }
}

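/*
 * Drop the object's userfault tracking and unmap its fake mmap offset so that
 * the next userspace access through a GTT mmap takes a fresh fault in
 * i915_gem_fault(). Callers provide the required serialisation themselves,
 * e.g. i915_gem_object_release_mmap() below holds the GGTT vm mutex and a
 * runtime pm wakeref around this call.
 */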
void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;

        GEM_BUG_ON(!obj->userfault_count);

        obj->userfault_count = 0;
        list_del(&obj->userfault_link);
        drm_vma_node_unmap(&obj->base.vma_node,
                           obj->base.dev->anon_inode->i_mapping);

        for_each_ggtt_vma(vma, obj)
                i915_vma_unset_userfault(vma);
}

/**
 * i915_gem_object_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        intel_wakeref_t wakeref;

        /* Serialisation between user GTT access and our code depends upon
         * revoking the CPU's PTE whilst the mutex is held. The next user
         * pagefault then has to wait until we release the mutex.
         *
         * Note that RPM complicates somewhat by adding an additional
         * requirement that operations to the GGTT be made holding the RPM
         * wakeref.
         */
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);
        mutex_lock(&i915->ggtt.vm.mutex);

        if (!obj->userfault_count)
                goto out;

        __i915_gem_object_release_mmap(obj);

        /* Ensure that the CPU's PTEs are revoked and there are no outstanding
         * memory transactions from userspace before we return. The TLB
         * flushing implied above by changing the PTE *should* be
         * sufficient, an extra barrier here just provides us with a bit
         * of paranoid documentation about our requirement to serialise
         * memory writes before touching registers / GSM.
         */
        wmb();

out:
        mutex_unlock(&i915->ggtt.vm.mutex);
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

static int create_mmap_offset(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        int err;

        err = drm_gem_create_mmap_offset(&obj->base);
        if (likely(!err))
                return 0;

        /* Attempt to reap some mmap space from dead objects */
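        /*
         * Each pass below waits for the GPU to idle and drains freed
         * objects before retrying; flush_delayed_work() keeps the loop
         * going only while the retire worker still had work pending, i.e.
         * while there is still a chance of reclaiming more mmap space.
         */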
        do {
                err = i915_gem_wait_for_idle(i915,
                                             I915_WAIT_INTERRUPTIBLE,
                                             MAX_SCHEDULE_TIMEOUT);
                if (err)
                        break;

                i915_gem_drain_freed_objects(i915);
                err = drm_gem_create_mmap_offset(&obj->base);
                if (!err)
                        break;

        } while (flush_delayed_work(&i915->gem.retire_work));

        return err;
}

int
i915_gem_mmap_gtt(struct drm_file *file,
                  struct drm_device *dev,
                  u32 handle,
                  u64 *offset)
{
        struct drm_i915_gem_object *obj;
        int ret;

        obj = i915_gem_object_lookup(file, handle);
        if (!obj)
                return -ENOENT;

        ret = create_mmap_offset(obj);
        if (ret == 0)
                *offset = drm_vma_node_offset_addr(&obj->base.vma_node);

        i915_gem_object_put(obj);
        return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
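 *
 * For illustration only, the userspace side of this flow is roughly as
 * follows ('fd' is assumed to be an open DRM fd, 'handle' a valid GEM
 * handle and 'size' the object size; error handling omitted):
 *
 *        struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *        ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                   fd, arg.offset);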
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
{
        struct drm_i915_gem_mmap_gtt *args = data;

        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_mman.c"
#endif