| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
| * |
| * VFIO container (/dev/vfio/vfio) |
| */ |
| #include <linux/file.h> |
| #include <linux/slab.h> |
| #include <linux/fs.h> |
| #include <linux/capability.h> |
| #include <linux/iommu.h> |
| #include <linux/miscdevice.h> |
| #include <linux/vfio.h> |
| #include <uapi/linux/vfio.h> |
| |
| #include "vfio.h" |
| |
| struct vfio_container { |
| struct kref kref; |
| struct list_head group_list; |
| struct rw_semaphore group_lock; |
| struct vfio_iommu_driver *iommu_driver; |
| void *iommu_data; |
| bool noiommu; |
| }; |
| |
| static struct vfio { |
| struct list_head iommu_drivers_list; |
| struct mutex iommu_drivers_lock; |
| } vfio; |
| |
| static void *vfio_noiommu_open(unsigned long arg) |
| { |
| if (arg != VFIO_NOIOMMU_IOMMU) |
| return ERR_PTR(-EINVAL); |
| if (!capable(CAP_SYS_RAWIO)) |
| return ERR_PTR(-EPERM); |
| |
| return NULL; |
| } |
| |
/* release() callback of the no-IOMMU backend: open() hands out no state. */
static void vfio_noiommu_release(void *iommu_data)
{
}
| |
| static long vfio_noiommu_ioctl(void *iommu_data, |
| unsigned int cmd, unsigned long arg) |
| { |
| if (cmd == VFIO_CHECK_EXTENSION) |
| return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0; |
| |
| return -ENOTTY; |
| } |
| |
| static int vfio_noiommu_attach_group(void *iommu_data, |
| struct iommu_group *iommu_group, enum vfio_group_type type) |
| { |
| return 0; |
| } |
| |
/* detach_group() callback of the no-IOMMU backend: nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}
| |
| static const struct vfio_iommu_driver_ops vfio_noiommu_ops = { |
| .name = "vfio-noiommu", |
| .owner = THIS_MODULE, |
| .open = vfio_noiommu_open, |
| .release = vfio_noiommu_release, |
| .ioctl = vfio_noiommu_ioctl, |
| .attach_group = vfio_noiommu_attach_group, |
| .detach_group = vfio_noiommu_detach_group, |
| }; |
| |
| /* |
| * Only noiommu containers can use vfio-noiommu and noiommu containers can only |
| * use vfio-noiommu. |
| */ |
| static bool vfio_iommu_driver_allowed(struct vfio_container *container, |
| const struct vfio_iommu_driver *driver) |
| { |
| if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU)) |
| return true; |
| return container->noiommu == (driver->ops == &vfio_noiommu_ops); |
| } |
| |
| /* |
| * IOMMU driver registration |
| */ |
| int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| { |
| struct vfio_iommu_driver *driver, *tmp; |
| |
| if (WARN_ON(!ops->register_device != !ops->unregister_device)) |
| return -EINVAL; |
| |
| driver = kzalloc(sizeof(*driver), GFP_KERNEL); |
| if (!driver) |
| return -ENOMEM; |
| |
| driver->ops = ops; |
| |
| mutex_lock(&vfio.iommu_drivers_lock); |
| |
| /* Check for duplicates */ |
| list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) { |
| if (tmp->ops == ops) { |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| kfree(driver); |
| return -EINVAL; |
| } |
| } |
| |
| list_add(&driver->vfio_next, &vfio.iommu_drivers_list); |
| |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| |
| return 0; |
| } |
| EXPORT_SYMBOL_GPL(vfio_register_iommu_driver); |
| |
| void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops) |
| { |
| struct vfio_iommu_driver *driver; |
| |
| mutex_lock(&vfio.iommu_drivers_lock); |
| list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
| if (driver->ops == ops) { |
| list_del(&driver->vfio_next); |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| kfree(driver); |
| return; |
| } |
| } |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| } |
| EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver); |
| |
| /* |
| * Container objects - containers are created when /dev/vfio/vfio is |
| * opened, but their lifecycle extends until the last user is done, so |
| * it's freed via kref. Must support container/group/device being |
| * closed in any order. |
| */ |
| static void vfio_container_release(struct kref *kref) |
| { |
| struct vfio_container *container; |
| container = container_of(kref, struct vfio_container, kref); |
| |
| kfree(container); |
| } |
| |
| static void vfio_container_get(struct vfio_container *container) |
| { |
| kref_get(&container->kref); |
| } |
| |
| static void vfio_container_put(struct vfio_container *container) |
| { |
| kref_put(&container->kref, vfio_container_release); |
| } |
| |
| void vfio_device_container_register(struct vfio_device *device) |
| { |
| struct vfio_iommu_driver *iommu_driver = |
| device->group->container->iommu_driver; |
| |
| if (iommu_driver && iommu_driver->ops->register_device) |
| iommu_driver->ops->register_device( |
| device->group->container->iommu_data, device); |
| } |
| |
| void vfio_device_container_unregister(struct vfio_device *device) |
| { |
| struct vfio_iommu_driver *iommu_driver = |
| device->group->container->iommu_driver; |
| |
| if (iommu_driver && iommu_driver->ops->unregister_device) |
| iommu_driver->ops->unregister_device( |
| device->group->container->iommu_data, device); |
| } |
| |
| static long |
| vfio_container_ioctl_check_extension(struct vfio_container *container, |
| unsigned long arg) |
| { |
| struct vfio_iommu_driver *driver; |
| long ret = 0; |
| |
| down_read(&container->group_lock); |
| |
| driver = container->iommu_driver; |
| |
| switch (arg) { |
| /* No base extensions yet */ |
| default: |
| /* |
| * If no driver is set, poll all registered drivers for |
| * extensions and return the first positive result. If |
| * a driver is already set, further queries will be passed |
| * only to that driver. |
| */ |
| if (!driver) { |
| mutex_lock(&vfio.iommu_drivers_lock); |
| list_for_each_entry(driver, &vfio.iommu_drivers_list, |
| vfio_next) { |
| |
| if (!list_empty(&container->group_list) && |
| !vfio_iommu_driver_allowed(container, |
| driver)) |
| continue; |
| if (!try_module_get(driver->ops->owner)) |
| continue; |
| |
| ret = driver->ops->ioctl(NULL, |
| VFIO_CHECK_EXTENSION, |
| arg); |
| module_put(driver->ops->owner); |
| if (ret > 0) |
| break; |
| } |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| } else |
| ret = driver->ops->ioctl(container->iommu_data, |
| VFIO_CHECK_EXTENSION, arg); |
| } |
| |
| up_read(&container->group_lock); |
| |
| return ret; |
| } |
| |
| /* hold write lock on container->group_lock */ |
| static int __vfio_container_attach_groups(struct vfio_container *container, |
| struct vfio_iommu_driver *driver, |
| void *data) |
| { |
| struct vfio_group *group; |
| int ret = -ENODEV; |
| |
| list_for_each_entry(group, &container->group_list, container_next) { |
| ret = driver->ops->attach_group(data, group->iommu_group, |
| group->type); |
| if (ret) |
| goto unwind; |
| } |
| |
| return ret; |
| |
| unwind: |
| list_for_each_entry_continue_reverse(group, &container->group_list, |
| container_next) { |
| driver->ops->detach_group(data, group->iommu_group); |
| } |
| |
| return ret; |
| } |
| |
| static long vfio_ioctl_set_iommu(struct vfio_container *container, |
| unsigned long arg) |
| { |
| struct vfio_iommu_driver *driver; |
| long ret = -ENODEV; |
| |
| down_write(&container->group_lock); |
| |
| /* |
| * The container is designed to be an unprivileged interface while |
| * the group can be assigned to specific users. Therefore, only by |
| * adding a group to a container does the user get the privilege of |
| * enabling the iommu, which may allocate finite resources. There |
| * is no unset_iommu, but by removing all the groups from a container, |
| * the container is deprivileged and returns to an unset state. |
| */ |
| if (list_empty(&container->group_list) || container->iommu_driver) { |
| up_write(&container->group_lock); |
| return -EINVAL; |
| } |
| |
| mutex_lock(&vfio.iommu_drivers_lock); |
| list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { |
| void *data; |
| |
| if (!vfio_iommu_driver_allowed(container, driver)) |
| continue; |
| if (!try_module_get(driver->ops->owner)) |
| continue; |
| |
| /* |
| * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION, |
| * so test which iommu driver reported support for this |
| * extension and call open on them. We also pass them the |
| * magic, allowing a single driver to support multiple |
| * interfaces if they'd like. |
| */ |
| if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) { |
| module_put(driver->ops->owner); |
| continue; |
| } |
| |
| data = driver->ops->open(arg); |
| if (IS_ERR(data)) { |
| ret = PTR_ERR(data); |
| module_put(driver->ops->owner); |
| continue; |
| } |
| |
| ret = __vfio_container_attach_groups(container, driver, data); |
| if (ret) { |
| driver->ops->release(data); |
| module_put(driver->ops->owner); |
| continue; |
| } |
| |
| container->iommu_driver = driver; |
| container->iommu_data = data; |
| break; |
| } |
| |
| mutex_unlock(&vfio.iommu_drivers_lock); |
| up_write(&container->group_lock); |
| |
| return ret; |
| } |
| |
| static long vfio_fops_unl_ioctl(struct file *filep, |
| unsigned int cmd, unsigned long arg) |
| { |
| struct vfio_container *container = filep->private_data; |
| struct vfio_iommu_driver *driver; |
| void *data; |
| long ret = -EINVAL; |
| |
| if (!container) |
| return ret; |
| |
| switch (cmd) { |
| case VFIO_GET_API_VERSION: |
| ret = VFIO_API_VERSION; |
| break; |
| case VFIO_CHECK_EXTENSION: |
| ret = vfio_container_ioctl_check_extension(container, arg); |
| break; |
| case VFIO_SET_IOMMU: |
| ret = vfio_ioctl_set_iommu(container, arg); |
| break; |
| default: |
| driver = container->iommu_driver; |
| data = container->iommu_data; |
| |
| if (driver) /* passthrough all unrecognized ioctls */ |
| ret = driver->ops->ioctl(data, cmd, arg); |
| } |
| |
| return ret; |
| } |
| |
| static int vfio_fops_open(struct inode *inode, struct file *filep) |
| { |
| struct vfio_container *container; |
| |
| container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT); |
| if (!container) |
| return -ENOMEM; |
| |
| INIT_LIST_HEAD(&container->group_list); |
| init_rwsem(&container->group_lock); |
| kref_init(&container->kref); |
| |
| filep->private_data = container; |
| |
| return 0; |
| } |
| |
| static int vfio_fops_release(struct inode *inode, struct file *filep) |
| { |
| struct vfio_container *container = filep->private_data; |
| |
| filep->private_data = NULL; |
| |
| vfio_container_put(container); |
| |
| return 0; |
| } |
| |
| static const struct file_operations vfio_fops = { |
| .owner = THIS_MODULE, |
| .open = vfio_fops_open, |
| .release = vfio_fops_release, |
| .unlocked_ioctl = vfio_fops_unl_ioctl, |
| .compat_ioctl = compat_ptr_ioctl, |
| }; |
| |
| struct vfio_container *vfio_container_from_file(struct file *file) |
| { |
| struct vfio_container *container; |
| |
| /* Sanity check, is this really our fd? */ |
| if (file->f_op != &vfio_fops) |
| return NULL; |
| |
| container = file->private_data; |
| WARN_ON(!container); /* fget ensures we don't race vfio_release */ |
| return container; |
| } |
| |
| static struct miscdevice vfio_dev = { |
| .minor = VFIO_MINOR, |
| .name = "vfio", |
| .fops = &vfio_fops, |
| .nodename = "vfio/vfio", |
| .mode = S_IRUGO | S_IWUGO, |
| }; |
| |
| int vfio_container_attach_group(struct vfio_container *container, |
| struct vfio_group *group) |
| { |
| struct vfio_iommu_driver *driver; |
| int ret = 0; |
| |
| lockdep_assert_held(&group->group_lock); |
| |
| if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) |
| return -EPERM; |
| |
| down_write(&container->group_lock); |
| |
| /* Real groups and fake groups cannot mix */ |
| if (!list_empty(&container->group_list) && |
| container->noiommu != (group->type == VFIO_NO_IOMMU)) { |
| ret = -EPERM; |
| goto out_unlock_container; |
| } |
| |
| if (group->type == VFIO_IOMMU) { |
| ret = iommu_group_claim_dma_owner(group->iommu_group, group); |
| if (ret) |
| goto out_unlock_container; |
| } |
| |
| driver = container->iommu_driver; |
| if (driver) { |
| ret = driver->ops->attach_group(container->iommu_data, |
| group->iommu_group, |
| group->type); |
| if (ret) { |
| if (group->type == VFIO_IOMMU) |
| iommu_group_release_dma_owner( |
| group->iommu_group); |
| goto out_unlock_container; |
| } |
| } |
| |
| group->container = container; |
| group->container_users = 1; |
| container->noiommu = (group->type == VFIO_NO_IOMMU); |
| list_add(&group->container_next, &container->group_list); |
| |
| /* Get a reference on the container and mark a user within the group */ |
| vfio_container_get(container); |
| |
| out_unlock_container: |
| up_write(&container->group_lock); |
| return ret; |
| } |
| |
| void vfio_group_detach_container(struct vfio_group *group) |
| { |
| struct vfio_container *container = group->container; |
| struct vfio_iommu_driver *driver; |
| |
| lockdep_assert_held(&group->group_lock); |
| WARN_ON(group->container_users != 1); |
| |
| down_write(&container->group_lock); |
| |
| driver = container->iommu_driver; |
| if (driver) |
| driver->ops->detach_group(container->iommu_data, |
| group->iommu_group); |
| |
| if (group->type == VFIO_IOMMU) |
| iommu_group_release_dma_owner(group->iommu_group); |
| |
| group->container = NULL; |
| group->container_users = 0; |
| list_del(&group->container_next); |
| |
| /* Detaching the last group deprivileges a container, remove iommu */ |
| if (driver && list_empty(&container->group_list)) { |
| driver->ops->release(container->iommu_data); |
| module_put(driver->ops->owner); |
| container->iommu_driver = NULL; |
| container->iommu_data = NULL; |
| } |
| |
| up_write(&container->group_lock); |
| |
| vfio_container_put(container); |
| } |
| |
| int vfio_group_use_container(struct vfio_group *group) |
| { |
| lockdep_assert_held(&group->group_lock); |
| |
| /* |
| * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but |
| * VFIO_SET_IOMMU hasn't been done yet. |
| */ |
| if (!group->container->iommu_driver) |
| return -EINVAL; |
| |
| if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) |
| return -EPERM; |
| |
| get_file(group->opened_file); |
| group->container_users++; |
| return 0; |
| } |
| |
| void vfio_group_unuse_container(struct vfio_group *group) |
| { |
| lockdep_assert_held(&group->group_lock); |
| |
| WARN_ON(group->container_users <= 1); |
| group->container_users--; |
| fput(group->opened_file); |
| } |
| |
| int vfio_device_container_pin_pages(struct vfio_device *device, |
| dma_addr_t iova, int npage, |
| int prot, struct page **pages) |
| { |
| struct vfio_container *container = device->group->container; |
| struct iommu_group *iommu_group = device->group->iommu_group; |
| struct vfio_iommu_driver *driver = container->iommu_driver; |
| |
| if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) |
| return -E2BIG; |
| |
| if (unlikely(!driver || !driver->ops->pin_pages)) |
| return -ENOTTY; |
| return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, |
| npage, prot, pages); |
| } |
| |
| void vfio_device_container_unpin_pages(struct vfio_device *device, |
| dma_addr_t iova, int npage) |
| { |
| struct vfio_container *container = device->group->container; |
| |
| if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) |
| return; |
| |
| container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, |
| npage); |
| } |
| |
| int vfio_device_container_dma_rw(struct vfio_device *device, |
| dma_addr_t iova, void *data, |
| size_t len, bool write) |
| { |
| struct vfio_container *container = device->group->container; |
| struct vfio_iommu_driver *driver = container->iommu_driver; |
| |
| if (unlikely(!driver || !driver->ops->dma_rw)) |
| return -ENOTTY; |
| return driver->ops->dma_rw(container->iommu_data, iova, data, len, |
| write); |
| } |
| |
| int __init vfio_container_init(void) |
| { |
| int ret; |
| |
| mutex_init(&vfio.iommu_drivers_lock); |
| INIT_LIST_HEAD(&vfio.iommu_drivers_list); |
| |
| ret = misc_register(&vfio_dev); |
| if (ret) { |
| pr_err("vfio: misc device register failed\n"); |
| return ret; |
| } |
| |
| if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) { |
| ret = vfio_register_iommu_driver(&vfio_noiommu_ops); |
| if (ret) |
| goto err_misc; |
| } |
| return 0; |
| |
| err_misc: |
| misc_deregister(&vfio_dev); |
| return ret; |
| } |
| |
| void vfio_container_cleanup(void) |
| { |
| if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) |
| vfio_unregister_iommu_driver(&vfio_noiommu_ops); |
| misc_deregister(&vfio_dev); |
| mutex_destroy(&vfio.iommu_drivers_lock); |
| } |
| |
| MODULE_ALIAS_MISCDEV(VFIO_MINOR); |
| MODULE_ALIAS("devname:vfio/vfio"); |