| // SPDX-License-Identifier: GPL-2.0 |
| #include <linux/capability.h> |
| #include <linux/compat.h> |
| #include <linux/blkdev.h> |
| #include <linux/export.h> |
| #include <linux/gfp.h> |
| #include <linux/blkpg.h> |
| #include <linux/hdreg.h> |
| #include <linux/backing-dev.h> |
| #include <linux/fs.h> |
| #include <linux/blktrace_api.h> |
| #include <linux/pr.h> |
| #include <linux/uaccess.h> |
| #include <linux/pagemap.h> |
| #include <linux/io_uring/cmd.h> |
| #include <uapi/linux/blkdev.h> |
| #include "blk.h" |
| |
/*
 * Handle the BLKPG add/resize/delete partition sub-commands.
 *
 * @bdev:  whole-disk block device the ioctl was issued on
 * @upart: userspace pointer to the blkpg_partition payload
 * @op:    BLKPG_ADD_PARTITION, BLKPG_RESIZE_PARTITION or
 *         BLKPG_DEL_PARTITION
 *
 * Returns 0 on success or a negative errno.
 */
static int blkpg_do_ioctl(struct block_device *bdev,
			  struct blkpg_partition __user *upart, int op)
{
	struct gendisk *disk = bdev->bd_disk;
	struct blkpg_partition p;
	sector_t start, length, capacity, end;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
		return -EFAULT;
	/* Partitions can only be managed on the whole device. */
	if (bdev_is_partition(bdev))
		return -EINVAL;

	if (p.pno <= 0)
		return -EINVAL;

	/* Deletion needs no start/length, so handle it before range checks. */
	if (op == BLKPG_DEL_PARTITION)
		return bdev_del_partition(disk, p.pno);

	/*
	 * p.start/p.length are signed 64-bit byte counts: reject negative
	 * values and ranges whose sum would overflow LLONG_MAX.
	 */
	if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start)
		return -EINVAL;
	/* Check that the partition is aligned to the block size */
	if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev)))
		return -EINVAL;

	start = p.start >> SECTOR_SHIFT;
	length = p.length >> SECTOR_SHIFT;
	capacity = get_capacity(disk);

	if (check_add_overflow(start, length, &end))
		return -EINVAL;

	/* The partition must lie entirely within the disk. */
	if (start >= capacity || end > capacity)
		return -EINVAL;

	switch (op) {
	case BLKPG_ADD_PARTITION:
		return bdev_add_partition(disk, p.pno, start, length);
	case BLKPG_RESIZE_PARTITION:
		return bdev_resize_partition(disk, p.pno, start, length);
	default:
		return -EINVAL;
	}
}
| |
| static int blkpg_ioctl(struct block_device *bdev, |
| struct blkpg_ioctl_arg __user *arg) |
| { |
| struct blkpg_partition __user *udata; |
| int op; |
| |
| if (get_user(op, &arg->op) || get_user(udata, &arg->data)) |
| return -EFAULT; |
| |
| return blkpg_do_ioctl(bdev, udata, op); |
| } |
| |
| #ifdef CONFIG_COMPAT |
/* 32-bit layout of struct blkpg_ioctl_arg: data shrinks to compat_caddr_t. */
struct compat_blkpg_ioctl_arg {
	compat_int_t op;
	compat_int_t flags;
	compat_int_t datalen;
	compat_caddr_t data;
};
| |
| static int compat_blkpg_ioctl(struct block_device *bdev, |
| struct compat_blkpg_ioctl_arg __user *arg) |
| { |
| compat_caddr_t udata; |
| int op; |
| |
| if (get_user(op, &arg->op) || get_user(udata, &arg->data)) |
| return -EFAULT; |
| |
| return blkpg_do_ioctl(bdev, compat_ptr(udata), op); |
| } |
| #endif |
| |
| /* |
| * Check that [start, start + len) is a valid range from the block device's |
| * perspective, including verifying that it can be correctly translated into |
| * logical block addresses. |
| */ |
| static int blk_validate_byte_range(struct block_device *bdev, |
| uint64_t start, uint64_t len) |
| { |
| unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; |
| uint64_t end; |
| |
| if ((start | len) & bs_mask) |
| return -EINVAL; |
| if (!len) |
| return -EINVAL; |
| if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) |
| return -EINVAL; |
| |
| return 0; |
| } |
| |
/*
 * BLKDISCARD: discard the byte range given as a uint64_t[2] {start, len}
 * array at *arg.  The page cache over the range is truncated first, then
 * the range is carved into discard bios that are chained and submitted
 * under a plug.  Returns 0 on success or a negative errno.
 */
static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
		unsigned long arg)
{
	uint64_t range[2], start, len;
	struct bio *prev = NULL, *bio;
	sector_t sector, nr_sects;
	struct blk_plug plug;
	int err;

	if (copy_from_user(range, (void __user *)arg, sizeof(range)))
		return -EFAULT;
	start = range[0];
	len = range[1];

	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;

	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;
	if (bdev_read_only(bdev))
		return -EPERM;
	err = blk_validate_byte_range(bdev, start, len);
	if (err)
		return err;

	/* Drop cached pages over the range, holding off concurrent faults. */
	filemap_invalidate_lock(bdev->bd_mapping);
	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
	if (err)
		goto fail;

	sector = start >> SECTOR_SHIFT;
	nr_sects = len >> SECTOR_SHIFT;

	blk_start_plug(&plug);
	while (1) {
		if (fatal_signal_pending(current)) {
			/* Let already-submitted bios finish before bailing. */
			if (prev)
				bio_await_chain(prev);
			err = -EINTR;
			goto out_unplug;
		}
		bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
				GFP_KERNEL);
		if (!bio)
			break;
		prev = bio_chain_and_submit(prev, bio);
	}
	if (prev) {
		err = submit_bio_wait(prev);
		/*
		 * NOTE(review): -EOPNOTSUPP is masked here, presumably to
		 * keep the historical best-effort BLKDISCARD ABI — confirm.
		 */
		if (err == -EOPNOTSUPP)
			err = 0;
		bio_put(prev);
	}
out_unplug:
	blk_finish_plug(&plug);
fail:
	filemap_invalidate_unlock(bdev->bd_mapping);
	return err;
}
| |
/*
 * BLKSECDISCARD: securely erase the byte range given as a uint64_t[2]
 * {start, len} array at @argp.  The page cache over the range is
 * truncated before the erase is issued.  Returns 0 on success or a
 * negative errno.
 */
static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
		void __user *argp)
{
	uint64_t start, len, end;
	uint64_t range[2];
	int err;

	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;
	if (!bdev_max_secure_erase_sectors(bdev))
		return -EOPNOTSUPP;
	if (copy_from_user(range, argp, sizeof(range)))
		return -EFAULT;

	start = range[0];
	len = range[1];
	/* 512-byte granularity here, unlike BLKDISCARD's logical-block check. */
	if ((start & 511) || (len & 511))
		return -EINVAL;
	if (check_add_overflow(start, len, &end) ||
	    end > bdev_nr_bytes(bdev))
		return -EINVAL;

	/* Serialize against page faults while the cache is truncated. */
	filemap_invalidate_lock(bdev->bd_mapping);
	err = truncate_bdev_range(bdev, mode, start, end - 1);
	if (!err)
		err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
						GFP_KERNEL);
	filemap_invalidate_unlock(bdev->bd_mapping);
	return err;
}
| |
| |
/*
 * BLKZEROOUT: zero-fill the byte range given as a uint64_t[2] {start, len}
 * array at *arg.  Writes zeroes through the device without deallocating
 * blocks (BLKDEV_ZERO_NOUNMAP), after truncating the affected page cache.
 * Returns 0 on success or a negative errno.
 */
static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
		unsigned long arg)
{
	uint64_t range[2];
	uint64_t start, end, len;
	int err;

	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;

	if (copy_from_user(range, (void __user *)arg, sizeof(range)))
		return -EFAULT;

	start = range[0];
	len = range[1];
	end = start + len - 1;

	/* 512-byte granularity; the range must lie within the device. */
	if (start & 511)
		return -EINVAL;
	if (len & 511)
		return -EINVAL;
	if (end >= (uint64_t)bdev_nr_bytes(bdev))
		return -EINVAL;
	/* Rejects wrap-around of start + len (and, with the check above, len == 0). */
	if (end < start)
		return -EINVAL;

	/* Invalidate the page cache, including dirty pages */
	filemap_invalidate_lock(bdev->bd_mapping);
	err = truncate_bdev_range(bdev, mode, start, end);
	if (err)
		goto fail;

	err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
				   BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE);

 fail:
	filemap_invalidate_unlock(bdev->bd_mapping);
	return err;
}
| |
/*
 * Tiny typed put_user() wrappers so blkdev_common_ioctl() and friends can
 * return values to userspace with the exact width each ioctl's ABI requires.
 */
static int put_ushort(unsigned short __user *argp, unsigned short val)
{
	return put_user(val, argp);
}

static int put_int(int __user *argp, int val)
{
	return put_user(val, argp);
}

static int put_uint(unsigned int __user *argp, unsigned int val)
{
	return put_user(val, argp);
}

static int put_long(long __user *argp, long val)
{
	return put_user(val, argp);
}

static int put_ulong(unsigned long __user *argp, unsigned long val)
{
	return put_user(val, argp);
}

static int put_u64(u64 __user *argp, u64 val)
{
	return put_user(val, argp);
}
| |
| #ifdef CONFIG_COMPAT |
/* Compat variants: 'long' ioctls return 32-bit values to 32-bit tasks. */
static int compat_put_long(compat_long_t __user *argp, long val)
{
	return put_user(val, argp);
}

static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val)
{
	return put_user(val, argp);
}
| #endif |
| |
| #ifdef CONFIG_COMPAT |
| /* |
| * This is the equivalent of compat_ptr_ioctl(), to be used by block |
| * drivers that implement only commands that are completely compatible |
| * between 32-bit and 64-bit user space |
| */ |
| int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, |
| unsigned cmd, unsigned long arg) |
| { |
| struct gendisk *disk = bdev->bd_disk; |
| |
| if (disk->fops->ioctl) |
| return disk->fops->ioctl(bdev, mode, cmd, |
| (unsigned long)compat_ptr(arg)); |
| |
| return -ENOIOCTLCMD; |
| } |
| EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); |
| #endif |
| |
| static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) |
| { |
| /* no sense to make reservations for partitions */ |
| if (bdev_is_partition(bdev)) |
| return false; |
| |
| if (capable(CAP_SYS_ADMIN)) |
| return true; |
| /* |
| * Only allow unprivileged reservations if the file descriptor is open |
| * for writing. |
| */ |
| return mode & BLK_OPEN_WRITE; |
| } |
| |
| static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, |
| struct pr_registration __user *arg) |
| { |
| const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| struct pr_registration reg; |
| |
| if (!blkdev_pr_allowed(bdev, mode)) |
| return -EPERM; |
| if (!ops || !ops->pr_register) |
| return -EOPNOTSUPP; |
| if (copy_from_user(®, arg, sizeof(reg))) |
| return -EFAULT; |
| |
| if (reg.flags & ~PR_FL_IGNORE_KEY) |
| return -EOPNOTSUPP; |
| return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); |
| } |
| |
| static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, |
| struct pr_reservation __user *arg) |
| { |
| const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| struct pr_reservation rsv; |
| |
| if (!blkdev_pr_allowed(bdev, mode)) |
| return -EPERM; |
| if (!ops || !ops->pr_reserve) |
| return -EOPNOTSUPP; |
| if (copy_from_user(&rsv, arg, sizeof(rsv))) |
| return -EFAULT; |
| |
| if (rsv.flags & ~PR_FL_IGNORE_KEY) |
| return -EOPNOTSUPP; |
| return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); |
| } |
| |
| static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, |
| struct pr_reservation __user *arg) |
| { |
| const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| struct pr_reservation rsv; |
| |
| if (!blkdev_pr_allowed(bdev, mode)) |
| return -EPERM; |
| if (!ops || !ops->pr_release) |
| return -EOPNOTSUPP; |
| if (copy_from_user(&rsv, arg, sizeof(rsv))) |
| return -EFAULT; |
| |
| if (rsv.flags) |
| return -EOPNOTSUPP; |
| return ops->pr_release(bdev, rsv.key, rsv.type); |
| } |
| |
| static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, |
| struct pr_preempt __user *arg, bool abort) |
| { |
| const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| struct pr_preempt p; |
| |
| if (!blkdev_pr_allowed(bdev, mode)) |
| return -EPERM; |
| if (!ops || !ops->pr_preempt) |
| return -EOPNOTSUPP; |
| if (copy_from_user(&p, arg, sizeof(p))) |
| return -EFAULT; |
| |
| if (p.flags) |
| return -EOPNOTSUPP; |
| return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); |
| } |
| |
| static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, |
| struct pr_clear __user *arg) |
| { |
| const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
| struct pr_clear c; |
| |
| if (!blkdev_pr_allowed(bdev, mode)) |
| return -EPERM; |
| if (!ops || !ops->pr_clear) |
| return -EOPNOTSUPP; |
| if (copy_from_user(&c, arg, sizeof(c))) |
| return -EFAULT; |
| |
| if (c.flags) |
| return -EOPNOTSUPP; |
| return ops->pr_clear(bdev, c.key); |
| } |
| |
/*
 * BLKFLSBUF: flush buffered writes and drop the page cache of @bdev.
 *
 * If a holder (e.g. a mounted filesystem) provides a ->sync method, that
 * is used so the owner can write back its own state; otherwise the bdev
 * page cache is synced directly.
 *
 * NOTE(review): the ->sync branch does not unlock bd_holder_lock here —
 * this relies on the holder-ops convention that ->sync itself drops the
 * lock; confirm against the blk_holder_ops contract in blkdev.h.
 */
static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
		unsigned long arg)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	mutex_lock(&bdev->bd_holder_lock);
	if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
		bdev->bd_holder_ops->sync(bdev);
	else {
		mutex_unlock(&bdev->bd_holder_lock);
		sync_blockdev(bdev);
	}

	/* Toss cached pages so subsequent reads hit the device. */
	invalidate_bdev(bdev);
	return 0;
}
| |
/*
 * BLKROSET: set or clear the device's read-only flag from an int at *arg.
 * The driver's ->set_read_only hook, when present, may veto the change
 * before the flag is updated.
 */
static int blkdev_roset(struct block_device *bdev, unsigned cmd,
		unsigned long arg)
{
	int ret, n;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (get_user(n, (int __user *)arg))
		return -EFAULT;
	if (bdev->bd_disk->fops->set_read_only) {
		ret = bdev->bd_disk->fops->set_read_only(bdev, n);
		if (ret)
			return ret;
	}
	/* Any non-zero value means read-only. */
	if (n)
		bdev_set_flag(bdev, BD_READ_ONLY);
	else
		bdev_clear_flag(bdev, BD_READ_ONLY);
	return 0;
}
| |
| static int blkdev_getgeo(struct block_device *bdev, |
| struct hd_geometry __user *argp) |
| { |
| struct gendisk *disk = bdev->bd_disk; |
| struct hd_geometry geo; |
| int ret; |
| |
| if (!argp) |
| return -EINVAL; |
| if (!disk->fops->getgeo) |
| return -ENOTTY; |
| |
| /* |
| * We need to set the startsect first, the driver may |
| * want to override it. |
| */ |
| memset(&geo, 0, sizeof(geo)); |
| geo.start = get_start_sect(bdev); |
| ret = disk->fops->getgeo(bdev, &geo); |
| if (ret) |
| return ret; |
| if (copy_to_user(argp, &geo, sizeof(geo))) |
| return -EFAULT; |
| return 0; |
| } |
| |
| #ifdef CONFIG_COMPAT |
/* 32-bit layout of struct hd_geometry: start shrinks from unsigned long to u32. */
struct compat_hd_geometry {
	unsigned char heads;
	unsigned char sectors;
	unsigned short cylinders;
	u32 start;
};
| |
/*
 * HDIO_GETGEO for 32-bit userspace: like blkdev_getgeo(), but copies the
 * result out in the compat layout (start narrowed to u32).
 */
static int compat_hdio_getgeo(struct block_device *bdev,
			struct compat_hd_geometry __user *ugeo)
{
	struct gendisk *disk = bdev->bd_disk;
	struct hd_geometry geo;
	int ret;

	if (!ugeo)
		return -EINVAL;
	if (!disk->fops->getgeo)
		return -ENOTTY;

	memset(&geo, 0, sizeof(geo));
	/*
	 * We need to set the startsect first, the driver may
	 * want to override it.
	 */
	geo.start = get_start_sect(bdev);
	ret = disk->fops->getgeo(bdev, &geo);
	if (ret)
		return ret;

	/*
	 * heads, sectors and cylinders occupy the first 4 bytes in both the
	 * native and compat layouts; copy them raw, then store the narrowed
	 * start field separately.
	 */
	ret = copy_to_user(ugeo, &geo, 4);
	ret |= put_user(geo.start, &ugeo->start);
	if (ret)
		ret = -EFAULT;

	return ret;
}
| #endif |
| |
/* set the logical block size */
static int blkdev_bszset(struct file *file, blk_mode_t mode,
		int __user *argp)
{
	// this one might be file_inode(file)->i_rdev - a rare valid
	// use of file_inode() for those.
	dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev;
	struct file *excl_file;
	int ret, n;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (!argp)
		return -EINVAL;
	if (get_user(n, argp))
		return -EFAULT;

	/* Already opened exclusively: change the block size directly. */
	if (mode & BLK_OPEN_EXCL)
		return set_blocksize(file, n);

	/*
	 * Otherwise take a temporary exclusive open (holder = &dev) so the
	 * block size cannot change underneath another exclusive opener.
	 */
	excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL);
	if (IS_ERR(excl_file))
		return -EBUSY;
	ret = set_blocksize(excl_file, n);
	fput(excl_file);
	return ret;
}
| |
| /* |
| * Common commands that are handled the same way on native and compat |
| * user space. Note the separate arg/argp parameters that are needed |
| * to deal with the compat_ptr() conversion. |
| */ |
static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg,
		void __user *argp)
{
	unsigned int max_sectors;

	switch (cmd) {
	case BLKFLSBUF:
		return blkdev_flushbuf(bdev, cmd, arg);
	case BLKROSET:
		return blkdev_roset(bdev, cmd, arg);
	case BLKDISCARD:
		return blk_ioctl_discard(bdev, mode, arg);
	case BLKSECDISCARD:
		return blk_ioctl_secure_erase(bdev, mode, argp);
	case BLKZEROOUT:
		return blk_ioctl_zeroout(bdev, mode, arg);
	case BLKGETDISKSEQ:
		return put_u64(argp, bdev->bd_disk->diskseq);
	/* Zoned block device commands */
	case BLKREPORTZONE:
		return blkdev_report_zones_ioctl(bdev, cmd, arg);
	case BLKRESETZONE:
	case BLKOPENZONE:
	case BLKCLOSEZONE:
	case BLKFINISHZONE:
		return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
	case BLKGETZONESZ:
		return put_uint(argp, bdev_zone_sectors(bdev));
	case BLKGETNRZONES:
		return put_uint(argp, bdev_nr_zones(bdev));
	/* Queue limit and geometry queries */
	case BLKROGET:
		return put_int(argp, bdev_read_only(bdev) != 0);
	case BLKSSZGET: /* get block device logical block size */
		return put_int(argp, bdev_logical_block_size(bdev));
	case BLKPBSZGET: /* get block device physical block size */
		return put_uint(argp, bdev_physical_block_size(bdev));
	case BLKIOMIN:
		return put_uint(argp, bdev_io_min(bdev));
	case BLKIOOPT:
		return put_uint(argp, bdev_io_opt(bdev));
	case BLKALIGNOFF:
		return put_int(argp, bdev_alignment_offset(bdev));
	case BLKDISCARDZEROES:
		/* Always reports 0: discard does not guarantee zeroed data. */
		return put_uint(argp, 0);
	case BLKSECTGET:
		/* The ABI returns a short, so clamp to USHRT_MAX. */
		max_sectors = min_t(unsigned int, USHRT_MAX,
				    queue_max_sectors(bdev_get_queue(bdev)));
		return put_ushort(argp, max_sectors);
	case BLKROTATIONAL:
		return put_ushort(argp, !bdev_nonrot(bdev));
	case BLKRASET:
	case BLKFRASET:
		if(!capable(CAP_SYS_ADMIN))
			return -EACCES;
		/* arg is in 512-byte sectors; ra_pages is in pages. */
		bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE;
		return 0;
	case BLKRRPART:
		/* Re-read the partition table; whole devices only. */
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		if (bdev_is_partition(bdev))
			return -EINVAL;
		return disk_scan_partitions(bdev->bd_disk,
					    mode | BLK_OPEN_STRICT_SCAN);
	case BLKTRACESTART:
	case BLKTRACESTOP:
	case BLKTRACETEARDOWN:
		return blk_trace_ioctl(bdev, cmd, argp);
	/* Persistent reservations */
	case IOC_PR_REGISTER:
		return blkdev_pr_register(bdev, mode, argp);
	case IOC_PR_RESERVE:
		return blkdev_pr_reserve(bdev, mode, argp);
	case IOC_PR_RELEASE:
		return blkdev_pr_release(bdev, mode, argp);
	case IOC_PR_PREEMPT:
		return blkdev_pr_preempt(bdev, mode, argp, false);
	case IOC_PR_PREEMPT_ABORT:
		return blkdev_pr_preempt(bdev, mode, argp, true);
	case IOC_PR_CLEAR:
		return blkdev_pr_clear(bdev, mode, argp);
	default:
		/* Let the caller fall back to the driver ioctl. */
		return -ENOIOCTLCMD;
	}
}
| |
| /* |
| * Always keep this in sync with compat_blkdev_ioctl() |
| * to handle all incompatible commands in both functions. |
| * |
| * New commands must be compatible and go into blkdev_common_ioctl |
| */ |
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	void __user *argp = (void __user *)arg;
	blk_mode_t mode = file_to_blk_mode(file);
	int ret;

	switch (cmd) {
	/* These need separate implementations for the data structure */
	case HDIO_GETGEO:
		return blkdev_getgeo(bdev, argp);
	case BLKPG:
		return blkpg_ioctl(bdev, argp);

	/* Compat mode returns 32-bit data instead of 'long' */
	case BLKRAGET:
	case BLKFRAGET:
		if (!argp)
			return -EINVAL;
		/* Read-ahead is reported in 512-byte sectors. */
		return put_long(argp,
			(bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
	case BLKGETSIZE:
		/* Size in sectors must fit in an unsigned long. */
		if (bdev_nr_sectors(bdev) > ~0UL)
			return -EFBIG;
		return put_ulong(argp, bdev_nr_sectors(bdev));

	/* The data is compatible, but the command number is different */
	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
		return put_int(argp, block_size(bdev));
	case BLKBSZSET:
		return blkdev_bszset(file, mode, argp);
	case BLKGETSIZE64:
		return put_u64(argp, bdev_nr_bytes(bdev));

	/* Incompatible alignment on i386 */
	case BLKTRACESETUP:
		return blk_trace_ioctl(bdev, cmd, argp);
	default:
		break;
	}

	ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
	if (ret != -ENOIOCTLCMD)
		return ret;

	/* Not a generic command: hand it to the driver. */
	if (!bdev->bd_disk->fops->ioctl)
		return -ENOTTY;
	return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
}
| |
| #ifdef CONFIG_COMPAT |
| |
/* 32-bit command numbers whose argument size differs from the native ones. */
#define BLKBSZGET_32 _IOR(0x12, 112, int)
#define BLKBSZSET_32 _IOW(0x12, 113, int)
#define BLKGETSIZE64_32 _IOR(0x12, 114, int)

/* Most of the generic ioctls are handled in the normal fallback path.
   This assumes the blkdev's low level compat_ioctl always returns
   ENOIOCTLCMD for unknown ioctls. */
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	int ret;
	void __user *argp = compat_ptr(arg);
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	struct gendisk *disk = bdev->bd_disk;
	blk_mode_t mode = file_to_blk_mode(file);

	switch (cmd) {
	/* These need separate implementations for the data structure */
	case HDIO_GETGEO:
		return compat_hdio_getgeo(bdev, argp);
	case BLKPG:
		return compat_blkpg_ioctl(bdev, argp);

	/* Compat mode returns 32-bit data instead of 'long' */
	case BLKRAGET:
	case BLKFRAGET:
		if (!argp)
			return -EINVAL;
		/* Read-ahead is reported in 512-byte sectors. */
		return compat_put_long(argp,
			(bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
	case BLKGETSIZE:
		/* Size in sectors must fit in a 32-bit unsigned long. */
		if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0)
			return -EFBIG;
		return compat_put_ulong(argp, bdev_nr_sectors(bdev));

	/* The data is compatible, but the command number is different */
	case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
		return put_int(argp, bdev_logical_block_size(bdev));
	case BLKBSZSET_32:
		return blkdev_bszset(file, mode, argp);
	case BLKGETSIZE64_32:
		return put_u64(argp, bdev_nr_bytes(bdev));

	/* Incompatible alignment on i386 */
	case BLKTRACESETUP32:
		return blk_trace_ioctl(bdev, cmd, argp);
	default:
		break;
	}

	ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
	/* Not a generic command: hand it to the driver's compat hook. */
	if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl)
		ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);

	return ret;
}
| #endif |
| |
/* Per-command state for block io_uring commands, stored in the cmd pdu. */
struct blk_iou_cmd {
	int res;	/* first error seen, or 0 */
	bool nowait;	/* issued with IO_URING_F_NONBLOCK */
};
| |
/*
 * Task-work completion for a block uring command.  A nowait issue that
 * failed with -EAGAIN is re-issued from a blocking context instead of
 * completing with the error.
 */
static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);

	if (bic->res == -EAGAIN && bic->nowait)
		io_uring_cmd_issue_blocking(cmd);
	else
		io_uring_cmd_done(cmd, bic->res, 0, issue_flags);
}
| |
/*
 * bi_end_io for the final bio of a uring-cmd chain: record the error and
 * defer completion of the io_uring command to task context.
 */
static void bio_cmd_bio_end_io(struct bio *bio)
{
	struct io_uring_cmd *cmd = bio->bi_private;
	struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);

	/* Keep only the first error seen. */
	if (unlikely(bio->bi_status) && !bic->res)
		bic->res = blk_status_to_errno(bio->bi_status);

	io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
	bio_put(bio);
}
| |
/*
 * BLOCK_URING_CMD_DISCARD: discard [start, start + len) via io_uring.
 *
 * Returns -EIOCBQUEUED once the final bio has been submitted with a
 * completion callback, or a negative errno on early failure.  In nowait
 * mode only a single-bio discard is attempted; anything larger is punted
 * back with -EAGAIN so the core retries from a blocking context.
 */
static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
			      struct block_device *bdev,
			      uint64_t start, uint64_t len, bool nowait)
{
	struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
	gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL;
	sector_t sector = start >> SECTOR_SHIFT;
	sector_t nr_sects = len >> SECTOR_SHIFT;
	struct bio *prev = NULL, *bio;
	int err;

	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;
	if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE))
		return -EBADF;
	if (bdev_read_only(bdev))
		return -EPERM;
	err = blk_validate_byte_range(bdev, start, len);
	if (err)
		return err;

	/* Drop cached pages over the range before discarding underneath them. */
	err = filemap_invalidate_pages(bdev->bd_mapping, start,
				       start + len - 1, nowait);
	if (err)
		return err;

	while (true) {
		bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp);
		if (!bio)
			break;
		if (nowait) {
			/*
			 * Don't allow multi-bio non-blocking submissions as
			 * subsequent bios may fail but we won't get a direct
			 * indication of that. Normally, the caller should
			 * retry from a blocking context.
			 */
			if (unlikely(nr_sects)) {
				bio_put(bio);
				return -EAGAIN;
			}
			bio->bi_opf |= REQ_NOWAIT;
		}

		prev = bio_chain_and_submit(prev, bio);
	}
	/* No bio could be allocated at all: ask for a (blocking) retry. */
	if (unlikely(!prev))
		return -EAGAIN;
	/* Range only partially covered: complete with -EAGAIN for a retry. */
	if (unlikely(nr_sects))
		bic->res = -EAGAIN;

	/* The last bio in the chain completes the io_uring command. */
	prev->bi_private = cmd;
	prev->bi_end_io = bio_cmd_bio_end_io;
	submit_bio(prev);
	return -EIOCBQUEUED;
}
| |
| int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) |
| { |
| struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); |
| struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); |
| const struct io_uring_sqe *sqe = cmd->sqe; |
| u32 cmd_op = cmd->cmd_op; |
| uint64_t start, len; |
| |
| if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || |
| sqe->rw_flags || sqe->file_index)) |
| return -EINVAL; |
| |
| bic->res = 0; |
| bic->nowait = issue_flags & IO_URING_F_NONBLOCK; |
| |
| start = READ_ONCE(sqe->addr); |
| len = READ_ONCE(sqe->addr3); |
| |
| switch (cmd_op) { |
| case BLOCK_URING_CMD_DISCARD: |
| return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); |
| } |
| return -EINVAL; |
| } |