| // SPDX-License-Identifier: GPL-2.0 |
| #ifndef NO_BCACHEFS_FS |
| |
| #include "bcachefs.h" |
| #include "chardev.h" |
| #include "dirent.h" |
| #include "fs.h" |
| #include "fs-common.h" |
| #include "fs-ioctl.h" |
| #include "quota.h" |
| |
| #include <linux/compat.h> |
| #include <linux/fsnotify.h> |
| #include <linux/mount.h> |
| #include <linux/namei.h> |
| #include <linux/security.h> |
| #include <linux/writeback.h> |
| |
/*
 * Shutdown ioctl serviced by bch2_ioc_goingdown(); the u32 argument read
 * from userspace selects one of the FSOP_GOING_FLAGS_* modes below.
 */
#define FS_IOC_GOINGDOWN		_IOR('X', 125, __u32)
/* freeze the block device, flush the journal, go emergency read-only, thaw */
#define FSOP_GOING_FLAGS_DEFAULT	0x0
/* flush the journal, then go emergency read-only */
#define FSOP_GOING_FLAGS_LOGFLUSH	0x1
/* go emergency read-only without flushing the journal */
#define FSOP_GOING_FLAGS_NOLOGFLUSH	0x2
| |
| struct flags_set { |
| unsigned mask; |
| unsigned flags; |
| |
| unsigned projid; |
| |
| bool set_projinherit; |
| bool projinherit; |
| }; |
| |
| static int bch2_inode_flags_set(struct btree_trans *trans, |
| struct bch_inode_info *inode, |
| struct bch_inode_unpacked *bi, |
| void *p) |
| { |
| struct bch_fs *c = inode->v.i_sb->s_fs_info; |
| /* |
| * We're relying on btree locking here for exclusion with other ioctl |
| * calls - use the flags in the btree (@bi), not inode->i_flags: |
| */ |
| struct flags_set *s = p; |
| unsigned newflags = s->flags; |
| unsigned oldflags = bi->bi_flags & s->mask; |
| |
| if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && |
| !capable(CAP_LINUX_IMMUTABLE)) |
| return -EPERM; |
| |
| if (!S_ISREG(bi->bi_mode) && |
| !S_ISDIR(bi->bi_mode) && |
| (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) |
| return -EINVAL; |
| |
| if (s->set_projinherit) { |
| bi->bi_fields_set &= ~(1 << Inode_opt_project); |
| bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); |
| } |
| |
| bi->bi_flags &= ~s->mask; |
| bi->bi_flags |= newflags; |
| |
| bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); |
| return 0; |
| } |
| |
| static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) |
| { |
| unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); |
| |
| return put_user(flags, arg); |
| } |
| |
| static int bch2_ioc_setflags(struct bch_fs *c, |
| struct file *file, |
| struct bch_inode_info *inode, |
| void __user *arg) |
| { |
| struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; |
| unsigned uflags; |
| int ret; |
| |
| if (get_user(uflags, (int __user *) arg)) |
| return -EFAULT; |
| |
| s.flags = map_flags_rev(bch_flags_to_uflags, uflags); |
| if (uflags) |
| return -EOPNOTSUPP; |
| |
| ret = mnt_want_write_file(file); |
| if (ret) |
| return ret; |
| |
| inode_lock(&inode->v); |
| if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { |
| ret = -EACCES; |
| goto setflags_out; |
| } |
| |
| mutex_lock(&inode->ei_update_lock); |
| ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: |
| bch2_write_inode(c, inode, bch2_inode_flags_set, &s, |
| ATTR_CTIME); |
| mutex_unlock(&inode->ei_update_lock); |
| |
| setflags_out: |
| inode_unlock(&inode->v); |
| mnt_drop_write_file(file); |
| return ret; |
| } |
| |
| static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, |
| struct fsxattr __user *arg) |
| { |
| struct fsxattr fa = { 0 }; |
| |
| fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); |
| |
| if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) |
| fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; |
| |
| fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; |
| |
| if (copy_to_user(arg, &fa, sizeof(fa))) |
| return -EFAULT; |
| |
| return 0; |
| } |
| |
| static int fssetxattr_inode_update_fn(struct btree_trans *trans, |
| struct bch_inode_info *inode, |
| struct bch_inode_unpacked *bi, |
| void *p) |
| { |
| struct flags_set *s = p; |
| |
| if (s->projid != bi->bi_project) { |
| bi->bi_fields_set |= 1U << Inode_opt_project; |
| bi->bi_project = s->projid; |
| } |
| |
| return bch2_inode_flags_set(trans, inode, bi, p); |
| } |
| |
| static int bch2_ioc_fssetxattr(struct bch_fs *c, |
| struct file *file, |
| struct bch_inode_info *inode, |
| struct fsxattr __user *arg) |
| { |
| struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; |
| struct fsxattr fa; |
| int ret; |
| |
| if (copy_from_user(&fa, arg, sizeof(fa))) |
| return -EFAULT; |
| |
| s.set_projinherit = true; |
| s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; |
| fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; |
| |
| s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); |
| if (fa.fsx_xflags) |
| return -EOPNOTSUPP; |
| |
| if (fa.fsx_projid >= U32_MAX) |
| return -EINVAL; |
| |
| /* |
| * inode fields accessible via the xattr interface are stored with a +1 |
| * bias, so that 0 means unset: |
| */ |
| s.projid = fa.fsx_projid + 1; |
| |
| ret = mnt_want_write_file(file); |
| if (ret) |
| return ret; |
| |
| inode_lock(&inode->v); |
| if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { |
| ret = -EACCES; |
| goto err; |
| } |
| |
| mutex_lock(&inode->ei_update_lock); |
| ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: |
| bch2_set_projid(c, inode, fa.fsx_projid) ?: |
| bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, |
| ATTR_CTIME); |
| mutex_unlock(&inode->ei_update_lock); |
| err: |
| inode_unlock(&inode->v); |
| mnt_drop_write_file(file); |
| return ret; |
| } |
| |
| static int bch2_reinherit_attrs_fn(struct btree_trans *trans, |
| struct bch_inode_info *inode, |
| struct bch_inode_unpacked *bi, |
| void *p) |
| { |
| struct bch_inode_info *dir = p; |
| |
| return !bch2_reinherit_attrs(bi, &dir->ei_inode); |
| } |
| |
| static int bch2_ioc_reinherit_attrs(struct bch_fs *c, |
| struct file *file, |
| struct bch_inode_info *src, |
| const char __user *name) |
| { |
| struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); |
| struct bch_inode_info *dst; |
| struct inode *vinode = NULL; |
| char *kname = NULL; |
| struct qstr qstr; |
| int ret = 0; |
| subvol_inum inum; |
| |
| kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); |
| if (!kname) |
| return -ENOMEM; |
| |
| ret = strncpy_from_user(kname, name, BCH_NAME_MAX); |
| if (unlikely(ret < 0)) |
| goto err1; |
| |
| qstr.len = ret; |
| qstr.name = kname; |
| |
| ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum); |
| if (ret) |
| goto err1; |
| |
| vinode = bch2_vfs_inode_get(c, inum); |
| ret = PTR_ERR_OR_ZERO(vinode); |
| if (ret) |
| goto err1; |
| |
| dst = to_bch_ei(vinode); |
| |
| ret = mnt_want_write_file(file); |
| if (ret) |
| goto err2; |
| |
| bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); |
| |
| if (inode_attr_changing(src, dst, Inode_opt_project)) { |
| ret = bch2_fs_quota_transfer(c, dst, |
| src->ei_qid, |
| 1 << QTYP_PRJ, |
| KEY_TYPE_QUOTA_PREALLOC); |
| if (ret) |
| goto err3; |
| } |
| |
| ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); |
| err3: |
| bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); |
| |
| /* return true if we did work */ |
| if (ret >= 0) |
| ret = !ret; |
| |
| mnt_drop_write_file(file); |
| err2: |
| iput(vinode); |
| err1: |
| kfree(kname); |
| |
| return ret; |
| } |
| |
| static int bch2_ioc_getversion(struct bch_inode_info *inode, u32 __user *arg) |
| { |
| return put_user(inode->v.i_generation, arg); |
| } |
| |
| static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) |
| { |
| int ret; |
| size_t len; |
| char label[BCH_SB_LABEL_SIZE]; |
| |
| BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); |
| |
| mutex_lock(&c->sb_lock); |
| memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); |
| mutex_unlock(&c->sb_lock); |
| |
| len = strnlen(label, BCH_SB_LABEL_SIZE); |
| if (len == BCH_SB_LABEL_SIZE) { |
| bch_warn(c, |
| "label is too long, return the first %zu bytes", |
| --len); |
| } |
| |
| ret = copy_to_user(user_label, label, len); |
| |
| return ret ? -EFAULT : 0; |
| } |
| |
| static int bch2_ioc_setlabel(struct bch_fs *c, |
| struct file *file, |
| struct bch_inode_info *inode, |
| const char __user *user_label) |
| { |
| int ret; |
| char label[BCH_SB_LABEL_SIZE]; |
| |
| if (!capable(CAP_SYS_ADMIN)) |
| return -EPERM; |
| |
| if (copy_from_user(label, user_label, sizeof(label))) |
| return -EFAULT; |
| |
| if (strnlen(label, BCH_SB_LABEL_SIZE) == BCH_SB_LABEL_SIZE) { |
| bch_err(c, |
| "unable to set label with more than %d bytes", |
| BCH_SB_LABEL_SIZE - 1); |
| return -EINVAL; |
| } |
| |
| ret = mnt_want_write_file(file); |
| if (ret) |
| return ret; |
| |
| mutex_lock(&c->sb_lock); |
| strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); |
| ret = bch2_write_super(c); |
| mutex_unlock(&c->sb_lock); |
| |
| mnt_drop_write_file(file); |
| return ret; |
| } |
| |
| static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) |
| { |
| u32 flags; |
| int ret = 0; |
| |
| if (!capable(CAP_SYS_ADMIN)) |
| return -EPERM; |
| |
| if (get_user(flags, arg)) |
| return -EFAULT; |
| |
| bch_notice(c, "shutdown by ioctl type %u", flags); |
| |
| switch (flags) { |
| case FSOP_GOING_FLAGS_DEFAULT: |
| ret = bdev_freeze(c->vfs_sb->s_bdev); |
| if (ret) |
| break; |
| bch2_journal_flush(&c->journal); |
| bch2_fs_emergency_read_only(c); |
| bdev_thaw(c->vfs_sb->s_bdev); |
| break; |
| case FSOP_GOING_FLAGS_LOGFLUSH: |
| bch2_journal_flush(&c->journal); |
| fallthrough; |
| case FSOP_GOING_FLAGS_NOLOGFLUSH: |
| bch2_fs_emergency_read_only(c); |
| break; |
| default: |
| ret = -EINVAL; |
| break; |
| } |
| |
| return ret; |
| } |
| |
| static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, |
| struct bch_ioctl_subvolume arg) |
| { |
| struct inode *dir; |
| struct bch_inode_info *inode; |
| struct user_namespace *s_user_ns; |
| struct dentry *dst_dentry; |
| struct path src_path, dst_path; |
| int how = LOOKUP_FOLLOW; |
| int error; |
| subvol_inum snapshot_src = { 0 }; |
| unsigned lookup_flags = 0; |
| unsigned create_flags = BCH_CREATE_SUBVOL; |
| |
| if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE| |
| BCH_SUBVOL_SNAPSHOT_RO)) |
| return -EINVAL; |
| |
| if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && |
| (arg.src_ptr || |
| (arg.flags & BCH_SUBVOL_SNAPSHOT_RO))) |
| return -EINVAL; |
| |
| if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) |
| create_flags |= BCH_CREATE_SNAPSHOT; |
| |
| if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) |
| create_flags |= BCH_CREATE_SNAPSHOT_RO; |
| |
| if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { |
| /* sync_inodes_sb enforce s_umount is locked */ |
| down_read(&c->vfs_sb->s_umount); |
| sync_inodes_sb(c->vfs_sb); |
| up_read(&c->vfs_sb->s_umount); |
| } |
| retry: |
| if (arg.src_ptr) { |
| error = user_path_at(arg.dirfd, |
| (const char __user *)(unsigned long)arg.src_ptr, |
| how, &src_path); |
| if (error) |
| goto err1; |
| |
| if (src_path.dentry->d_sb->s_fs_info != c) { |
| path_put(&src_path); |
| error = -EXDEV; |
| goto err1; |
| } |
| |
| snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode)); |
| } |
| |
| dst_dentry = user_path_create(arg.dirfd, |
| (const char __user *)(unsigned long)arg.dst_ptr, |
| &dst_path, lookup_flags); |
| error = PTR_ERR_OR_ZERO(dst_dentry); |
| if (error) |
| goto err2; |
| |
| if (dst_dentry->d_sb->s_fs_info != c) { |
| error = -EXDEV; |
| goto err3; |
| } |
| |
| if (dst_dentry->d_inode) { |
| error = -BCH_ERR_EEXIST_subvolume_create; |
| goto err3; |
| } |
| |
| dir = dst_path.dentry->d_inode; |
| if (IS_DEADDIR(dir)) { |
| error = -BCH_ERR_ENOENT_directory_dead; |
| goto err3; |
| } |
| |
| s_user_ns = dir->i_sb->s_user_ns; |
| if (!kuid_has_mapping(s_user_ns, current_fsuid()) || |
| !kgid_has_mapping(s_user_ns, current_fsgid())) { |
| error = -EOVERFLOW; |
| goto err3; |
| } |
| |
| error = inode_permission(file_mnt_idmap(filp), |
| dir, MAY_WRITE | MAY_EXEC); |
| if (error) |
| goto err3; |
| |
| if (!IS_POSIXACL(dir)) |
| arg.mode &= ~current_umask(); |
| |
| error = security_path_mkdir(&dst_path, dst_dentry, arg.mode); |
| if (error) |
| goto err3; |
| |
| if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && |
| !arg.src_ptr) |
| snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; |
| |
| down_write(&c->snapshot_create_lock); |
| inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), |
| dst_dentry, arg.mode|S_IFDIR, |
| 0, snapshot_src, create_flags); |
| up_write(&c->snapshot_create_lock); |
| |
| error = PTR_ERR_OR_ZERO(inode); |
| if (error) |
| goto err3; |
| |
| d_instantiate(dst_dentry, &inode->v); |
| fsnotify_mkdir(dir, dst_dentry); |
| err3: |
| done_path_create(&dst_path, dst_dentry); |
| err2: |
| if (arg.src_ptr) |
| path_put(&src_path); |
| |
| if (retry_estale(error, lookup_flags)) { |
| lookup_flags |= LOOKUP_REVAL; |
| goto retry; |
| } |
| err1: |
| return error; |
| } |
| |
| static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, |
| struct bch_ioctl_subvolume arg) |
| { |
| const char __user *name = (void __user *)(unsigned long)arg.dst_ptr; |
| struct path path; |
| struct inode *dir; |
| struct dentry *victim; |
| int ret = 0; |
| |
| if (arg.flags) |
| return -EINVAL; |
| |
| victim = user_path_locked_at(arg.dirfd, name, &path); |
| if (IS_ERR(victim)) |
| return PTR_ERR(victim); |
| |
| dir = d_inode(path.dentry); |
| if (victim->d_sb->s_fs_info != c) { |
| ret = -EXDEV; |
| goto err; |
| } |
| if (!d_is_positive(victim)) { |
| ret = -ENOENT; |
| goto err; |
| } |
| ret = __bch2_unlink(dir, victim, true); |
| if (!ret) { |
| fsnotify_rmdir(dir, victim); |
| d_delete(victim); |
| } |
| err: |
| inode_unlock(dir); |
| dput(victim); |
| path_put(&path); |
| return ret; |
| } |
| |
| long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| { |
| struct bch_inode_info *inode = file_bch_inode(file); |
| struct bch_fs *c = inode->v.i_sb->s_fs_info; |
| long ret; |
| |
| switch (cmd) { |
| case FS_IOC_GETFLAGS: |
| ret = bch2_ioc_getflags(inode, (int __user *) arg); |
| break; |
| |
| case FS_IOC_SETFLAGS: |
| ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); |
| break; |
| |
| case FS_IOC_FSGETXATTR: |
| ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); |
| break; |
| |
| case FS_IOC_FSSETXATTR: |
| ret = bch2_ioc_fssetxattr(c, file, inode, |
| (void __user *) arg); |
| break; |
| |
| case BCHFS_IOC_REINHERIT_ATTRS: |
| ret = bch2_ioc_reinherit_attrs(c, file, inode, |
| (void __user *) arg); |
| break; |
| |
| case FS_IOC_GETVERSION: |
| ret = bch2_ioc_getversion(inode, (u32 __user *) arg); |
| break; |
| |
| case FS_IOC_SETVERSION: |
| ret = -ENOTTY; |
| break; |
| |
| case FS_IOC_GETFSLABEL: |
| ret = bch2_ioc_getlabel(c, (void __user *) arg); |
| break; |
| |
| case FS_IOC_SETFSLABEL: |
| ret = bch2_ioc_setlabel(c, file, inode, (const void __user *) arg); |
| break; |
| |
| case FS_IOC_GOINGDOWN: |
| ret = bch2_ioc_goingdown(c, (u32 __user *) arg); |
| break; |
| |
| case BCH_IOCTL_SUBVOLUME_CREATE: { |
| struct bch_ioctl_subvolume i; |
| |
| ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) |
| ? -EFAULT |
| : bch2_ioctl_subvolume_create(c, file, i); |
| break; |
| } |
| |
| case BCH_IOCTL_SUBVOLUME_DESTROY: { |
| struct bch_ioctl_subvolume i; |
| |
| ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) |
| ? -EFAULT |
| : bch2_ioctl_subvolume_destroy(c, file, i); |
| break; |
| } |
| |
| default: |
| ret = bch2_fs_ioctl(c, cmd, (void __user *) arg); |
| break; |
| } |
| |
| return bch2_err_class(ret); |
| } |
| |
| #ifdef CONFIG_COMPAT |
| long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| { |
| /* These are just misnamed, they actually get/put from/to user an int */ |
| switch (cmd) { |
| case FS_IOC32_GETFLAGS: |
| cmd = FS_IOC_GETFLAGS; |
| break; |
| case FS_IOC32_SETFLAGS: |
| cmd = FS_IOC_SETFLAGS; |
| break; |
| case FS_IOC32_GETVERSION: |
| cmd = FS_IOC_GETVERSION; |
| break; |
| case FS_IOC_GETFSLABEL: |
| case FS_IOC_SETFSLABEL: |
| break; |
| default: |
| return -ENOIOCTLCMD; |
| } |
| return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); |
| } |
| #endif |
| |
| #endif /* NO_BCACHEFS_FS */ |