// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>

#include "internal.h"

/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_path: path that the user requested to open
 * @flags: open flags
 * @real_path: path of the backing file
 * @cred: credentials for open
 *
 * Open a backing file for a stackable filesystem (e.g., overlayfs).
 * @user_path may be on the stackable filesystem and @real_path on the
 * underlying filesystem. In this case, we want to be able to return the
 * @user_path of the stackable filesystem. This is done by embedding the
 * returned file into a container structure that also stores the stacked
 * file's path, which can be retrieved using backing_file_user_path().
 */
struct file *backing_file_open(const struct path *user_path, int flags,
                               const struct path *real_path,
                               const struct cred *cred)
{
        struct file *f;
        int error;

        f = alloc_empty_backing_file(flags, cred);
        if (IS_ERR(f))
                return f;

        path_get(user_path);
        *backing_file_user_path(f) = *user_path;
        error = vfs_open(real_path, f);
        if (error) {
                fput(f);
                f = ERR_PTR(error);
        }

        return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
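
/*
 * Illustrative sketch of a caller (an assumption, not lifted from any
 * in-tree filesystem): a stackable filesystem's ->open() would typically
 * look up the file's path on the underlying filesystem and open it with
 * the mounter's credentials, e.g. with hypothetical my_fs_*() helpers:
 *
 *	static int my_fs_open(struct inode *inode, struct file *file)
 *	{
 *		struct path realpath;
 *		struct file *realfile;
 *
 *		my_fs_real_path(file, &realpath);
 *		realfile = backing_file_open(&file->f_path, file->f_flags,
 *					     &realpath, my_fs_creds(file));
 *		if (IS_ERR(realfile))
 *			return PTR_ERR(realfile);
 *		file->private_data = realfile;
 *		return 0;
 *	}
 */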

struct file *backing_tmpfile_open(const struct path *user_path, int flags,
                                  const struct path *real_parentpath,
                                  umode_t mode, const struct cred *cred)
{
        struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
        struct file *f;
        int error;

        f = alloc_empty_backing_file(flags, cred);
        if (IS_ERR(f))
                return f;

        path_get(user_path);
        *backing_file_user_path(f) = *user_path;
        error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
        if (error) {
                fput(f);
                f = ERR_PTR(error);
        }
        return f;
}
EXPORT_SYMBOL_GPL(backing_tmpfile_open);

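/*
 * Each async request holds two references on its backing_aio: one dropped
 * by the submitter once vfs_iocb_iter_read/write() has returned, and one
 * dropped via backing_aio_cleanup(), either from the completion callback or
 * by the submitter itself when the request was not queued.  This keeps the
 * cloned kiocb and its pinned backing file alive until both sides are done.
 */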
struct backing_aio {
        struct kiocb iocb;
        refcount_t ref;
        struct kiocb *orig_iocb;
        /* used for aio completion */
        void (*end_write)(struct file *);
        struct work_struct work;
        long res;
};

static struct kmem_cache *backing_aio_cachep;

#define BACKING_IOCB_MASK \
        (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

static rwf_t iocb_to_rw_flags(int flags)
{
        return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

static void backing_aio_put(struct backing_aio *aio)
{
        if (refcount_dec_and_test(&aio->ref)) {
                fput(aio->iocb.ki_filp);
                kmem_cache_free(backing_aio_cachep, aio);
        }
}

static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
        struct kiocb *iocb = &aio->iocb;
        struct kiocb *orig_iocb = aio->orig_iocb;

        if (aio->end_write)
                aio->end_write(orig_iocb->ki_filp);

        orig_iocb->ki_pos = iocb->ki_pos;
        backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
        struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
        struct kiocb *orig_iocb = aio->orig_iocb;

        if (iocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(iocb);

        backing_aio_cleanup(aio, res);
        orig_iocb->ki_complete(orig_iocb, res);
}

static void backing_aio_complete_work(struct work_struct *work)
{
        struct backing_aio *aio = container_of(work, struct backing_aio, work);

        backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
        struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

        /*
         * Punt to a work queue to serialize updates of mtime/size.
         */
        aio->res = res;
        INIT_WORK(&aio->work, backing_aio_complete_work);
        queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
                   &aio->work);
}

static int backing_aio_init_wq(struct kiocb *iocb)
{
        struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

        if (sb->s_dio_done_wq)
                return 0;

        return sb_init_dio_done_wq(sb);
}

ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
                               struct kiocb *iocb, int flags,
                               struct backing_file_ctx *ctx)
{
        struct backing_aio *aio = NULL;
        const struct cred *old_cred = NULL;
        ssize_t ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
                return -EIO;

        if (!iov_iter_count(iter))
                return 0;

        if (iocb->ki_flags & IOCB_DIRECT &&
            !(file->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

        if (ctx->cred)
                old_cred = override_creds(ctx->cred);
        if (is_sync_kiocb(iocb)) {
                rwf_t rwf = iocb_to_rw_flags(flags);

                ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
        } else {
                ret = -ENOMEM;
                aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
                if (!aio)
                        goto out;

                aio->orig_iocb = iocb;
                kiocb_clone(&aio->iocb, iocb, get_file(file));
                aio->iocb.ki_complete = backing_aio_rw_complete;
                refcount_set(&aio->ref, 2);
                ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
                backing_aio_put(aio);
                if (ret != -EIOCBQUEUED)
                        backing_aio_cleanup(aio, ret);
        }
out:
        if (old_cred)
                revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
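
/*
 * Illustrative sketch of a caller, using hypothetical my_fs_*() helpers and
 * callbacks (assumptions, not part of this API): a stackable filesystem's
 * ->read_iter() would fill in a backing_file_ctx with the mounter's
 * credentials, its own (stacked) file and optional callbacks, and forward
 * the request to the backing file:
 *
 *	static ssize_t my_fs_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct file *file = iocb->ki_filp;
 *		struct backing_file_ctx ctx = {
 *			.cred = my_fs_creds(file),
 *			.user_file = file,
 *			.accessed = my_fs_file_accessed,
 *		};
 *
 *		return backing_file_read_iter(my_fs_backing_file(file), to,
 *					      iocb, iocb->ki_flags, &ctx);
 *	}
 */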

ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
                                struct kiocb *iocb, int flags,
                                struct backing_file_ctx *ctx)
{
        const struct cred *old_cred = NULL;
        ssize_t ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
                return -EIO;

        if (!iov_iter_count(iter))
                return 0;

        ret = file_remove_privs(ctx->user_file);
        if (ret)
                return ret;

        if (iocb->ki_flags & IOCB_DIRECT &&
            !(file->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

        /*
         * Stacked filesystems don't support deferred completions, so don't
         * copy this property in case it is set by the issuer.
         */
        flags &= ~IOCB_DIO_CALLER_COMP;

        if (ctx->cred)
                old_cred = override_creds(ctx->cred);
        if (is_sync_kiocb(iocb)) {
                rwf_t rwf = iocb_to_rw_flags(flags);

                ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
                if (ctx->end_write)
                        ctx->end_write(ctx->user_file);
        } else {
                struct backing_aio *aio;

                ret = backing_aio_init_wq(iocb);
                if (ret)
                        goto out;

                ret = -ENOMEM;
                aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
                if (!aio)
                        goto out;

                aio->orig_iocb = iocb;
                aio->end_write = ctx->end_write;
                kiocb_clone(&aio->iocb, iocb, get_file(file));
                aio->iocb.ki_flags = flags;
                aio->iocb.ki_complete = backing_aio_queue_completion;
                refcount_set(&aio->ref, 2);
                ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
                backing_aio_put(aio);
                if (ret != -EIOCBQUEUED)
                        backing_aio_cleanup(aio, ret);
        }
out:
        if (old_cred)
                revert_creds(old_cred);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
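
/*
 * A stackable filesystem that passes an end_write callback in its
 * backing_file_ctx can use it to bring its own inode up to date once the
 * write has hit the backing inode (e.g. size and mtime, as noted above);
 * for AIO the callback runs from the deferred completion work item rather
 * than from the submitter's context.
 */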

ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags,
                                 struct backing_file_ctx *ctx)
{
        const struct cred *old_cred = NULL;
        ssize_t ret;

        if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
                return -EIO;

        if (ctx->cred)
                old_cred = override_creds(ctx->cred);
        ret = vfs_splice_read(in, ppos, pipe, len, flags);
        if (old_cred)
                revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);

ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
                                  struct file *out, loff_t *ppos, size_t len,
                                  unsigned int flags,
                                  struct backing_file_ctx *ctx)
{
        const struct cred *old_cred = NULL;
        ssize_t ret;

        if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
                return -EIO;

        if (!out->f_op->splice_write)
                return -EINVAL;

        ret = file_remove_privs(ctx->user_file);
        if (ret)
                return ret;

        if (ctx->cred)
                old_cred = override_creds(ctx->cred);
        file_start_write(out);
        ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
        file_end_write(out);
        if (old_cred)
                revert_creds(old_cred);

        if (ctx->end_write)
                ctx->end_write(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);

int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
                      struct backing_file_ctx *ctx)
{
        const struct cred *old_cred = NULL;
        int ret;

        if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
            WARN_ON_ONCE(ctx->user_file != vma->vm_file))
                return -EIO;

        if (!file->f_op->mmap)
                return -ENODEV;

        vma_set_file(vma, file);

        if (ctx->cred)
                old_cred = override_creds(ctx->cred);
        ret = call_mmap(vma->vm_file, vma);
        if (old_cred)
                revert_creds(old_cred);

        if (ctx->accessed)
                ctx->accessed(ctx->user_file);

        return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);
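
/*
 * Note for callers: backing_file_mmap() expects to be called from a
 * stackable filesystem's ->mmap() while the vma still carries the stacked
 * file (vma->vm_file == ctx->user_file); it switches vma->vm_file to the
 * backing file via vma_set_file() before calling the backing filesystem's
 * ->mmap().
 */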

static int __init backing_aio_init(void)
{
        backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
        if (!backing_aio_cachep)
                return -ENOMEM;

        return 0;
}
fs_initcall(backing_aio_init);