io_uring: split out open/close operations
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 5efc4fe..e60def3 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -3,5 +3,6 @@
# Makefile for io_uring
obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
- sync.o advise.o filetable.o
+ sync.o advise.o filetable.o \
+ openclose.o
obj-$(CONFIG_IO_WQ) += io-wq.o
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4b4d6fd..a79186b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -98,6 +98,7 @@
#include "splice.h"
#include "sync.h"
#include "advise.h"
+#include "openclose.h"
#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
@@ -283,12 +284,6 @@ struct io_poll_update {
bool update_user_data;
};
-struct io_close {
- struct file *file;
- int fd;
- u32 file_slot;
-};
-
struct io_timeout_data {
struct io_kiocb *req;
struct hrtimer timer;
@@ -371,15 +366,6 @@ struct io_sr_msg {
unsigned int flags;
};
-struct io_open {
- struct file *file;
- int dfd;
- u32 file_slot;
- struct filename *filename;
- struct open_how how;
- unsigned long nofile;
-};
-
struct io_rsrc_update {
struct file *file;
u64 arg;
@@ -555,9 +541,6 @@ static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
-static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
- unsigned int offset);
-static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
@@ -670,10 +653,15 @@ const char *io_uring_get_opcode(u8 opcode)
return "INVALID";
}
+bool io_is_uring_fops(struct file *file)
+{
+ return file->f_op == &io_uring_fops;
+}
+
struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
- if (file->f_op == &io_uring_fops) {
+ if (io_is_uring_fops(file)) {
struct io_ring_ctx *ctx = file->private_data;
return ctx->ring_sock->sk;
@@ -699,26 +687,6 @@ static inline bool io_file_need_scm(struct file *filp)
}
#endif
-static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags)
-{
- lockdep_assert_held(&ctx->uring_lock);
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_unlock(&ctx->uring_lock);
-}
-
-static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags)
-{
- /*
- * "Normal" inline submissions always hold the uring_lock, since we
- * grab it from the system call. Same is true for the SQPOLL offload.
- * The only exception is when we've detached the request and issue it
- * from an async worker thread, grab the lock for that case.
- */
- if (issue_flags & IO_URING_F_UNLOCKED)
- mutex_lock(&ctx->uring_lock);
- lockdep_assert_held(&ctx->uring_lock);
-}
-
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
{
if (!*locked) {
@@ -3899,74 +3867,12 @@ static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
-static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_open *open = io_kiocb_to_cmd(req);
- const char __user *fname;
- int ret;
-
- if (unlikely(sqe->buf_index))
- return -EINVAL;
- if (unlikely(req->flags & REQ_F_FIXED_FILE))
- return -EBADF;
-
- /* open.how should be already initialised */
- if (!(open->how.flags & O_PATH) && force_o_largefile())
- open->how.flags |= O_LARGEFILE;
-
- open->dfd = READ_ONCE(sqe->fd);
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
- open->filename = getname(fname);
- if (IS_ERR(open->filename)) {
- ret = PTR_ERR(open->filename);
- open->filename = NULL;
- return ret;
- }
-
- open->file_slot = READ_ONCE(sqe->file_index);
- if (open->file_slot && (open->how.flags & O_CLOEXEC))
- return -EINVAL;
-
- open->nofile = rlimit(RLIMIT_NOFILE);
- req->flags |= REQ_F_NEED_CLEANUP;
- return 0;
-}
-
-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_open *open = io_kiocb_to_cmd(req);
- u64 mode = READ_ONCE(sqe->len);
- u64 flags = READ_ONCE(sqe->open_flags);
-
- open->how = build_open_how(flags, mode);
- return __io_openat_prep(req, sqe);
-}
-
-static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_open *open = io_kiocb_to_cmd(req);
- struct open_how __user *how;
- size_t len;
- int ret;
-
- how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
- len = READ_ONCE(sqe->len);
- if (len < OPEN_HOW_SIZE_VER0)
- return -EINVAL;
-
- ret = copy_struct_from_user(&open->how, sizeof(open->how), how, len);
- if (ret)
- return ret;
-
- return __io_openat_prep(req, sqe);
-}
-
/*
* Note when io_fixed_fd_install() returns error value, it will ensure
* fput() is called correspondingly.
*/
-static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
- struct file *file, unsigned int file_slot)
+int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+ struct file *file, unsigned int file_slot)
{
bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC;
struct io_ring_ctx *ctx = req->ctx;
@@ -3993,86 +3899,6 @@ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
return ret;
}
-static int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_open *open = io_kiocb_to_cmd(req);
- struct open_flags op;
- struct file *file;
- bool resolve_nonblock, nonblock_set;
- bool fixed = !!open->file_slot;
- int ret;
-
- ret = build_open_flags(&open->how, &op);
- if (ret)
- goto err;
- nonblock_set = op.open_flag & O_NONBLOCK;
- resolve_nonblock = open->how.resolve & RESOLVE_CACHED;
- if (issue_flags & IO_URING_F_NONBLOCK) {
- /*
- * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
- * it'll always -EAGAIN
- */
- if (open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE))
- return -EAGAIN;
- op.lookup_flags |= LOOKUP_CACHED;
- op.open_flag |= O_NONBLOCK;
- }
-
- if (!fixed) {
- ret = __get_unused_fd_flags(open->how.flags, open->nofile);
- if (ret < 0)
- goto err;
- }
-
- file = do_filp_open(open->dfd, open->filename, &op);
- if (IS_ERR(file)) {
- /*
- * We could hang on to this 'fd' on retrying, but seems like
- * marginal gain for something that is now known to be a slower
- * path. So just put it, and we'll get a new one when we retry.
- */
- if (!fixed)
- put_unused_fd(ret);
-
- ret = PTR_ERR(file);
- /* only retry if RESOLVE_CACHED wasn't already set by application */
- if (ret == -EAGAIN &&
- (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)))
- return -EAGAIN;
- goto err;
- }
-
- if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
- file->f_flags &= ~O_NONBLOCK;
- fsnotify_open(file);
-
- if (!fixed)
- fd_install(ret, file);
- else
- ret = io_fixed_fd_install(req, issue_flags, file,
- open->file_slot);
-err:
- putname(open->filename);
- req->flags &= ~REQ_F_NEED_CLEANUP;
- if (ret < 0)
- req_set_fail(req);
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
-static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
-{
- return io_openat2(req, issue_flags);
-}
-
-static void io_open_cleanup(struct io_kiocb *req)
-{
- struct io_open *open = io_kiocb_to_cmd(req);
-
- if (open->filename)
- putname(open->filename);
-}
-
static int io_remove_buffers_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -4424,69 +4250,6 @@ static void io_statx_cleanup(struct io_kiocb *req)
putname(sx->filename);
}
-static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_close *close = io_kiocb_to_cmd(req);
-
- if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
- return -EINVAL;
- if (req->flags & REQ_F_FIXED_FILE)
- return -EBADF;
-
- close->fd = READ_ONCE(sqe->fd);
- close->file_slot = READ_ONCE(sqe->file_index);
- if (close->file_slot && close->fd)
- return -EINVAL;
-
- return 0;
-}
-
-static int io_close(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct files_struct *files = current->files;
- struct io_close *close = io_kiocb_to_cmd(req);
- struct fdtable *fdt;
- struct file *file;
- int ret = -EBADF;
-
- if (close->file_slot) {
- ret = io_close_fixed(req, issue_flags);
- goto err;
- }
-
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (close->fd >= fdt->max_fds) {
- spin_unlock(&files->file_lock);
- goto err;
- }
- file = rcu_dereference_protected(fdt->fd[close->fd],
- lockdep_is_held(&files->file_lock));
- if (!file || file->f_op == &io_uring_fops) {
- spin_unlock(&files->file_lock);
- goto err;
- }
-
- /* if the file has a flush method, be safe and punt to async */
- if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) {
- spin_unlock(&files->file_lock);
- return -EAGAIN;
- }
-
- file = __close_fd_get_file(close->fd);
- spin_unlock(&files->file_lock);
- if (!file)
- goto err;
-
- /* No ->flush() or already async, safely close from here */
- ret = filp_close(file, current->files);
-err:
- if (ret < 0)
- req_set_fail(req);
- io_req_set_res(req, ret, 0);
- return IOU_OK;
-}
-
#if defined(CONFIG_NET)
static int io_shutdown_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
@@ -7744,8 +7507,8 @@ static struct io_rsrc_node *io_rsrc_node_alloc(void)
return ref_node;
}
-static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
- struct io_rsrc_data *data_to_kill)
+void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data_to_kill)
__must_hold(&ctx->uring_lock)
{
WARN_ON_ONCE(!ctx->rsrc_backup_node);
@@ -7772,7 +7535,7 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx,
}
}
-static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
+int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
{
if (ctx->rsrc_backup_node)
return 0;
@@ -8319,8 +8082,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
-static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
- struct io_rsrc_node *node, void *rsrc)
+int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+ struct io_rsrc_node *node, void *rsrc)
{
u64 *tag_slot = io_get_tag_slot(data, idx);
struct io_rsrc_put *prsrc;
@@ -8386,52 +8149,6 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
return ret;
}
-static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
- unsigned int offset)
-{
- struct io_ring_ctx *ctx = req->ctx;
- struct io_fixed_file *file_slot;
- struct file *file;
- int ret;
-
- io_ring_submit_lock(ctx, issue_flags);
- ret = -ENXIO;
- if (unlikely(!ctx->file_data))
- goto out;
- ret = -EINVAL;
- if (offset >= ctx->nr_user_files)
- goto out;
- ret = io_rsrc_node_switch_start(ctx);
- if (ret)
- goto out;
-
- offset = array_index_nospec(offset, ctx->nr_user_files);
- file_slot = io_fixed_file_slot(&ctx->file_table, offset);
- ret = -EBADF;
- if (!file_slot->file_ptr)
- goto out;
-
- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
- if (ret)
- goto out;
-
- file_slot->file_ptr = 0;
- io_file_bitmap_clear(&ctx->file_table, offset);
- io_rsrc_node_switch(ctx, ctx->file_data);
- ret = 0;
-out:
- io_ring_submit_unlock(ctx, issue_flags);
- return ret;
-}
-
-static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
-{
- struct io_close *close = io_kiocb_to_cmd(req);
-
- return __io_close_fixed(req, issue_flags, close->file_slot - 1);
-}
-
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 02c0012..ebb225e 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -2,6 +2,7 @@
#define IOU_CORE_H
#include <linux/errno.h>
+#include <linux/lockdep.h>
#include "io_uring_types.h"
enum {
@@ -30,8 +31,39 @@ static inline void io_put_file(struct file *file)
fput(file);
}
+static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
+ unsigned issue_flags)
+{
+ lockdep_assert_held(&ctx->uring_lock);
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_unlock(&ctx->uring_lock);
+}
+
+static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
+ unsigned issue_flags)
+{
+ /*
+ * "Normal" inline submissions always hold the uring_lock, since we
+ * grab it from the system call. Same is true for the SQPOLL offload.
+ * The only exception is when we've detached the request and issue it
+ * from an async worker thread, grab the lock for that case.
+ */
+ if (issue_flags & IO_URING_F_UNLOCKED)
+ mutex_lock(&ctx->uring_lock);
+ lockdep_assert_held(&ctx->uring_lock);
+}
+
struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
+int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
+ struct file *file, unsigned int file_slot);
+
+int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
+int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+ struct io_rsrc_node *node, void *rsrc);
+void io_rsrc_node_switch(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data_to_kill);
+bool io_is_uring_fops(struct file *file);
#endif
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
new file mode 100644
index 0000000..fa35bd5
--- /dev/null
+++ b/io_uring/openclose.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify.h>
+#include <linux/namei.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "../fs/internal.h"
+
+#include "io_uring_types.h"
+#include "io_uring.h"
+#include "openclose.h"
+
+struct io_open {
+ struct file *file;
+ int dfd;
+ u32 file_slot;
+ struct filename *filename;
+ struct open_how how;
+ unsigned long nofile;
+};
+
+struct io_close {
+ struct file *file;
+ int fd;
+ u32 file_slot;
+};
+
+static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_open *open = io_kiocb_to_cmd(req);
+ const char __user *fname;
+ int ret;
+
+ if (unlikely(sqe->buf_index))
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
+ return -EBADF;
+
+ /* open.how should be already initialised */
+ if (!(open->how.flags & O_PATH) && force_o_largefile())
+ open->how.flags |= O_LARGEFILE;
+
+ open->dfd = READ_ONCE(sqe->fd);
+ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ open->filename = getname(fname);
+ if (IS_ERR(open->filename)) {
+ ret = PTR_ERR(open->filename);
+ open->filename = NULL;
+ return ret;
+ }
+
+ open->file_slot = READ_ONCE(sqe->file_index);
+ if (open->file_slot && (open->how.flags & O_CLOEXEC))
+ return -EINVAL;
+
+ open->nofile = rlimit(RLIMIT_NOFILE);
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return 0;
+}
+
+int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_open *open = io_kiocb_to_cmd(req);
+ u64 mode = READ_ONCE(sqe->len);
+ u64 flags = READ_ONCE(sqe->open_flags);
+
+ open->how = build_open_how(flags, mode);
+ return __io_openat_prep(req, sqe);
+}
+
+int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_open *open = io_kiocb_to_cmd(req);
+ struct open_how __user *how;
+ size_t len;
+ int ret;
+
+ how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ len = READ_ONCE(sqe->len);
+ if (len < OPEN_HOW_SIZE_VER0)
+ return -EINVAL;
+
+ ret = copy_struct_from_user(&open->how, sizeof(open->how), how, len);
+ if (ret)
+ return ret;
+
+ return __io_openat_prep(req, sqe);
+}
+
+int io_openat2(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_open *open = io_kiocb_to_cmd(req);
+ struct open_flags op;
+ struct file *file;
+ bool resolve_nonblock, nonblock_set;
+ bool fixed = !!open->file_slot;
+ int ret;
+
+ ret = build_open_flags(&open->how, &op);
+ if (ret)
+ goto err;
+ nonblock_set = op.open_flag & O_NONBLOCK;
+ resolve_nonblock = open->how.resolve & RESOLVE_CACHED;
+ if (issue_flags & IO_URING_F_NONBLOCK) {
+ /*
+ * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
+ * it'll always -EAGAIN
+ */
+ if (open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+ return -EAGAIN;
+ op.lookup_flags |= LOOKUP_CACHED;
+ op.open_flag |= O_NONBLOCK;
+ }
+
+ if (!fixed) {
+ ret = __get_unused_fd_flags(open->how.flags, open->nofile);
+ if (ret < 0)
+ goto err;
+ }
+
+ file = do_filp_open(open->dfd, open->filename, &op);
+ if (IS_ERR(file)) {
+ /*
+ * We could hang on to this 'fd' on retrying, but seems like
+ * marginal gain for something that is now known to be a slower
+ * path. So just put it, and we'll get a new one when we retry.
+ */
+ if (!fixed)
+ put_unused_fd(ret);
+
+ ret = PTR_ERR(file);
+ /* only retry if RESOLVE_CACHED wasn't already set by application */
+ if (ret == -EAGAIN &&
+ (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK)))
+ return -EAGAIN;
+ goto err;
+ }
+
+ if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set)
+ file->f_flags &= ~O_NONBLOCK;
+ fsnotify_open(file);
+
+ if (!fixed)
+ fd_install(ret, file);
+ else
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ open->file_slot);
+err:
+ putname(open->filename);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
+
+int io_openat(struct io_kiocb *req, unsigned int issue_flags)
+{
+ return io_openat2(req, issue_flags);
+}
+
+void io_open_cleanup(struct io_kiocb *req)
+{
+ struct io_open *open = io_kiocb_to_cmd(req);
+
+ if (open->filename)
+ putname(open->filename);
+}
+
+int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_fixed_file *file_slot;
+ struct file *file;
+ int ret;
+
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = -ENXIO;
+ if (unlikely(!ctx->file_data))
+ goto out;
+ ret = -EINVAL;
+ if (offset >= ctx->nr_user_files)
+ goto out;
+ ret = io_rsrc_node_switch_start(ctx);
+ if (ret)
+ goto out;
+
+ offset = array_index_nospec(offset, ctx->nr_user_files);
+ file_slot = io_fixed_file_slot(&ctx->file_table, offset);
+ ret = -EBADF;
+ if (!file_slot->file_ptr)
+ goto out;
+
+ file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+ ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+ if (ret)
+ goto out;
+
+ file_slot->file_ptr = 0;
+ io_file_bitmap_clear(&ctx->file_table, offset);
+ io_rsrc_node_switch(ctx, ctx->file_data);
+ ret = 0;
+out:
+ io_ring_submit_unlock(ctx, issue_flags);
+ return ret;
+}
+
+static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_close *close = io_kiocb_to_cmd(req);
+
+ return __io_close_fixed(req, issue_flags, close->file_slot - 1);
+}
+
+int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_close *close = io_kiocb_to_cmd(req);
+
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
+ return -EINVAL;
+ if (req->flags & REQ_F_FIXED_FILE)
+ return -EBADF;
+
+ close->fd = READ_ONCE(sqe->fd);
+ close->file_slot = READ_ONCE(sqe->file_index);
+ if (close->file_slot && close->fd)
+ return -EINVAL;
+
+ return 0;
+}
+
+int io_close(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct files_struct *files = current->files;
+ struct io_close *close = io_kiocb_to_cmd(req);
+ struct fdtable *fdt;
+ struct file *file;
+ int ret = -EBADF;
+
+ if (close->file_slot) {
+ ret = io_close_fixed(req, issue_flags);
+ goto err;
+ }
+
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ if (close->fd >= fdt->max_fds) {
+ spin_unlock(&files->file_lock);
+ goto err;
+ }
+ file = rcu_dereference_protected(fdt->fd[close->fd],
+ lockdep_is_held(&files->file_lock));
+ if (!file || io_is_uring_fops(file)) {
+ spin_unlock(&files->file_lock);
+ goto err;
+ }
+
+ /* if the file has a flush method, be safe and punt to async */
+ if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) {
+ spin_unlock(&files->file_lock);
+ return -EAGAIN;
+ }
+
+ file = __close_fd_get_file(close->fd);
+ spin_unlock(&files->file_lock);
+ if (!file)
+ goto err;
+
+ /* No ->flush() or already async, safely close from here */
+ ret = filp_close(file, current->files);
+err:
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
diff --git a/io_uring/openclose.h b/io_uring/openclose.h
new file mode 100644
index 0000000..9f578f3
--- /dev/null
+++ b/io_uring/openclose.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset);
+
+int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_openat(struct io_kiocb *req, unsigned int issue_flags);
+void io_open_cleanup(struct io_kiocb *req);
+
+int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_close(struct io_kiocb *req, unsigned int issue_flags);