io_uring: add fsync support
Add a new fsync opcode, which either syncs a range if one is passed,
or the whole file if the offset and length fields are both cleared
to zero. A flag is provided to use fdatasync semantics, that is only
force out metadata which is required to retrieve the file data, but
not others like metadata.
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f680522..745413d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -24,6 +24,7 @@
* data that the application could potentially modify, it remains stable.
*
* Copyright (C) 2018-2019 Jens Axboe
+ * Copyright (c) 2018-2019 Christoph Hellwig
*/
#include <linux/kernel.h>
#include <linux/init.h>
@@ -557,6 +558,56 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
return 0;
}
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ int fd;
+
+ /* Prep already done */
+ if (req->rw.ki_filp)
+ return 0;
+
+ if (unlikely(sqe->addr || sqe->ioprio))
+ return -EINVAL;
+
+ fd = READ_ONCE(sqe->fd);
+ req->rw.ki_filp = fget(fd);
+ if (unlikely(!req->rw.ki_filp))
+ return -EBADF;
+
+ return 0;
+}
+
+static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
+{
+ loff_t sqe_off = READ_ONCE(sqe->off);
+ loff_t sqe_len = READ_ONCE(sqe->len);
+ loff_t end = sqe_off + sqe_len;
+ unsigned fsync_flags;
+ int ret;
+
+ fsync_flags = READ_ONCE(sqe->fsync_flags);
+ if (unlikely(fsync_flags & ~IORING_FSYNC_DATASYNC))
+ return -EINVAL;
+
+ ret = io_prep_fsync(req, sqe);
+ if (ret)
+ return ret;
+
+ /* fsync always requires a blocking context */
+ if (force_nonblock)
+ return -EAGAIN;
+
+ ret = vfs_fsync_range(req->rw.ki_filp, sqe_off,
+ end > 0 ? end : LLONG_MAX,
+ fsync_flags & IORING_FSYNC_DATASYNC);
+
+ fput(req->rw.ki_filp);
+ io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+ io_free_req(req);
+ return 0;
+}
+
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct sqe_submit *s, bool force_nonblock)
{
@@ -578,6 +629,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_WRITEV:
ret = io_write(req, s, force_nonblock);
break;
+ case IORING_OP_FSYNC:
+ ret = io_fsync(req, s->sqe, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac692823..4589d56 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -24,7 +24,7 @@ struct io_uring_sqe {
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
- __u32 __resv;
+ __u32 fsync_flags;
};
__u64 user_data; /* data to be passed back at completion time */
__u64 __pad2[3];
@@ -33,6 +33,12 @@ struct io_uring_sqe {
#define IORING_OP_NOP 0
#define IORING_OP_READV 1
#define IORING_OP_WRITEV 2
+#define IORING_OP_FSYNC 3
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC (1U << 0)
/*
* IO completion data structure (Completion Queue Entry)