fs: move kiocb_start_write() into vfs_iocb_iter_write()
In vfs code, sb_start_write() is usually called after the permission hook
in rw_verify_area(). vfs_iocb_iter_write() is an exception to this rule,
where kiocb_start_write() is called by its callers.
Move kiocb_start_write() from the callers into vfs_iocb_iter_write()
after the rw_verify_area() checks, to make them "start-write-safe".
The semantics of vfs_iocb_iter_write() is changed, so that the caller is
responsible for calling kiocb_end_write() on completion only if async
iocb was queued. The completion handlers of both callers were adapted
to this semantic change.
This is needed for fanotify "pre content" events.
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Link: https://lore.kernel.org/r/20231122122715.2561213-14-amir73il@gmail.com
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 009d23c..5857241 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -259,7 +259,8 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret);
- kiocb_end_write(iocb);
+ if (ki->was_async)
+ kiocb_end_write(iocb);
if (ret < 0)
trace_cachefiles_io_error(object, inode, ret,
@@ -319,8 +320,6 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
- kiocb_start_write(&ki->iocb);
-
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 690b173..4e46420 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -295,10 +295,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
struct kiocb *iocb = &aio_req->iocb;
struct kiocb *orig_iocb = aio_req->orig_iocb;
- if (iocb->ki_flags & IOCB_WRITE) {
- kiocb_end_write(iocb);
+ if (iocb->ki_flags & IOCB_WRITE)
ovl_file_modified(orig_iocb->ki_filp);
- }
orig_iocb->ki_pos = iocb->ki_pos;
ovl_aio_put(aio_req);
@@ -310,6 +308,9 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
+ if (iocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(iocb);
+
ovl_aio_cleanup_handler(aio_req);
orig_iocb->ki_complete(orig_iocb, res);
}
@@ -456,7 +457,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
- kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
diff --git a/fs/read_write.c b/fs/read_write.c
index 9fce8d6..92c68ab 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -829,6 +829,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
}
EXPORT_SYMBOL(vfs_iter_read);
+/*
+ * Caller is responsible for calling kiocb_end_write() on completion
+ * if async iocb was queued.
+ */
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter)
{
@@ -849,7 +853,10 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
if (ret < 0)
return ret;
+ kiocb_start_write(iocb);
ret = call_write_iter(file, iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ kiocb_end_write(iocb);
if (ret > 0)
fsnotify_modify(file);