// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request that this operation will store data from
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list of operations
 * on a write request.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source = dest;
		subreq->start = start;
		subreq->len = len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

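		/* Give the subrequest a view of the slice of the request's
		 * buffer that it covers: skip over the parts belonging to
		 * earlier subrequests and clip to this subrequest's length.
		 */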
		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);

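/*
 * Illustrative sketch (not part of this file): a network filesystem's
 * ->create_write_requests() op might use the above, roughly, as:
 *
 *	static void myfs_create_write_requests(struct netfs_io_request *wreq,
 *					       loff_t start, size_t len)
 *	{
 *		struct netfs_io_subrequest *subreq;
 *
 *		subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
 *						    start, len, myfs_upload_worker);
 *		if (subreq)
 *			netfs_queue_write_request(subreq);
 *	}
 *
 * myfs_create_write_requests() and myfs_upload_worker() are hypothetical
 * names; the worker would perform the I/O described by subreq->io_iter and
 * finish by calling netfs_write_subrequest_terminated().
 */
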
/*
 * Process a completed write request once all of its component operations have
 * terminated.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

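	/* Collate how much was transferred: stop counting at the first
	 * subrequest that failed or came up short, since anything beyond it
	 * is not contiguous with what has already been accounted for.
	 */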
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

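	/* Now see how each subrequest fared and work out what any failures
	 * mean for the request as a whole.
	 */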
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

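	/* For a DIO write, the data went straight to the server, so discard
	 * any now-stale pagecache pages that overlap the written range.
	 */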
	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

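	/* If we were given a kiocb to complete, advance its file position by
	 * the amount transferred and report the outcome to the issuer.
	 */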
	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Handle the termination of a write subrequest, whether the data was being
 * uploaded to the server or written to the cache.  The caller supplies either
 * the number of bytes transferred or a negative error code.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);

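/*
 * Illustrative sketch (not part of this file): the worker queued by a
 * filesystem for an upload subrequest might finish up roughly like this,
 * with myfs_issue_write() being a hypothetical transport call:
 *
 *	static void myfs_upload_worker(struct work_struct *work)
 *	{
 *		struct netfs_io_subrequest *subreq =
 *			container_of(work, struct netfs_io_subrequest, work);
 *		ssize_t ret;
 *
 *		ret = myfs_issue_write(subreq->rreq, subreq->start,
 *				       &subreq->io_iter);
 *		netfs_write_subrequest_terminated(subreq, ret, false);
 *	}
 *
 * A positive value reports bytes written; a negative value reports an error.
 */
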
static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

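	/* Hand the subrequest's iterator to the cache backend; it will call
	 * netfs_write_subrequest_terminated() when the write has finished.
	 */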
	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write subrequest for attention
 * @subreq: The write subrequest to be queued
 *
 * Queue the specified write subrequest for processing by a worker thread.  We
 * pass the caller's ref on the subrequest to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	ret = fscache_begin_write_operation(cres, cookie);
	if (ret < 0)
		return;

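	/* Give the cache a chance to adjust the start and length to suit its
	 * granularity, or to decline the write entirely.
	 */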
	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
				       i_size_read(wreq->inode), true);
	if (ret < 0)
		return;

	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	netfs_write_to_cache_op(subreq);
}

/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present.  The
 * pages must have been marked as writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to write
 *         to that destination.  We may need multiple writes if the data is not
 *         contiguous or the span exceeds wsize for a server.
 */
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going.  We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the
	 * server; caching of data that was read is also managed through this
	 * same path.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

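	/* Drop the count we primed nr_outstanding with; if no subrequests are
	 * still outstanding, the write is already complete.
	 */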
	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq;
	struct file *file = iocb->ki_filp;

	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
				   NETFS_WRITETHROUGH);
	if (IS_ERR(wreq))
		return wreq;

	trace_netfs_write(wreq, netfs_write_trace_writethrough);

	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);
	return wreq;
}

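/*
 * Illustrative sketch (not part of this file): a buffered write path doing
 * writethrough caching might drive these helpers roughly as follows, where
 * the copying step stands in for the filesystem's usual pagecache write loop:
 *
 *	wreq = netfs_begin_writethrough(iocb, total_len);
 *	while (there is data left to copy) {
 *		copy a chunk into the pagecache;
 *		netfs_advance_writethrough(wreq, copied, reached_page_end);
 *	}
 *	ret = netfs_end_writethrough(wreq, iocb);
 *
 * The loop shown is pseudocode; only the three netfs_*_writethrough() calls
 * are real.
 */

/* Push out the part of the request that has been copied in but not yet
 * submitted, rounding down to a whole number of wsize-sized pieces unless
 * this is the final submission.
 */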
static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	unsigned long long start;
	size_t len;

	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		return;

	start = wreq->start + wreq->submitted;
	len = wreq->iter.count - wreq->submitted;
	if (!final) {
		len /= wreq->wsize; /* Round to number of maximum packets */
		len *= wreq->wsize;
	}

	ictx->ops->create_write_requests(wreq, start, len);
	wreq->submitted += len;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache.  Data has been copied into the pagecache that we need to append
 * to the request.  If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
{
	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);

	wreq->iter.count += copied;
	wreq->io_iter.count += copied;
	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
		netfs_submit_writethrough(wreq, false);

	return wreq->error;
}

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
{
	int ret = -EIOCBQUEUED;

	_enter("ic=%zu sb=%zu ws=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize);

	if (wreq->submitted < wreq->io_iter.count)
		netfs_submit_writethrough(wreq, true);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (is_sync_kiocb(iocb)) {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}