// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
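	/* For instance, a 16KiB folio at the start of the request that is
	 * covered by two 8KiB subrequests is handled by consuming both
	 * subrequests in the inner loop below: iopos advances by 8KiB per
	 * subrequest until it reaches pgend, and the folio is only marked
	 * uptodate if neither covering subrequest recorded an error.
	 */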
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary,
 * the readahead window can be expanded in either direction to a more
 * convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		return;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
				   readahead_pos(ractl),
				   readahead_length(ractl),
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock_folios().
	 */
	while (readahead_folio(ractl))
		;

	netfs_begin_read(rreq, false);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
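
/*
 * Illustrative sketch (not part of this file): a network filesystem normally
 * wires these helpers straight into its address_space_operations.  The
 * structure and filesystem name below are hypothetical; check the aops
 * prototypes in include/linux/fs.h for the kernel version in use.
 *
 *	const struct address_space_operations myfs_file_aops = {
 *		.readahead	= netfs_readahead,
 *		.read_folio	= netfs_read_folio,
 *		...
 *	};
 */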

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio_file_mapping(folio);
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto discard;
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
	return netfs_begin_read(rreq, true);

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
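
/*
 * Illustrative sketch (not part of this file) of the "netfs context
 * contiguous to the vfs inode" requirement mentioned above: the filesystem
 * wraps its VFS inode in a struct netfs_inode and initialises it at inode
 * set-up time.  The names below are hypothetical and the exact
 * netfs_inode_init() signature should be checked against include/linux/netfs.h.
 *
 *	struct myfs_inode {
 *		struct netfs_inode netfs;	// must come first: wraps the VFS inode
 *		...
 *	};
 *
 *	// during inode initialisation:
 *	netfs_inode_init(&mi->netfs, &myfs_req_ops);
 */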

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true.  Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}
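
/*
 * Worked example for the skip criteria above (illustrative figures): with a
 * 4096-byte folio covering file positions 0-4095 and i_size = 2048, a write
 * of 4096 bytes at pos 0 is a full-folio write and needs no read; a write of
 * 3000 bytes at pos 0 reaches past EOF, so bytes 3000-4095 are zeroed and no
 * read is needed; but a write of 100 bytes at pos 512 matches none of the
 * criteria and the caller must read the folio first.
 */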

/**
 * netfs_write_begin - Helper to prepare for writing
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the
 * cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
		return -ENOMEM;

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
	rreq->no_unlock_folio = folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	ret = netfs_begin_read(rreq, true);
	if (ret < 0)
		goto error;

have_folio:
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
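
/*
 * Illustrative sketch (not part of this file): a netfs typically wraps
 * netfs_write_begin() in its ->write_begin() method.  The filesystem name is
 * hypothetical and the ->write_begin() prototype assumed here (no aop flags
 * argument) should be checked against include/linux/fs.h for the kernel
 * version in use.
 *
 *	static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret;
 *
 *		ret = netfs_write_begin(netfs_inode(mapping->host), file, mapping,
 *					pos, len, &folio, fsdata);
 *		if (ret == 0)
 *			*pagep = folio_file_page(folio, pos / PAGE_SIZE);
 *		return ret;
 *	}
 */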