Blame - fs/bcachefs/fs-io-pagecache.c - linux

blob: a9cc5cad9cc992ff6d1e3d9000c0a51b9b85408f [file] [log] [blame]

Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	#ifndef NO_BCACHEFS_FS
				3
				4	#include "bcachefs.h"
				5	#include "btree_iter.h"
				6	#include "extents.h"
				7	#include "fs-io.h"
				8	#include "fs-io-pagecache.h"
				9	#include "subvolume.h"
				10
				11	#include <linux/pagevec.h>
				12	#include <linux/writeback.h>
				13
				14	int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				15	loff_t start, u64 end,
Kent Overstreet	c4f1f80	2023-11-11 12:30:19 -0500	[diff] [blame]	16	fgf_t fgp_flags, gfp_t gfp,
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	17	folios *fs)
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	18	{
				19	struct folio *f;
				20	u64 pos = start;
				21	int ret = 0;
				22
				23	while (pos < end) {
				24	if ((u64) pos >= (u64) start + (1ULL << 20))
				25	fgp_flags &= ~FGP_CREAT;
				26
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	27	ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	28	if (ret)
				29	break;
				30
				31	f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
				32	if (IS_ERR_OR_NULL(f))
				33	break;
				34
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	35	BUG_ON(fs->nr && folio_pos(f) != pos);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	36
				37	pos = folio_end_pos(f);
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	38	darray_push(fs, f);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	39	}
				40
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	41	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	42	ret = -ENOMEM;
				43
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	44	return fs->nr ? 0 : ret;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	45	}
				46
				47	/* pagecache_block must be held */
				48	int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
				49	loff_t start, loff_t end)
				50	{
				51	int ret;
				52
				53	/*
				54	* XXX: the way this is currently implemented, we can spin if a process
				55	* is continually redirtying a specific page
				56	*/
				57	do {
				58	if (!mapping->nrpages)
				59	return 0;
				60
				61	ret = filemap_write_and_wait_range(mapping, start, end);
				62	if (ret)
				63	break;
				64
				65	if (!mapping->nrpages)
				66	return 0;
				67
				68	ret = invalidate_inode_pages2_range(mapping,
				69	start >> PAGE_SHIFT,
				70	end >> PAGE_SHIFT);
				71	} while (ret == -EBUSY);
				72
				73	return ret;
				74	}
				75
#if 0
/* Sector state names, one string per BCH_FOLIO_SECTOR_STATE() entry; useful for debug tracing: */
static const char * const bch2_folio_sector_states[] = {
#define x(_state)	#_state,
	BCH_FOLIO_SECTOR_STATE()
#undef x
	NULL
};
#endif
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	85
				86	static inline enum bch_folio_sector_state
				87	folio_sector_dirty(enum bch_folio_sector_state state)
				88	{
				89	switch (state) {
				90	case SECTOR_unallocated:
				91	return SECTOR_dirty;
				92	case SECTOR_reserved:
				93	return SECTOR_dirty_reserved;
				94	default:
				95	return state;
				96	}
				97	}
				98
				99	static inline enum bch_folio_sector_state
				100	folio_sector_undirty(enum bch_folio_sector_state state)
				101	{
				102	switch (state) {
				103	case SECTOR_dirty:
				104	return SECTOR_unallocated;
				105	case SECTOR_dirty_reserved:
				106	return SECTOR_reserved;
				107	default:
				108	return state;
				109	}
				110	}
				111
				112	static inline enum bch_folio_sector_state
				113	folio_sector_reserve(enum bch_folio_sector_state state)
				114	{
				115	switch (state) {
				116	case SECTOR_unallocated:
				117	return SECTOR_reserved;
				118	case SECTOR_dirty:
				119	return SECTOR_dirty_reserved;
				120	default:
				121	return state;
				122	}
				123	}
				124
				125	/* for newly allocated folios: */
				126	struct bch_folio __bch2_folio_create(struct folio folio, gfp_t gfp)
				127	{
				128	struct bch_folio *s;
				129
				130	s = kzalloc(sizeof(*s) +
				131	sizeof(struct bch_folio_sector) *
				132	folio_sectors(folio), gfp);
				133	if (!s)
				134	return NULL;
				135
				136	spin_lock_init(&s->lock);
				137	folio_attach_private(folio, s);
				138	return s;
				139	}
				140
				141	struct bch_folio bch2_folio_create(struct folio folio, gfp_t gfp)
				142	{
				143	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
				144	}
				145
				146	static unsigned bkey_to_sector_state(struct bkey_s_c k)
				147	{
				148	if (bkey_extent_is_reservation(k))
				149	return SECTOR_reserved;
				150	if (bkey_extent_is_allocation(k.k))
				151	return SECTOR_allocated;
				152	return SECTOR_unallocated;
				153	}
				154
				155	static void __bch2_folio_set(struct folio *folio,
				156	unsigned pg_offset, unsigned pg_len,
				157	unsigned nr_ptrs, unsigned state)
				158	{
				159	struct bch_folio *s = bch2_folio(folio);
				160	unsigned i, sectors = folio_sectors(folio);
				161
				162	BUG_ON(pg_offset >= sectors);
				163	BUG_ON(pg_offset + pg_len > sectors);
				164
				165	spin_lock(&s->lock);
				166
				167	for (i = pg_offset; i < pg_offset + pg_len; i++) {
				168	s->s[i].nr_replicas = nr_ptrs;
				169	bch2_folio_sector_set(folio, s, i, state);
				170	}
				171
				172	if (i == sectors)
				173	s->uptodate = true;
				174
				175	spin_unlock(&s->lock);
				176	}
				177
				178	/*
				179	* Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
				180	* extents btree:
				181	*/
				182	int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	183	struct folio **fs, unsigned nr_folios)
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	184	{
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	185	struct btree_trans *trans;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	186	struct btree_iter iter;
				187	struct bkey_s_c k;
				188	struct bch_folio *s;
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	189	u64 offset = folio_sector(fs[0]);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	190	unsigned folio_idx;
				191	u32 snapshot;
				192	bool need_set = false;
				193	int ret;
				194
				195	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	196	s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	197	if (!s)
				198	return -ENOMEM;
				199
				200	need_set \|= !s->uptodate;
				201	}
				202
				203	if (!need_set)
				204	return 0;
				205
				206	folio_idx = 0;
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	207	trans = bch2_trans_get(c);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	208	retry:
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	209	bch2_trans_begin(trans);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	210
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	211	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	212	if (ret)
				213	goto err;
				214
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	215	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	216	SPOS(inum.inum, offset, snapshot),
Kent Overstreet	5dd8c60	2024-04-07 18:05:34 -0400	[diff] [blame]	217	BTREE_ITER_slots, k, ret) {
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	218	unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
				219	unsigned state = bkey_to_sector_state(k);
				220
				221	while (folio_idx < nr_folios) {
Kent Overstreet	96dea3d	2023-09-12 18:41:22 -0400	[diff] [blame]	222	struct folio *folio = fs[folio_idx];
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	223	u64 folio_start = folio_sector(folio);
				224	u64 folio_end = folio_end_sector(folio);
				225	unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				226	folio_start;
				227	unsigned folio_len = min(k.k->p.offset, folio_end) -
				228	folio_offset - folio_start;
				229
				230	BUG_ON(k.k->p.offset < folio_start);
				231	BUG_ON(bkey_start_offset(k.k) > folio_end);
				232
				233	if (!bch2_folio(folio)->uptodate)
				234	__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);
				235
				236	if (k.k->p.offset < folio_end)
				237	break;
				238	folio_idx++;
				239	}
				240
				241	if (folio_idx == nr_folios)
				242	break;
				243	}
				244
				245	offset = iter.pos.offset;
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	246	bch2_trans_iter_exit(trans, &iter);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	247	err:
				248	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
				249	goto retry;
Kent Overstreet	6bd68ec	2023-09-12 17:16:02 -0400	[diff] [blame]	250	bch2_trans_put(trans);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	251
				252	return ret;
				253	}
				254
				255	void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
				256	{
				257	struct bvec_iter iter;
				258	struct folio_vec fv;
				259	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
				260	? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
				261	unsigned state = bkey_to_sector_state(k);
				262
				263	bio_for_each_folio(fv, bio, iter)
				264	__bch2_folio_set(fv.fv_folio,
				265	fv.fv_offset >> 9,
				266	fv.fv_len >> 9,
				267	nr_ptrs, state);
				268	}
				269
				270	void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				271	u64 start, u64 end)
				272	{
				273	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
				274	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
				275	struct folio_batch fbatch;
				276	unsigned i, j;
				277
				278	if (end <= start)
				279	return;
				280
				281	folio_batch_init(&fbatch);
				282
				283	while (filemap_get_folios(inode->v.i_mapping,
				284	&index, end_index, &fbatch)) {
				285	for (i = 0; i < folio_batch_count(&fbatch); i++) {
				286	struct folio *folio = fbatch.folios[i];
				287	u64 folio_start = folio_sector(folio);
				288	u64 folio_end = folio_end_sector(folio);
				289	unsigned folio_offset = max(start, folio_start) - folio_start;
				290	unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
				291	struct bch_folio *s;
				292
				293	BUG_ON(end <= folio_start);
				294
				295	folio_lock(folio);
				296	s = bch2_folio(folio);
				297
				298	if (s) {
				299	spin_lock(&s->lock);
				300	for (j = folio_offset; j < folio_offset + folio_len; j++)
				301	s->s[j].nr_replicas = 0;
				302	spin_unlock(&s->lock);
				303	}
				304
				305	folio_unlock(folio);
				306	}
				307	folio_batch_release(&fbatch);
				308	cond_resched();
				309	}
				310	}
				311
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	312	int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
				313	u64 *start, u64 end,
				314	bool nonblocking)
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	315	{
				316	struct bch_fs *c = inode->v.i_sb->s_fs_info;
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	317	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	318	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
				319	struct folio_batch fbatch;
				320	s64 i_sectors_delta = 0;
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	321	int ret = 0;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	322
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	323	if (end <= *start)
				324	return 0;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	325
				326	folio_batch_init(&fbatch);
				327
				328	while (filemap_get_folios(inode->v.i_mapping,
				329	&index, end_index, &fbatch)) {
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	330	for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	331	struct folio *folio = fbatch.folios[i];
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	332
				333	if (!nonblocking)
				334	folio_lock(folio);
				335	else if (!folio_trylock(folio)) {
				336	folio_batch_release(&fbatch);
				337	ret = -EAGAIN;
				338	break;
				339	}
				340
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	341	u64 folio_start = folio_sector(folio);
				342	u64 folio_end = folio_end_sector(folio);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	343
				344	BUG_ON(end <= folio_start);
				345
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	346	*start = min(end, folio_end);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	347
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	348	struct bch_folio *s = bch2_folio(folio);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	349	if (s) {
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	350	unsigned folio_offset = max(*start, folio_start) - folio_start;
				351	unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
				352
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	353	spin_lock(&s->lock);
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	354	for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	355	i_sectors_delta -= s->s[j].state == SECTOR_dirty;
				356	bch2_folio_sector_set(folio, s, j,
				357	folio_sector_reserve(s->s[j].state));
				358	}
				359	spin_unlock(&s->lock);
				360	}
				361
				362	folio_unlock(folio);
				363	}
				364	folio_batch_release(&fbatch);
				365	cond_resched();
				366	}
				367
				368	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
Kent Overstreet	46bf2e9	2024-01-15 20:37:23 -0500	[diff] [blame]	369	return ret;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	370	}
				371
				372	static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
				373	unsigned nr_replicas)
				374	{
				375	return max(0, (int) nr_replicas -
				376	s->nr_replicas -
				377	s->replicas_reserved);
				378	}
				379
				380	int bch2_get_folio_disk_reservation(struct bch_fs *c,
				381	struct bch_inode_info *inode,
				382	struct folio *folio, bool check_enospc)
				383	{
				384	struct bch_folio *s = bch2_folio_create(folio, 0);
				385	unsigned nr_replicas = inode_nr_replicas(c, inode);
				386	struct disk_reservation disk_res = { 0 };
				387	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
				388	int ret;
				389
				390	if (!s)
				391	return -ENOMEM;
				392
				393	for (i = 0; i < sectors; i++)
				394	disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
				395
				396	if (!disk_res_sectors)
				397	return 0;
				398
				399	ret = bch2_disk_reservation_get(c, &disk_res,
				400	disk_res_sectors, 1,
				401	!check_enospc
				402	? BCH_DISK_RESERVATION_NOFAIL
				403	: 0);
				404	if (unlikely(ret))
				405	return ret;
				406
				407	for (i = 0; i < sectors; i++)
				408	s->s[i].replicas_reserved +=
				409	sectors_to_reserve(&s->s[i], nr_replicas);
				410
				411	return 0;
				412	}
				413
				414	void bch2_folio_reservation_put(struct bch_fs *c,
				415	struct bch_inode_info *inode,
				416	struct bch2_folio_reservation *res)
				417	{
				418	bch2_disk_reservation_put(c, &res->disk);
				419	bch2_quota_reservation_put(c, inode, &res->quota);
				420	}
				421
				422	int bch2_folio_reservation_get(struct bch_fs *c,
				423	struct bch_inode_info *inode,
				424	struct folio *folio,
				425	struct bch2_folio_reservation *res,
Kent Overstreet	7554a8bb6	2023-09-10 17:29:39 -0400	[diff] [blame]	426	size_t offset, size_t len)
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	427	{
				428	struct bch_folio *s = bch2_folio_create(folio, 0);
				429	unsigned i, disk_sectors = 0, quota_sectors = 0;
				430	int ret;
				431
				432	if (!s)
				433	return -ENOMEM;
				434
				435	BUG_ON(!s->uptodate);
				436
				437	for (i = round_down(offset, block_bytes(c)) >> 9;
				438	i < round_up(offset + len, block_bytes(c)) >> 9;
				439	i++) {
Kent Overstreet	7554a8bb6	2023-09-10 17:29:39 -0400	[diff] [blame]	440	disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	441	quota_sectors += s->s[i].state == SECTOR_unallocated;
				442	}
				443
				444	if (disk_sectors) {
				445	ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
				446	if (unlikely(ret))
				447	return ret;
				448	}
				449
				450	if (quota_sectors) {
Kent Overstreet	7554a8bb6	2023-09-10 17:29:39 -0400	[diff] [blame]	451	ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	452	if (unlikely(ret)) {
Kent Overstreet	7554a8bb6	2023-09-10 17:29:39 -0400	[diff] [blame]	453	struct disk_reservation tmp = { .sectors = disk_sectors };
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	454
				455	bch2_disk_reservation_put(c, &tmp);
				456	res->disk.sectors -= disk_sectors;
				457	return ret;
				458	}
				459	}
				460
				461	return 0;
				462	}
				463
Kent Overstreet	7554a8bb6	2023-09-10 17:29:39 -0400	[diff] [blame]	464	ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
				465	struct bch_inode_info *inode,
				466	struct folio *folio,
				467	struct bch2_folio_reservation *res,
				468	size_t offset, size_t len)
				469	{
				470	size_t l, reserved = 0;
				471	int ret;
				472
				473	while ((l = len - reserved)) {
				474	while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
				475	if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
				476	return reserved ?: ret;
				477
				478	len = reserved + l;
				479	l /= 2;
				480	}
				481
				482	offset += l;
				483	reserved += l;
				484	}
				485
				486	return reserved;
				487	}
				488
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	489	static void bch2_clear_folio_bits(struct folio *folio)
				490	{
				491	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
				492	struct bch_fs *c = inode->v.i_sb->s_fs_info;
				493	struct bch_folio *s = bch2_folio(folio);
				494	struct disk_reservation disk_res = { 0 };
				495	int i, sectors = folio_sectors(folio), dirty_sectors = 0;
				496
				497	if (!s)
				498	return;
				499
				500	EBUG_ON(!folio_test_locked(folio));
				501	EBUG_ON(folio_test_writeback(folio));
				502
				503	for (i = 0; i < sectors; i++) {
				504	disk_res.sectors += s->s[i].replicas_reserved;
				505	s->s[i].replicas_reserved = 0;
				506
				507	dirty_sectors -= s->s[i].state == SECTOR_dirty;
				508	bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
				509	}
				510
				511	bch2_disk_reservation_put(c, &disk_res);
				512
				513	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);
				514
				515	bch2_folio_release(folio);
				516	}
				517
				518	void bch2_set_folio_dirty(struct bch_fs *c,
				519	struct bch_inode_info *inode,
				520	struct folio *folio,
				521	struct bch2_folio_reservation *res,
				522	unsigned offset, unsigned len)
				523	{
				524	struct bch_folio *s = bch2_folio(folio);
				525	unsigned i, dirty_sectors = 0;
				526
				527	WARN_ON((u64) folio_pos(folio) + offset + len >
				528	round_up((u64) i_size_read(&inode->v), block_bytes(c)));
				529
				530	BUG_ON(!s->uptodate);
				531
				532	spin_lock(&s->lock);
				533
				534	for (i = round_down(offset, block_bytes(c)) >> 9;
				535	i < round_up(offset + len, block_bytes(c)) >> 9;
				536	i++) {
				537	unsigned sectors = sectors_to_reserve(&s->s[i],
				538	res->disk.nr_replicas);
				539
				540	/*
				541	* This can happen if we race with the error path in
				542	* bch2_writepage_io_done():
				543	*/
				544	sectors = min_t(unsigned, sectors, res->disk.sectors);
				545
				546	s->s[i].replicas_reserved += sectors;
				547	res->disk.sectors -= sectors;
				548
				549	dirty_sectors += s->s[i].state == SECTOR_unallocated;
				550
				551	bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
				552	}
				553
				554	spin_unlock(&s->lock);
				555
				556	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);
				557
				558	if (!folio_test_dirty(folio))
				559	filemap_dirty_folio(inode->v.i_mapping, folio);
				560	}
				561
				562	vm_fault_t bch2_page_fault(struct vm_fault *vmf)
				563	{
				564	struct file *file = vmf->vma->vm_file;
				565	struct address_space *mapping = file->f_mapping;
				566	struct address_space *fdm = faults_disabled_mapping();
				567	struct bch_inode_info *inode = file_bch_inode(file);
				568	vm_fault_t ret;
				569
				570	if (fdm == mapping)
				571	return VM_FAULT_SIGBUS;
				572
				573	/* Lock ordering: */
				574	if (fdm > mapping) {
				575	struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
				576
				577	if (bch2_pagecache_add_tryget(inode))
				578	goto got_lock;
				579
				580	bch2_pagecache_block_put(fdm_host);
				581
				582	bch2_pagecache_add_get(inode);
				583	bch2_pagecache_add_put(inode);
				584
				585	bch2_pagecache_block_get(fdm_host);
				586
				587	/* Signal that lock has been dropped: */
				588	set_fdm_dropped_locks();
				589	return VM_FAULT_SIGBUS;
				590	}
				591
				592	bch2_pagecache_add_get(inode);
				593	got_lock:
				594	ret = filemap_fault(vmf);
				595	bch2_pagecache_add_put(inode);
				596
				597	return ret;
				598	}
				599
				600	vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
				601	{
				602	struct folio *folio = page_folio(vmf->page);
				603	struct file *file = vmf->vma->vm_file;
				604	struct bch_inode_info *inode = file_bch_inode(file);
				605	struct address_space *mapping = file->f_mapping;
				606	struct bch_fs *c = inode->v.i_sb->s_fs_info;
				607	struct bch2_folio_reservation res;
				608	unsigned len;
				609	loff_t isize;
				610	vm_fault_t ret;
				611
				612	bch2_folio_reservation_init(c, inode, &res);
				613
				614	sb_start_pagefault(inode->v.i_sb);
				615	file_update_time(file);
				616
				617	/*
				618	* Not strictly necessary, but helps avoid dio writes livelocking in
				619	* bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
				620	* a bch2_write_invalidate_inode_pages_range() that works without dropping
				621	* page lock before invalidating page
				622	*/
				623	bch2_pagecache_add_get(inode);
				624
				625	folio_lock(folio);
				626	isize = i_size_read(&inode->v);
				627
				628	if (folio->mapping != mapping \|\| folio_pos(folio) >= isize) {
				629	folio_unlock(folio);
				630	ret = VM_FAULT_NOPAGE;
				631	goto out;
				632	}
				633
				634	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));
				635
				636	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
				637	bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
				638	folio_unlock(folio);
				639	ret = VM_FAULT_SIGBUS;
				640	goto out;
				641	}
				642
				643	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
				644	bch2_folio_reservation_put(c, inode, &res);
				645
				646	folio_wait_stable(folio);
				647	ret = VM_FAULT_LOCKED;
				648	out:
				649	bch2_pagecache_add_put(inode);
				650	sb_end_pagefault(inode->v.i_sb);
				651
				652	return ret;
				653	}
				654
				655	void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
				656	{
				657	if (offset \|\| length < folio_size(folio))
				658	return;
				659
				660	bch2_clear_folio_bits(folio);
				661	}
				662
				663	bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
				664	{
				665	if (folio_test_dirty(folio) \|\| folio_test_writeback(folio))
				666	return false;
				667
				668	bch2_clear_folio_bits(folio);
				669	return true;
				670	}
				671
				672	/* fseek: */
				673
				674	static int folio_data_offset(struct folio *folio, loff_t pos,
				675	unsigned min_replicas)
				676	{
				677	struct bch_folio *s = bch2_folio(folio);
				678	unsigned i, sectors = folio_sectors(folio);
				679
				680	if (s)
				681	for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
				682	if (s->s[i].state >= SECTOR_dirty &&
				683	s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
				684	return i << SECTOR_SHIFT;
				685
				686	return -1;
				687	}
				688
				689	loff_t bch2_seek_pagecache_data(struct inode *vinode,
				690	loff_t start_offset,
				691	loff_t end_offset,
				692	unsigned min_replicas,
				693	bool nonblock)
				694	{
				695	struct folio_batch fbatch;
				696	pgoff_t start_index = start_offset >> PAGE_SHIFT;
				697	pgoff_t end_index = end_offset >> PAGE_SHIFT;
				698	pgoff_t index = start_index;
				699	unsigned i;
				700	loff_t ret;
				701	int offset;
				702
				703	folio_batch_init(&fbatch);
				704
				705	while (filemap_get_folios(vinode->i_mapping,
				706	&index, end_index, &fbatch)) {
				707	for (i = 0; i < folio_batch_count(&fbatch); i++) {
				708	struct folio *folio = fbatch.folios[i];
				709
				710	if (!nonblock) {
				711	folio_lock(folio);
				712	} else if (!folio_trylock(folio)) {
				713	folio_batch_release(&fbatch);
				714	return -EAGAIN;
				715	}
				716
				717	offset = folio_data_offset(folio,
				718	max(folio_pos(folio), start_offset),
				719	min_replicas);
				720	if (offset >= 0) {
				721	ret = clamp(folio_pos(folio) + offset,
				722	start_offset, end_offset);
				723	folio_unlock(folio);
				724	folio_batch_release(&fbatch);
				725	return ret;
				726	}
				727	folio_unlock(folio);
				728	}
				729	folio_batch_release(&fbatch);
				730	cond_resched();
				731	}
				732
				733	return end_offset;
				734	}
				735
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	736	/*
				737	* Search for a hole in a folio.
				738	*
				739	* The filemap layer returns -ENOENT if no folio exists, so reuse the same error
				740	* code to indicate a pagecache hole exists at the returned offset. Otherwise
				741	* return 0 if the folio is filled with data, or an error code. This function
				742	* can return -EAGAIN if nonblock is specified.
				743	*/
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	744	static int folio_hole_offset(struct address_space mapping, loff_t offset,
				745	unsigned min_replicas, bool nonblock)
				746	{
				747	struct folio *folio;
				748	struct bch_folio *s;
				749	unsigned i, sectors;
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	750	int ret = -ENOENT;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	751
				752	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				753	FGP_LOCK\|(nonblock ? FGP_NOWAIT : 0), 0);
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	754	if (IS_ERR(folio))
				755	return PTR_ERR(folio);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	756
				757	s = bch2_folio(folio);
				758	if (!s)
				759	goto unlock;
				760
				761	sectors = folio_sectors(folio);
				762	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
				763	if (s->s[i].state < SECTOR_dirty \|\|
				764	s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
				765	offset = max(offset,
				766	folio_pos(folio) + (i << SECTOR_SHIFT));
				767	goto unlock;
				768	}
				769
				770	*offset = folio_end_pos(folio);
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	771	ret = 0;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	772	unlock:
				773	folio_unlock(folio);
				774	folio_put(folio);
				775	return ret;
				776	}
				777
				778	loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				779	loff_t start_offset,
				780	loff_t end_offset,
				781	unsigned min_replicas,
				782	bool nonblock)
				783	{
				784	struct address_space *mapping = vinode->i_mapping;
				785	loff_t offset = start_offset;
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	786	loff_t ret = 0;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	787
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	788	while (!ret && offset < end_offset)
				789	ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	790
Brian Foster	8c9b0f7	2023-08-14 10:49:42 -0400	[diff] [blame]	791	if (ret && ret != -ENOENT)
				792	return ret;
Kent Overstreet	dbbfca9	2023-08-03 18:18:21 -0400	[diff] [blame]	793	return min(offset, end_offset);
				794	}
				795
				796	int bch2_clamp_data_hole(struct inode *inode,
				797	u64 *hole_start,
				798	u64 *hole_end,
				799	unsigned min_replicas,
				800	bool nonblock)
				801	{
				802	loff_t ret;
				803
				804	ret = bch2_seek_pagecache_hole(inode,
				805	hole_start << 9, hole_end << 9, min_replicas, nonblock) >> 9;
				806	if (ret < 0)
				807	return ret;
				808
				809	*hole_start = ret;
				810
				811	if (hole_start == hole_end)
				812	return 0;
				813
				814	ret = bch2_seek_pagecache_data(inode,
				815	hole_start << 9, hole_end << 9, min_replicas, nonblock) >> 9;
				816	if (ret < 0)
				817	return ret;
				818
				819	*hole_end = ret;
				820	return 0;
				821	}
				822
				823	#endif /* NO_BCACHEFS_FS */