// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/mman.h>
#include <linux/backing-dev.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "truncate.h"

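/*
 * Returns true if the given kiocb can be served by direct I/O, false if it
 * must fall back to buffered I/O: encrypted files that fscrypt cannot do
 * DIO on, verity files, data-journalled inodes and inline-data inodes all
 * lack direct I/O support.
 */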
static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (!fscrypt_dio_supported(iocb, iter))
		return false;
	if (fsverity_active(inode))
		return false;
	if (ext4_should_journal_data(inode))
		return false;
	if (ext4_has_inline_data(inode))
		return false;
	return true;
}

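/*
 * Direct I/O read path: takes i_rwsem shared (trylock only for IOCB_NOWAIT)
 * and falls back to a buffered read when the inode does not support DIO.
 */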
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	if (!ext4_dio_supported(iocb, to)) {
		inode_unlock_shared(inode);
		/*
		 * Fall back to buffered I/O if the operation being performed on
		 * the inode is not supported by direct I/O. The IOCB_DIRECT
		 * flag needs to be cleared here in order to ensure that the
		 * direct I/O path within generic_file_read_iter() is not
		 * taken.
		 */
		iocb->ki_flags &= ~IOCB_DIRECT;
		return generic_file_read_iter(iocb, to);
	}

	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, NULL, 0);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}

#ifdef CONFIG_FS_DAX
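/*
 * DAX read path: like the DIO read path above, but the copy is performed
 * synchronously via dax_iomap_rw().
 */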
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}
	/*
	 * Recheck under inode lock - at this point we are sure it cannot
	 * change anymore
	 */
	if (!IS_DAX(inode)) {
		inode_unlock_shared(inode);
		/* Fall back to buffered IO in case we cannot support DAX */
		return generic_file_read_iter(iocb, to);
	}
	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}
#endif

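/* Top-level ->read_iter(): dispatch to the DAX, direct or buffered read path. */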
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	if (!iov_iter_count(to))
		return 0; /* skip atime */

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return ext4_dax_read_iter(iocb, to);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext4_dio_read_iter(iocb, to);

	return generic_file_read_iter(iocb, to);
}

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
			(atomic_read(&inode->i_writecount) == 1) &&
			!EXT4_I(inode)->i_reserved_data_blocks) {
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode, 0);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete. Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block. If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static bool
ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	unsigned long blockmask = sb->s_blocksize - 1;

	if ((pos | iov_iter_alignment(from)) & blockmask)
		return true;

	return false;
}

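/* Does the I/O extend past i_size or the on-disk size (i_disksize)? */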
static bool
ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
{
	if (offset + len > i_size_read(inode) ||
	    offset + len > EXT4_I(inode)->i_disksize)
		return true;
	return false;
}

/* Is IO overwriting allocated and initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
{
	struct ext4_map_blocks map;
	unsigned int blkbits = inode->i_blkbits;
	int err, blklen;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = pos >> blkbits;
	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
	blklen = map.m_len;

	err = ext4_map_blocks(NULL, inode, &map, 0);
	/*
	 * 'err == blklen' means that all of the blocks have been preallocated,
	 * regardless of whether they have been initialized or not. To exclude
	 * unwritten extents, we need to check m_flags.
	 */
	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
}

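/*
 * Common write checks: refuse writes to immutable files, run the generic
 * VFS checks, and cap the request at s_bitmap_maxbytes for non-extent
 * (bitmap-format) files. Returns the number of bytes that may be written
 * or a negative error.
 */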
static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		return ret;

	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
			return -EFBIG;
		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	}

	return iov_iter_count(from);
}

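/*
 * Write checks used by the buffered and DAX write paths, which additionally
 * need file_modified() to strip SUID/SGID bits and update timestamps.
 */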
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret, count;

	count = ext4_generic_write_checks(iocb, from);
	if (count <= 0)
		return count;

	ret = file_modified(iocb->ki_filp);
	if (ret)
		return ret;
	return count;
}

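/* Buffered write path, fully serialized by the exclusive i_rwsem. */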
static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
{
	ssize_t ret;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

	inode_lock(inode);
	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	current->backing_dev_info = inode_to_bdi(inode);
	ret = generic_perform_write(iocb, from);
	current->backing_dev_info = NULL;

out:
	inode_unlock(inode);
	if (likely(ret > 0)) {
		iocb->ki_pos += ret;
		ret = generic_write_sync(iocb, ret);
	}

	return ret;
}

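/*
 * Finish off an extending write: update the on-disk inode size, drop the
 * inode from the orphan list once everything is safely on disk, and
 * truncate away blocks that were allocated but never written (e.g. after
 * a short write or an error).
 */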
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
					   ssize_t written, size_t count)
{
	handle_t *handle;
	bool truncate = false;
	u8 blkbits = inode->i_blkbits;
	ext4_lblk_t written_blk, end_blk;
	int ret;

	/*
	 * Note that EXT4_I(inode)->i_disksize can get extended up to
	 * inode->i_size while the I/O was running due to writeback of delalloc
	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
	 * zeroed/unwritten extents if this is possible; thus we won't leave
	 * uninitialized blocks in a file even if we didn't succeed in writing
	 * as much as we intended.
	 */
	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
	if (offset + count <= EXT4_I(inode)->i_disksize) {
		/*
		 * We need to ensure that the inode is removed from the orphan
		 * list if it has been added prematurely, due to writeback of
		 * delalloc blocks.
		 */
		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

			if (IS_ERR(handle)) {
				ext4_orphan_del(NULL, inode);
				return PTR_ERR(handle);
			}

			ext4_orphan_del(handle, inode);
			ext4_journal_stop(handle);
		}

		return written;
	}

	if (written < 0)
		goto truncate;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (IS_ERR(handle)) {
		written = PTR_ERR(handle);
		goto truncate;
	}

	if (ext4_update_inode_size(inode, offset + written)) {
		ret = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(ret)) {
			written = ret;
			ext4_journal_stop(handle);
			goto truncate;
		}
	}

	/*
	 * We may need to truncate allocated but not written blocks beyond EOF.
	 */
	written_blk = ALIGN(offset + written, 1 << blkbits);
	end_blk = ALIGN(offset + count, 1 << blkbits);
	if (written_blk < end_blk && ext4_can_truncate(inode))
		truncate = true;

	/*
	 * Remove the inode from the orphan list if it has been extended and
	 * everything went OK.
	 */
	if (!truncate && inode->i_nlink)
		ext4_orphan_del(handle, inode);
	ext4_journal_stop(handle);

	if (truncate) {
truncate:
		ext4_truncate_failed_write(inode);
		/*
		 * If the truncate operation failed early, then the inode may
		 * still be on the orphan list. In that case, we need to try
		 * to remove the inode from the in-memory linked list.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	return written;
}

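/*
 * DIO write completion handler: converts unwritten extents covered by the
 * write and, for extending writes, updates the in-core i_size (see the
 * comment in the function body).
 */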
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		return error;

	if (size && flags & IOMAP_DIO_UNWRITTEN) {
		error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
		if (error < 0)
			return error;
	}
	/*
	 * If we are extending the file, we have to update i_size here before
	 * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
	 * buffered reads could zero out too much from page cache pages. Update
	 * of on-disk size will happen later in ext4_dio_write_iter() where
	 * we have enough information to also perform orphan list handling etc.
	 * Note that we perform all extending writes synchronously under
	 * i_rwsem held exclusively so i_size update is safe here in that case.
	 * If the write was not extending, we cannot see pos > i_size here
	 * because operations reducing i_size like truncate wait for all
	 * outstanding DIO before updating i_size.
	 */
	pos += size;
	if (pos > i_size_read(inode))
		i_size_write(inode, pos);

	return 0;
}

static const struct iomap_dio_ops ext4_dio_write_ops = {
	.end_io = ext4_dio_write_end_io,
};

/*
 * The intention here is to start with shared lock acquired then see if any
 * condition requires an exclusive inode lock. If yes, then we restart the
 * whole operation by releasing the shared lock and acquiring exclusive lock.
 *
 * - For unaligned_io we never take shared lock as it may cause data corruption
 *   when two unaligned IOs try to modify the same block e.g. while zeroing.
 *
 * - For extending writes we don't take the shared lock, since extending
 *   requires updating inode i_disksize and/or orphan handling with exclusive
 *   lock.
 *
 * - Shared locking will mostly be used for overwrites. Otherwise we will
 *   switch to an exclusive i_rwsem lock.
 */
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
				     bool *ilock_shared, bool *extend)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t offset;
	size_t count;
	ssize_t ret;

restart:
	ret = ext4_generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	offset = iocb->ki_pos;
	count = ret;
	if (ext4_extending_io(inode, offset, count))
		*extend = true;
	/*
	 * Determine whether the IO operation will overwrite allocated
	 * and initialized blocks.
	 * We need exclusive i_rwsem for changing security info
	 * in file_modified().
	 */
	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
	     !ext4_overwrite_io(inode, offset, count))) {
		if (iocb->ki_flags & IOCB_NOWAIT) {
			ret = -EAGAIN;
			goto out;
		}
		inode_unlock_shared(inode);
		*ilock_shared = false;
		inode_lock(inode);
		goto restart;
	}

	ret = file_modified(file);
	if (ret < 0)
		goto out;

	return count;
out:
	if (*ilock_shared)
		inode_unlock_shared(inode);
	else
		inode_unlock(inode);
	return ret;
}

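/*
 * Direct I/O write path. Pure overwrites of allocated, initialized blocks
 * may run under the shared i_rwsem; unaligned or extending writes take it
 * exclusively, and any tail that DIO could not handle is completed through
 * the buffered path.
 */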
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;
	handle_t *handle;
	struct inode *inode = file_inode(iocb->ki_filp);
	loff_t offset = iocb->ki_pos;
	size_t count = iov_iter_count(from);
	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
	bool extend = false, unaligned_io = false;
	bool ilock_shared = true;

	/*
	 * We initially start with shared inode lock unless it is
	 * unaligned IO which needs exclusive lock anyways.
	 */
	if (ext4_unaligned_io(inode, from, offset)) {
		unaligned_io = true;
		ilock_shared = false;
	}
	/*
	 * Quick check here without any i_rwsem lock to see if it is extending
	 * IO. A more reliable check is done in ext4_dio_write_checks() with
	 * proper locking in place.
	 */
	if (offset + count > i_size_read(inode))
		ilock_shared = false;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (ilock_shared) {
			if (!inode_trylock_shared(inode))
				return -EAGAIN;
		} else {
			if (!inode_trylock(inode))
				return -EAGAIN;
		}
	} else {
		if (ilock_shared)
			inode_lock_shared(inode);
		else
			inode_lock(inode);
	}

	/* Fall back to buffered I/O if the inode does not support direct I/O. */
	if (!ext4_dio_supported(iocb, from)) {
		if (ilock_shared)
			inode_unlock_shared(inode);
		else
			inode_unlock(inode);
		return ext4_buffered_write_iter(iocb, from);
	}

	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
	if (ret <= 0)
		return ret;

	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
		ret = -EAGAIN;
		goto out;
	}

	offset = iocb->ki_pos;
	count = ret;

	/*
	 * Unaligned direct IO must be serialized among each other as zeroing
	 * of partial blocks of two competing unaligned IOs can result in data
	 * corruption.
	 *
	 * So we make sure we don't allow any unaligned IO in flight.
	 * For IOs where we need not wait (like unaligned non-AIO DIO),
	 * the inode_dio_wait() below may anyway become a no-op, since we
	 * start with an exclusive lock.
	 */
	if (unaligned_io)
		inode_dio_wait(inode);

	if (extend) {
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}

		ret = ext4_orphan_add(handle, inode);
		if (ret) {
			ext4_journal_stop(handle);
			goto out;
		}

		ext4_journal_stop(handle);
	}

	if (ilock_shared)
		iomap_ops = &ext4_iomap_overwrite_ops;
	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
			   (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
			   NULL, 0);
	if (ret == -ENOTBLK)
		ret = 0;

	if (extend)
		ret = ext4_handle_inode_extension(inode, offset, ret, count);

out:
	if (ilock_shared)
		inode_unlock_shared(inode);
	else
		inode_unlock(inode);

	if (ret >= 0 && iov_iter_count(from)) {
		ssize_t err;
		loff_t endbyte;

		offset = iocb->ki_pos;
		err = ext4_buffered_write_iter(iocb, from);
		if (err < 0)
			return err;

		/*
		 * We need to ensure that the pages within the page cache for
		 * the range covered by this I/O are written to disk and
		 * invalidated. This is an attempt to preserve the expected
		 * direct I/O semantics in the case we fall back to buffered
		 * I/O to complete the I/O request.
		 */
		ret += err;
		endbyte = offset + err - 1;
		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
						   offset, endbyte);
		if (!err)
			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
						 offset >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
	}

	return ret;
}

#ifdef CONFIG_FS_DAX
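/*
 * DAX write path: always runs under the exclusive i_rwsem; extending writes
 * are protected against crashes via the orphan list, as in the DIO path.
 */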
static ssize_t
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;
	size_t count;
	loff_t offset;
	handle_t *handle;
	bool extend = false;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	offset = iocb->ki_pos;
	count = iov_iter_count(from);

	if (offset + count > EXT4_I(inode)->i_disksize) {
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}

		ret = ext4_orphan_add(handle, inode);
		if (ret) {
			ext4_journal_stop(handle);
			goto out;
		}

		extend = true;
		ext4_journal_stop(handle);
	}

	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);

	if (extend)
		ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
#endif

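/* Top-level ->write_iter(): dispatch to the DAX, direct or buffered write path. */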
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return ext4_dax_write_iter(iocb, from);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext4_dio_write_iter(iocb, from);
	else
		return ext4_buffered_write_iter(iocb, from);
}

#ifdef CONFIG_FS_DAX
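/*
 * Common DAX page fault handler for all fault sizes. Write faults run under
 * a journal handle so blocks can be allocated, and retry on ENOSPC.
 */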
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	int error = 0;
	vm_fault_t result;
	int retries = 0;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;

	/*
	 * We have to distinguish real writes from writes which will result in a
	 * COW page; COW writes should *not* poke the journal (the file will not
	 * be changed). Doing so would cause unintended failures when mounted
	 * read-only.
	 *
	 * We check for VM_SHARED rather than vmf->cow_page since the latter is
	 * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
	 * other sizes, dax_iomap_fault will handle splitting / fallback so that
	 * we eventually come back with a COW page.
	 */
	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
		(vmf->vma->vm_flags & VM_SHARED);
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	pfn_t pfn;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vmf->vma->vm_file);
		filemap_invalidate_lock_shared(mapping);
retry:
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
					       EXT4_DATA_TRANS_BLOCKS(sb));
		if (IS_ERR(handle)) {
			filemap_invalidate_unlock_shared(mapping);
			sb_end_pagefault(sb);
			return VM_FAULT_SIGBUS;
		}
	} else {
		filemap_invalidate_lock_shared(mapping);
	}
	result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
	if (write) {
		ext4_journal_stop(handle);

		if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
		    ext4_should_retry_alloc(sb, &retries))
			goto retry;
		/* Handling synchronous page fault? */
		if (result & VM_FAULT_NEEDDSYNC)
			result = dax_finish_sync_fault(vmf, pe_size, pfn);
		filemap_invalidate_unlock_shared(mapping);
		sb_end_pagefault(sb);
	} else {
		filemap_invalidate_unlock_shared(mapping);
	}

	return result;
}

static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
	return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
	.fault		= ext4_dax_fault,
	.huge_fault	= ext4_dax_huge_fault,
	.page_mkwrite	= ext4_dax_fault,
	.pfn_mkwrite	= ext4_dax_fault,
};
#else
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif

static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= ext4_page_mkwrite,
};

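/*
 * ->mmap() handler: pick the DAX or the regular vm_operations and reject
 * MAP_SYNC mappings that the backing device cannot support.
 */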
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct dax_device *dax_dev = sbi->s_daxdev;

	if (unlikely(ext4_forced_shutdown(sbi)))
		return -EIO;

	/*
	 * We don't support synchronous mappings for non-DAX files, nor for
	 * DAX files if the underlying dax_device is not synchronous.
	 */
	if (!daxdev_mapping_supported(vma, dax_dev))
		return -EOPNOTSUPP;

	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
		vma->vm_ops = &ext4_dax_vm_ops;
		vma->vm_flags |= VM_HUGEPAGE;
	} else {
		vma->vm_ops = &ext4_file_vm_ops;
	}
	return 0;
}

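/*
 * Record the mount point in the superblock's s_last_mounted field the first
 * time a file is opened after a mount (see the comment in the body for why).
 */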
static int ext4_sample_last_mounted(struct super_block *sb,
				    struct vfsmount *mnt)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct path path;
	char buf[64], *cp;
	handle_t *handle;
	int err;

	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
		return 0;

	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
		return 0;

	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
	/*
	 * Sample where the filesystem has been mounted and
	 * store it in the superblock for sysadmin convenience
	 * when trying to sort through large numbers of block
	 * devices or filesystem images.
	 */
	memset(buf, 0, sizeof(buf));
	path.mnt = mnt;
	path.dentry = mnt->mnt_root;
	cp = d_path(&path, buf, sizeof(buf));
	err = 0;
	if (IS_ERR(cp))
		goto out;

	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
	err = PTR_ERR(handle);
	if (IS_ERR(handle))
		goto out;
	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
					    EXT4_JTR_NONE);
	if (err)
		goto out_journal;
	lock_buffer(sbi->s_sbh);
	strncpy(sbi->s_es->s_last_mounted, cp,
		sizeof(sbi->s_es->s_last_mounted));
	ext4_superblock_csum_set(sb);
	unlock_buffer(sbi->s_sbh);
	ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
out_journal:
	ext4_journal_stop(handle);
out:
	sb_end_intwrite(sb);
	return err;
}

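/*
 * ->open() handler: sample the mount point, run fscrypt/fsverity open
 * checks, attach the jbd2 inode for writable opens, and advertise
 * FMODE_NOWAIT and async buffered read support.
 */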
static int ext4_file_open(struct inode *inode, struct file *filp)
{
	int ret;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
	if (ret)
		return ret;

	ret = fscrypt_file_open(inode, filp);
	if (ret)
		return ret;

	ret = fsverity_file_open(inode, filp);
	if (ret)
		return ret;

	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
	 */
	if (filp->f_mode & FMODE_WRITE) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}

	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return dquot_file_open(inode, filp);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes;

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	else
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	default:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_HOLE:
		inode_lock_shared(inode);
		offset = iomap_seek_hole(inode, offset,
					 &ext4_iomap_report_ops);
		inode_unlock_shared(inode);
		break;
	case SEEK_DATA:
		inode_lock_shared(inode);
		offset = iomap_seek_data(inode, offset,
					 &ext4_iomap_report_ops);
		inode_unlock_shared(inode);
		break;
	}

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, maxbytes);
}

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read_iter	= ext4_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.iopoll		= iocb_bio_iopoll,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_file_getattr,
	.listxattr	= ext4_listxattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
	.fileattr_get	= ext4_fileattr_get,
	.fileattr_set	= ext4_fileattr_set,
};