blob: b7e5a3841aa4a31228a579aeded9912d41e3fb54 [file] [log] [blame]
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -03001=======
2Locking
3=======
4
5The text below describes the locking rules for VFS-related methods.
Linus Torvalds1da177e2005-04-16 15:20:36 -07006It is (believed to be) up-to-date. *Please*, if you change anything in
7prototypes or locking protocols - update this file. And update the relevant
8instances in the tree, don't leave that to maintainers of filesystems/devices/
9etc. At the very least, put the list of dubious cases in the end of this file.
10Don't turn it into log - maintainers of out-of-the-tree code are supposed to
11be able to use diff(1).
Linus Torvalds1da177e2005-04-16 15:20:36 -070012
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -030013Thing currently missing here: socket operations. Alexey?
14
15dentry_operations
16=================
17
18prototypes::
19
Al Viro0b728e12012-06-10 16:03:43 -040020 int (*d_revalidate)(struct dentry *, unsigned int);
Jeff Laytonecf3d1f2013-02-20 11:19:05 -050021 int (*d_weak_revalidate)(struct dentry *, unsigned int);
Linus Torvaldsda53be12013-05-21 15:22:44 -070022 int (*d_hash)(const struct dentry *, struct qstr *);
Al Viro6fa67e72016-07-31 16:37:25 -040023 int (*d_compare)(const struct dentry *,
Nick Piggin621e1552011-01-07 17:49:27 +110024 unsigned int, const char *, const struct qstr *);
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 int (*d_delete)(struct dentry *);
Miklos Szeredi285b1022016-06-28 11:47:32 +020026 int (*d_init)(struct dentry *);
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 void (*d_release)(struct dentry *);
 void (*d_prune)(struct dentry *);
28 void (*d_iput)(struct dentry *, struct inode *);
Eric Dumazetc23fbb62007-05-08 00:26:18 -070029 char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
David Howells9875cf82011-01-14 18:45:21 +000030 struct vfsmount *(*d_automount)(struct path *path);
Ian Kentfb5f51c2016-11-24 08:03:41 +110031 int (*d_manage)(const struct path *, bool);
Miklos Szeredifb160432018-07-18 15:44:44 +020032 struct dentry *(*d_real)(struct dentry *, const struct inode *);
Linus Torvalds1da177e2005-04-16 15:20:36 -070033
34locking rules:
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -030036================== =========== ======== ============== ========
37ops rename_lock ->d_lock may block rcu-walk
38================== =========== ======== ============== ========
39d_revalidate: no no yes (ref-walk) maybe
40d_weak_revalidate: no no yes no
41d_hash no no no maybe
42d_compare: yes no no maybe
43d_delete: no yes no no
44d_init: no no yes no
45d_release: no no yes no
46d_prune: no yes no no
47d_iput: no no yes no
48d_dname: no no no no
49d_automount: no no yes no
50d_manage: no no yes (ref-walk) maybe
51d_real no no yes no
52================== =========== ======== ============== ========
53
54inode_operations
55================
56
57prototypes::
58
Christian Brauner6c960e62023-01-13 12:49:13 +010059 int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t, bool);
Al Viro00cd8dd2012-06-10 17:13:09 -040060 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
Linus Torvalds1da177e2005-04-16 15:20:36 -070061 int (*link) (struct dentry *,struct inode *,struct dentry *);
62 int (*unlink) (struct inode *,struct dentry *);
Christian Brauner7a77db92023-01-13 12:49:14 +010063 int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,const char *);
Christian Braunerc54bd912023-01-13 12:49:15 +010064 int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t);
Linus Torvalds1da177e2005-04-16 15:20:36 -070065 int (*rmdir) (struct inode *,struct dentry *);
Christian Brauner5ebb29b2023-01-13 12:49:16 +010066 int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,umode_t,dev_t);
Christian Braunere18275a2023-01-13 12:49:17 +010067 int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
Miklos Szeredi520c8b12014-04-01 17:08:42 +020068 struct inode *, struct dentry *, unsigned int);
Linus Torvalds1da177e2005-04-16 15:20:36 -070069 int (*readlink) (struct dentry *, char __user *,int);
Eric Biggers1a6a3162019-04-11 16:16:29 -070070 const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 void (*truncate) (struct inode *);
Christian Brauner4609e1f2023-01-13 12:49:22 +010072 int (*permission) (struct mnt_idmap *, struct inode *, int, unsigned int);
Christian Braunercac2f8b2022-09-22 17:17:00 +020073 struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);
Christian Braunerc1632a02023-01-13 12:49:11 +010074 int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
Christian Braunerb74d24f2023-01-13 12:49:12 +010075 int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int);
Linus Torvalds1da177e2005-04-16 15:20:36 -070076 ssize_t (*listxattr) (struct dentry *, char *, size_t);
Christoph Hellwigb83be6f2010-12-16 12:04:54 +010077 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
Josef Bacikc3b2da32012-03-26 09:59:21 -040078 void (*update_time)(struct inode *, struct timespec *, int);
Al Virod9585272012-06-22 12:39:14 +040079 int (*atomic_open)(struct inode *, struct dentry *,
Al Viro30d904942012-06-22 12:40:19 +040080 struct file *, unsigned open_flag,
Al Viro6c9b1de2018-07-09 19:20:08 -040081 umode_t create_mode);
Christian Brauner011e2b72023-01-13 12:49:18 +010082 int (*tmpfile) (struct mnt_idmap *, struct inode *,
Miklos Szeredi863f1442022-09-24 07:00:00 +020083 struct file *, umode_t);
Christian Brauner8782a9a2023-01-13 12:49:21 +010084 int (*fileattr_set)(struct mnt_idmap *idmap,
Miklos Szeredi4c5b4792021-04-07 14:36:42 +020085 struct dentry *dentry, struct fileattr *fa);
86 int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
Christian Brauner77435322023-01-13 12:49:19 +010087 struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
Chuck Lever6faddda692023-06-30 13:48:49 -040088 struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
90locking rules:
Christoph Hellwigb83be6f2010-12-16 12:04:54 +010091 all may block
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -030092
Chuck Lever6faddda692023-06-30 13:48:49 -040093============== ==================================================
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -030094ops i_rwsem(inode)
Chuck Lever6faddda692023-06-30 13:48:49 -040095============== ==================================================
Sean Anderson965de0e2018-05-23 22:29:10 -040096lookup: shared
97create: exclusive
98link: exclusive (both)
99mknod: exclusive
100symlink: exclusive
101mkdir: exclusive
102unlink: exclusive (both)
103rmdir: exclusive (both)(see below)
104rename: exclusive (all) (see below)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105readlink: no
Al Viro6b255392015-11-17 10:20:54 -0500106get_link: no
Sean Anderson965de0e2018-05-23 22:29:10 -0400107setattr: exclusive
Nick Pigginb74c79e2011-01-07 17:49:58 +1100108permission: no (may not block if called in rcu-walk mode)
Christian Braunercac2f8b2022-09-22 17:17:00 +0200109get_inode_acl: no
Christian Brauner74203322022-09-22 17:17:01 +0200110get_acl: no
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111getattr: no
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112listxattr: no
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100113fiemap: no
Josef Bacikc3b2da32012-03-26 09:59:21 -0400114update_time: no
Jeff Laytonff467342019-10-30 06:46:54 -0400115atomic_open: shared (exclusive if O_CREAT is set in open flags)
Al Viro48bde8d2013-07-03 16:19:23 +0400116tmpfile: no
Miklos Szeredi4c5b4792021-04-07 14:36:42 +0200117fileattr_get: no or exclusive
118fileattr_set: exclusive
Chuck Lever6faddda692023-06-30 13:48:49 -0400119get_offset_ctx no
120============== ==================================================
Josef Bacikc3b2da32012-03-26 09:59:21 -0400121
Andreas Gruenbacher6c6ef9f2016-09-29 17:48:44 +0200122
Sean Anderson965de0e2018-05-23 22:29:10 -0400123 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
124 exclusive on victim.
Miklos Szeredi2773bf02016-09-27 11:03:58 +0200125 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300127See Documentation/filesystems/directory-locking.rst for more detailed discussion
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128of the locking scheme for directory operations.
129
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300130xattr_handler operations
131========================
132
133prototypes::
134
Andreas Gruenbacher6c6ef9f2016-09-29 17:48:44 +0200135 bool (*list)(struct dentry *dentry);
136 int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
137 struct inode *inode, const char *name, void *buffer,
138 size_t size);
Christian Braunere65ce2a2021-01-21 14:19:27 +0100139 int (*set)(const struct xattr_handler *handler,
Christian Brauner39f60c12023-01-13 12:49:23 +0100140 struct mnt_idmap *idmap,
Christian Braunere65ce2a2021-01-21 14:19:27 +0100141 struct dentry *dentry, struct inode *inode, const char *name,
142 const void *buffer, size_t size, int flags);
Andreas Gruenbacher6c6ef9f2016-09-29 17:48:44 +0200143
144locking rules:
145 all may block
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300146
147===== ==============
148ops i_rwsem(inode)
149===== ==============
Andreas Gruenbacher6c6ef9f2016-09-29 17:48:44 +0200150list: no
151get: no
Sean Anderson965de0e2018-05-23 22:29:10 -0400152set: exclusive
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300153===== ==============
Andreas Gruenbacher6c6ef9f2016-09-29 17:48:44 +0200154
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300155super_operations
156================
157
158prototypes::
159
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 struct inode *(*alloc_inode)(struct super_block *sb);
Al Virofdb0da82019-04-10 14:43:44 -0400161 void (*free_inode)(struct inode *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 void (*destroy_inode)(struct inode *);
Christoph Hellwigaa385722011-05-27 06:53:02 -0400163 void (*dirty_inode) (struct inode *, int flags);
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100164 int (*write_inode) (struct inode *, struct writeback_control *wbc);
Al Viro336fb3b2010-06-08 00:37:12 -0400165 int (*drop_inode) (struct inode *);
166 void (*evict_inode) (struct inode *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 void (*put_super) (struct super_block *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 int (*sync_fs)(struct super_block *sb, int wait);
Takashi Satoc4be0c12009-01-09 16:40:58 -0800169 int (*freeze_fs) (struct super_block *);
170 int (*unfreeze_fs) (struct super_block *);
David Howells726c3342006-06-23 02:02:58 -0700171 int (*statfs) (struct dentry *, struct kstatfs *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 int (*remount_fs) (struct super_block *, int *, char *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 void (*umount_begin) (struct super_block *);
Al Viro34c80b12011-12-08 21:32:45 -0500174 int (*show_options)(struct seq_file *, struct dentry *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
176 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
177
178locking rules:
Al Viro336fb3b2010-06-08 00:37:12 -0400179 All may block [not true, see below]
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300180
181====================== ============ ========================
182ops s_umount note
183====================== ============ ========================
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200184alloc_inode:
Al Virofdb0da82019-04-10 14:43:44 -0400185free_inode: called from RCU callback
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200186destroy_inode:
Christoph Hellwigaa385722011-05-27 06:53:02 -0400187dirty_inode:
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200188write_inode:
Dave Chinnerf283c862011-03-22 22:23:39 +1100189drop_inode: !!!inode->i_lock!!!
Al Viro336fb3b2010-06-08 00:37:12 -0400190evict_inode:
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200191put_super: write
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200192sync_fs: read
Valerie Aurora06fd5162012-06-12 16:20:48 +0200193freeze_fs: write
194unfreeze_fs: write
Al Viro336fb3b2010-06-08 00:37:12 -0400195statfs: maybe(read) (see below)
196remount_fs: write
Christoph Hellwig7e325d32009-06-19 20:22:37 +0200197umount_begin: no
198show_options: no (namespace_sem)
199quota_read: no (see below)
200quota_write: no (see below)
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300201====================== ============ ========================
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
Al Viro336fb3b2010-06-08 00:37:12 -0400203->statfs() has s_umount (shared) when called by ustat(2) (native or
204compat), but that's an accident of bad API; s_umount is used to pin
205the superblock down when we only have dev_t given us by userland to
206identify the superblock. Everything else (statfs(), fstatfs(), etc.)
207doesn't hold it when calling ->statfs() - superblock is pinned down
208by resolving the pathname passed to syscall.
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300209
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210->quota_read() and ->quota_write() functions are both guaranteed to
211be the only ones operating on the quota file by the quota code (via
212dqio_sem) (unless an admin really wants to screw up something and
213writes to quota files with quotas on). For other details about locking
214see also dquot_operations section.
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300215
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300216file_system_type
217================
218
219prototypes::
220
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100221 struct dentry *(*mount) (struct file_system_type *, int,
222 const char *, void *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 void (*kill_sb) (struct super_block *);
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300224
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225locking rules:
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300226
227======= =========
228ops may block
229======= =========
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100230mount yes
231kill_sb yes
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300232======= =========
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233
Al Viro1a102ff2011-03-16 09:07:58 -0400234->mount() returns ERR_PTR or the root dentry; its superblock should be locked
235on return.
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300236
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237->kill_sb() takes a write-locked superblock, does all shutdown work on it,
238unlocks and drops the reference.
239
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300240address_space_operations
241========================
242prototypes::
243
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 int (*writepage)(struct page *page, struct writeback_control *wbc);
Matthew Wilcox (Oracle)08830c82022-04-29 08:45:43 -0400245 int (*read_folio)(struct file *, struct folio *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 int (*writepages)(struct address_space *, struct writeback_control *);
Matthew Wilcox (Oracle)6f31a5a2022-02-09 20:22:00 +0000247 bool (*dirty_folio)(struct address_space *, struct folio *folio);
Matthew Wilcox (Oracle)8151b4c2020-06-01 21:46:44 -0700248 void (*readahead)(struct readahead_control *);
Nick Piggin4e02ed42008-10-29 14:00:55 -0700249 int (*write_begin)(struct file *, struct address_space *mapping,
Matthew Wilcox (Oracle)9d6b0cd2022-02-22 14:31:43 -0500250 loff_t pos, unsigned len,
Nick Piggin4e02ed42008-10-29 14:00:55 -0700251 struct page **pagep, void **fsdata);
252 int (*write_end)(struct file *, struct address_space *mapping,
253 loff_t pos, unsigned len, unsigned copied,
254 struct page *page, void *fsdata);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 sector_t (*bmap)(struct address_space *, sector_t);
Matthew Wilcox (Oracle)128d1f82022-02-09 20:21:32 +0000256 void (*invalidate_folio) (struct folio *, size_t start, size_t len);
Matthew Wilcox (Oracle)fa290002022-04-29 17:00:05 -0400257 bool (*release_folio)(struct folio *, gfp_t);
Matthew Wilcox (Oracle)d2329aa2022-05-01 07:35:31 -0400258 void (*free_folio)(struct folio *);
Christoph Hellwigc8b8e322016-04-07 08:51:58 -0700259 int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
Matthew Wilcox (Oracle)5490da42022-06-06 09:00:16 -0400260 int (*migrate_folio)(struct address_space *, struct folio *dst,
261 struct folio *src, enum migrate_mode);
Matthew Wilcox (Oracle)affa80e2022-02-09 20:21:52 +0000262 int (*launder_folio)(struct folio *);
Matthew Wilcox (Oracle)2e7e80f2022-02-09 20:21:27 +0000263 bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100264 int (*error_remove_page)(struct address_space *, struct page *);
NeilBrowncba738f2022-05-09 18:20:48 -0700265 int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span);
Mel Gorman62c230b2012-07-31 16:44:55 -0700266 int (*swap_deactivate)(struct file *);
NeilBrowncba738f2022-05-09 18:20:48 -0700267 int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
269locking rules:
Matthew Wilcox (Oracle)d2329aa2022-05-01 07:35:31 -0400270 All except dirty_folio and free_folio may block
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
Jan Kara730633f2021-01-28 19:19:45 +0100272====================== ======================== ========= ===============
Matthew Wilcox (Oracle)d2329aa2022-05-01 07:35:31 -0400273ops folio locked i_rwsem invalidate_lock
Jan Kara730633f2021-01-28 19:19:45 +0100274====================== ======================== ========= ===============
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100275writepage: yes, unlocks (see below)
Matthew Wilcox (Oracle)08830c82022-04-29 08:45:43 -0400276read_folio: yes, unlocks shared
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100277writepages:
Matthew Wilcox (Oracle)fa290002022-04-29 17:00:05 -0400278dirty_folio: maybe
Jan Kara730633f2021-01-28 19:19:45 +0100279readahead: yes, unlocks shared
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300280write_begin: locks the page exclusive
281write_end: yes, unlocks exclusive
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100282bmap:
Matthew Wilcox (Oracle)128d1f82022-02-09 20:21:32 +0000283invalidate_folio: yes exclusive
Matthew Wilcox (Oracle)fa290002022-04-29 17:00:05 -0400284release_folio: yes
Matthew Wilcox (Oracle)d2329aa2022-05-01 07:35:31 -0400285free_folio: yes
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100286direct_IO:
Matthew Wilcox (Oracle)5490da42022-06-06 09:00:16 -0400287migrate_folio: yes (both)
Matthew Wilcox (Oracle)affa80e2022-02-09 20:21:52 +0000288launder_folio: yes
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100289is_partially_uptodate: yes
290error_remove_page: yes
Mel Gorman62c230b2012-07-31 16:44:55 -0700291swap_activate: no
292swap_deactivate: no
NeilBrowncba738f2022-05-09 18:20:48 -0700293swap_rw: yes, unlocks
Randy Dunlap7882c552021-07-27 16:22:12 -0700294====================== ======================== ========= ===============
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295
Matthew Wilcox (Oracle)08830c82022-04-29 08:45:43 -0400296->write_begin(), ->write_end() and ->read_folio() may be called from
Matthew Wilcoxf4e6d842016-03-06 23:27:26 -0500297the request handler (/dev/loop).
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298
Matthew Wilcox (Oracle)08830c82022-04-29 08:45:43 -0400299->read_folio() unlocks the folio, either synchronously or via I/O
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300completion.
301
Matthew Wilcox (Oracle)08830c82022-04-29 08:45:43 -0400302->readahead() unlocks the folios that I/O is attempted on like ->read_folio().
Matthew Wilcox (Oracle)8151b4c2020-06-01 21:46:44 -0700303
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300304->writepage() is used for two purposes: for "memory cleansing" and for
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305"sync". These are quite different operations and the behaviour may differ
306depending upon the mode.
307
308If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
309it *must* start I/O against the page, even if that would involve
310blocking on in-progress I/O.
311
312If writepage is called for memory cleansing (sync_mode ==
313WBC_SYNC_NONE) then its role is to get as much writeout underway as
314possible. So writepage should try to avoid blocking against
315currently-in-progress I/O.
316
317If the filesystem is not called for "sync" and it determines that it
318would need to block against in-progress I/O to be able to start new I/O
319against the page the filesystem should redirty the page with
320redirty_page_for_writepage(), then unlock the page and return zero.
321This may also be done to avoid internal deadlocks, but rarely.
322
Robert P. J. Day3a4fa0a2007-10-19 23:10:43 +0200323If the filesystem is called for sync then it must wait on any
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324in-progress I/O and then start new I/O.
325
Nikita Danilov20546062005-05-01 08:58:37 -0700326The filesystem should unlock the page synchronously, before returning to the
327caller, unless ->writepage() returns special AOP_WRITEPAGE_ACTIVATE
328value. AOP_WRITEPAGE_ACTIVATE means that page cannot really be written out
329currently, and VM should stop calling ->writepage() on this page for some
330time. VM does this by moving page to the head of the active list, hence the
331name.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
333Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
334and return zero, writepage *must* run set_page_writeback() against the page,
335followed by unlocking it. Once set_page_writeback() has been run against the
336page, write I/O can be submitted and the write I/O completion handler must run
337end_page_writeback() once the I/O is complete. If no I/O is submitted, the
338filesystem must run end_page_writeback() against the page before returning from
339writepage.
340
341That is: after 2.5.12, pages which are under writeout are *not* locked. Note,
342if the filesystem needs the page to be locked during writeout, that is ok, too,
343the page is allowed to be unlocked at any point in time between the calls to
344set_page_writeback() and end_page_writeback().
345
346Note, failure to run either redirty_page_for_writepage() or the combination of
347set_page_writeback()/end_page_writeback() on a page submitted to writepage
348will leave the page itself marked clean but it will be tagged as dirty in the
349radix tree. This incoherency can lead to all sorts of hard-to-debug problems
350in the filesystem like having dirty inodes at umount and losing written data.
351
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300352->writepages() is used for periodic writeback and for syscall-initiated
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353sync operations. The address_space should start I/O against at least
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300354``*nr_to_write`` pages. ``*nr_to_write`` must be decremented for each page
355which is written. The address_space implementation may write more (or less)
356pages than ``*nr_to_write`` asks for, but it should try to be reasonably close.
357If nr_to_write is NULL, all dirty pages must be written.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
359writepages should _only_ write pages which are present on
360mapping->io_pages.
361
Matthew Wilcox (Oracle)6f31a5a2022-02-09 20:22:00 +0000362->dirty_folio() is called from various places in the kernel when
363the target folio is marked as needing writeback. The folio cannot be
364truncated because either the caller holds the folio lock, or the caller
365has found the folio while holding the page table lock which will block
366truncation.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300368->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100369filesystems and by the swapper. The latter will eventually go away. Please,
370keep it that way and don't breed new callers.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371
Matthew Wilcox (Oracle)128d1f82022-02-09 20:21:32 +0000372->invalidate_folio() is called when the filesystem must attempt to drop
Lukas Czernerd47992f2013-05-21 23:17:23 -0400373some or all of the buffers from the page when it is being truncated. It
Matthew Wilcox (Oracle)128d1f82022-02-09 20:21:32 +0000374returns zero on success. The filesystem must exclusively acquire
375invalidate_lock before invalidating page cache in truncate / hole punch
376path (and thus calling into ->invalidate_folio) to block races between page
377cache invalidation and page cache filling functions (fault, read, ...).
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
Matthew Wilcox (Oracle)32b29cc2023-06-02 16:33:20 -0400379->release_folio() is called when the MM wants to make a change to the
380folio that would invalidate the filesystem's private data. For example,
381it may be about to be removed from the address_space or split. The folio
382is locked and not under writeback. It may be dirty. The gfp parameter
383is not usually used for allocation, but rather to indicate what the
384filesystem may do to attempt to free the private data. The filesystem may
385return false to indicate that the folio's private data cannot be freed.
386If it returns true, it should have already removed the private data from
387the folio. If a filesystem does not provide a ->release_folio method,
388the pagecache will assume that private data is buffer_heads and call
389try_to_free_buffers().
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
Matthew Wilcox (Oracle)d2329aa2022-05-01 07:35:31 -0400391->free_folio() is called when the kernel has dropped the folio
Linus Torvalds6072d132010-12-01 13:35:19 -0500392from the page cache.
393
Matthew Wilcox (Oracle)affa80e2022-02-09 20:21:52 +0000394->launder_folio() may be called prior to releasing a folio if
395it is still found to be dirty. It returns zero if the folio was successfully
396cleaned, or an error value if not. Note that in order to prevent the folio
Trond Myklebuste3db7692007-01-10 23:15:39 -0800397getting mapped back in and redirtied, it needs to be kept locked
398across the entire operation.
399
NeilBrowncba738f2022-05-09 18:20:48 -0700400->swap_activate() will be called to prepare the given file for swap. It
401should perform any validation and preparation necessary to ensure that
402writes can be performed with minimal memory allocation. It should call
403add_swap_extent(), or the helper iomap_swapfile_activate(), and return
404the number of extents added. If IO should be submitted through
405->swap_rw(), it should set SWP_FS_OPS, otherwise IO will be submitted
406directly to the block device ``sis->bdev``.
Mel Gorman62c230b2012-07-31 16:44:55 -0700407
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300408->swap_deactivate() will be called in the sys_swapoff()
Mel Gorman62c230b2012-07-31 16:44:55 -0700409path after ->swap_activate() returned success.
410
NeilBrowncba738f2022-05-09 18:20:48 -0700411->swap_rw will be called for swap IO if SWP_FS_OPS was set by ->swap_activate().
412
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300413file_lock_operations
414====================
415
416prototypes::
417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
419 void (*fl_release_private)(struct file_lock *);
420
421
422locking rules:
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300423
424=================== ============= =========
425ops inode->i_lock may block
426=================== ============= =========
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100427fl_copy_lock: yes no
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300428fl_release_private: maybe maybe[1]_
429=================== ============= =========
Jeff Layton2ece1732014-08-12 10:38:07 -0400430
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300431.. [1]:
432 ->fl_release_private for flock or POSIX locks is currently allowed
433 to block. Leases however can still be freed while the i_lock is held and
434 so fl_release_private called on a lease should not block.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300436lock_manager_operations
437=======================
438
439prototypes::
440
J. Bruce Fields8fb47a42011-07-20 20:21:59 -0400441 void (*lm_notify)(struct file_lock *); /* unblock callback */
442 int (*lm_grant)(struct file_lock *, struct file_lock *, int);
J. Bruce Fields8fb47a42011-07-20 20:21:59 -0400443 void (*lm_break)(struct file_lock *); /* break_lease callback */
444 int (*lm_change)(struct file_lock **, int);
J. Bruce Fields28df3d12017-07-28 16:35:15 -0400445 bool (*lm_breaker_owns_lease)(struct file_lock *);
Dai Ngo2443da22022-05-02 14:19:25 -0700446 bool (*lm_lock_expirable)(struct file_lock *);
447 void (*lm_expire_lock)(void);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
449locking rules:
Jeff Layton1c8c6012013-06-21 08:58:15 -0400450
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700451====================== ============= ================= =========
Dai Ngo9d664772022-02-12 10:12:52 -0800452ops flc_lock blocked_lock_lock may block
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700453====================== ============= ================= =========
Dai Ngo9d664772022-02-12 10:12:52 -0800454lm_notify: no yes no
Jeff Layton7b2296a2013-06-21 08:58:20 -0400455lm_grant: no no no
456lm_break: yes no no
457lm_change yes no no
Dai Ngo9d664772022-02-12 10:12:52 -0800458lm_breaker_owns_lease: yes no no
Dai Ngo2443da22022-05-02 14:19:25 -0700459lm_lock_expirable yes no no
460lm_expire_lock no no yes
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700461====================== ============= ================= =========
Jeff Layton1c8c6012013-06-21 08:58:15 -0400462
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300463buffer_head
464===========
465
466prototypes::
467
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 void (*b_end_io)(struct buffer_head *bh, int uptodate);
469
470locking rules:
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300471
472called from interrupts. In other words, extreme care is needed here.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473bh is locked, but that's all warranties we have here. Currently only RAID1,
474highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
475call this method upon the IO completion.
476
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300477block_device_operations
478=======================
479prototypes::
480
Christoph Hellwige1455d12010-10-06 10:46:53 +0200481 int (*open) (struct block_device *, fmode_t);
482 int (*release) (struct gendisk *, fmode_t);
483 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
484 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
Dan Williams7a9eb202016-06-03 18:06:47 -0700485 int (*direct_access) (struct block_device *, sector_t, void **,
Ross Zwislere2e05392015-08-18 13:55:41 -0600486 unsigned long *);
Christoph Hellwige1455d12010-10-06 10:46:53 +0200487 void (*unlock_native_capacity) (struct gendisk *);
Christoph Hellwige1455d12010-10-06 10:46:53 +0200488 int (*getgeo)(struct block_device *, struct hd_geometry *);
489 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
491locking rules:
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300492
493======================= ===================
Christoph Hellwiga8698702021-05-25 08:12:56 +0200494ops open_mutex
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300495======================= ===================
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100496open: yes
497release: yes
498ioctl: no
499compat_ioctl: no
500direct_access: no
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100501unlock_native_capacity: no
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100502getgeo: no
503swap_slot_free_notify: no (see below)
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300504======================= ===================
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
Christoph Hellwige1455d12010-10-06 10:46:53 +0200506swap_slot_free_notify is called with swap_lock and sometimes the page lock
507held.
508
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300510file_operations
511===============
512
513prototypes::
514
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 loff_t (*llseek) (struct file *, loff_t, int);
516 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
Al Viro293bc982014-02-11 18:37:41 -0500518 ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
519 ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
Jan Karac625b4c2021-05-10 19:13:53 +0200520 int (*iopoll) (struct kiocb *kiocb, bool spin);
Al Viro2233f312013-05-22 21:44:23 -0400521 int (*iterate) (struct file *, struct dir_context *);
Sean Anderson965de0e2018-05-23 22:29:10 -0400522 int (*iterate_shared) (struct file *, struct dir_context *);
Christoph Hellwig6e8b7042018-01-02 22:50:45 +0100523 __poll_t (*poll) (struct file *, struct poll_table_struct *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
525 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
526 int (*mmap) (struct file *, struct vm_area_struct *);
527 int (*open) (struct inode *, struct file *);
528 int (*flush) (struct file *);
529 int (*release) (struct inode *, struct file *);
Josef Bacik02c24a82011-07-16 20:44:56 -0400530 int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 int (*fasync) (int, struct file *, int);
532 int (*lock) (struct file *, int, struct file_lock *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 unsigned long (*get_unmapped_area)(struct file *, unsigned long,
534 unsigned long, unsigned long, unsigned long);
535 int (*check_flags)(int);
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100536 int (*flock) (struct file *, int, struct file_lock *);
537 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
538 size_t, unsigned int);
539 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
540 size_t, unsigned int);
Jeff Laytone6f5c782014-08-22 10:40:25 -0400541 int (*setlease)(struct file *, long, struct file_lock **, void **);
Christoph Hellwig2fe17c12011-01-14 13:07:43 +0100542 long (*fallocate)(struct file *, int, loff_t, loff_t);
Jan Karac625b4c2021-05-10 19:13:53 +0200543 void (*show_fdinfo)(struct seq_file *m, struct file *f);
544 unsigned (*mmap_capabilities)(struct file *);
545 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
546 loff_t, size_t, unsigned int);
547 loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
548 struct file *file_out, loff_t pos_out,
549 loff_t len, unsigned int remap_flags);
550 int (*fadvise)(struct file *, loff_t, loff_t, int);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551
552locking rules:
Linus Torvaldsa11e1d42018-06-28 09:43:44 -0700553 All may block.
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100554
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555->llseek() locking has moved from llseek to the individual llseek
556implementations. If your fs is not using generic_file_llseek, you
557need to acquire and release the appropriate locks in your ->llseek().
558For many filesystems, it is probably safe to acquire the inode
Jan Blunck866707f2010-05-26 14:44:54 -0700559mutex or just to use i_size_read() instead.
560Note: this does not protect the file->f_pos against concurrent modifications
561since this is something that userspace has to take care of.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
Linus Torvalds3e327152023-08-05 12:25:01 -0700563->iterate_shared() is called with i_rwsem held for reading, and with the
564file f_pos_lock held exclusively.
Sean Anderson965de0e2018-05-23 22:29:10 -0400565
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100566->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
567Most instances call fasync_helper(), which does that maintenance, so it's
568not normally something one needs to worry about. Return values > 0 will be
569mapped to zero in the VFS layer.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571->readdir() and ->ioctl() on directories must be changed. Ideally we would
572move ->readdir() to inode_operations and use a separate method for directory
573->ioctl() or kill the latter completely. One of the problems is that for
574anything that resembles union-mount we won't have a struct file for all
575components. And there are other reasons why the current interface is a mess...
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577->read on directories probably must go away - we should just enforce -EISDIR
578in sys_read() and friends.
579
Jeff Laytonf82b4b62014-08-22 18:50:48 -0400580->setlease operations should call generic_setlease() before or after setting
581the lease within the individual filesystem to record the result of the
582operation.
583
Jan Kara730633f2021-01-28 19:19:45 +0100584->fallocate implementation must be really careful to maintain page cache
585consistency when punching holes or performing other operations that invalidate
586page cache contents. Usually the filesystem needs to call
587truncate_inode_pages_range() to invalidate relevant range of the page cache.
588However the filesystem usually also needs to update its internal (and on disk)
589view of file offset -> disk block mapping. Until this update is finished, the
590filesystem needs to block page faults and reads from reloading now-stale page
591cache contents from the disk. Since VFS acquires mapping->invalidate_lock in
592shared mode when loading pages from disk (filemap_fault(), filemap_read(),
593readahead paths), the fallocate implementation must take the invalidate_lock to
594prevent reloading.
595
596->copy_file_range and ->remap_file_range implementations need to serialize
597against modifications of file data while the operation is running. For
598blocking changes through write(2) and similar operations inode->i_rwsem can be
599used. To block changes to file contents via a memory mapping during the
600operation, the filesystem must take mapping->invalidate_lock to coordinate
601with ->page_mkwrite.
602
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300603dquot_operations
604================
605
606prototypes::
607
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 int (*write_dquot) (struct dquot *);
609 int (*acquire_dquot) (struct dquot *);
610 int (*release_dquot) (struct dquot *);
611 int (*mark_dirty) (struct dquot *);
612 int (*write_info) (struct super_block *, int);
613
614These operations are intended to be more or less wrapping functions that ensure
615a proper locking wrt the filesystem and call the generic quota operations.
616
617What filesystem should expect from the generic quota functions:
618
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300619============== ============ =========================
620ops FS recursion Held locks when called
621============== ============ =========================
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622write_dquot: yes dqonoff_sem or dqptr_sem
623acquire_dquot: yes dqonoff_sem or dqptr_sem
624release_dquot: yes dqonoff_sem or dqptr_sem
625mark_dirty: no -
626write_info: yes dqonoff_sem
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300627============== ============ =========================
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628
629FS recursion means calling ->quota_read() and ->quota_write() from superblock
630operations.
631
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632More details about quota locking can be found in fs/dquot.c.
633
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300634vm_operations_struct
635====================
636
637prototypes::
638
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 void (*open)(struct vm_area_struct*);
640 void (*close)(struct vm_area_struct*);
Souptick Joarderfe3136f2018-07-22 18:31:34 +0530641 vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
642 vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
643 vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
Rik van Riel28b2ee22008-07-23 21:27:05 -0700644 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
646locking rules:
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300647
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700648============= ========= ===========================
Michel Lespinassec1e8d7c2020-06-08 21:33:54 -0700649ops mmap_lock PageLocked(page)
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700650============= ========= ===========================
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100651open: yes
652close: yes
653fault: yes can return with page locked
Matthew Wilcox (Oracle)58ef47e2023-03-27 18:45:15 +0100654map_pages: read
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100655page_mkwrite: yes can return with page locked
Boaz Harroshdd906182015-04-15 16:15:11 -0700656pfn_mkwrite: yes
Christoph Hellwigb83be6f2010-12-16 12:04:54 +0100657access: yes
Randy Dunlap6cbef2a2020-06-14 20:22:19 -0700658============= ========= ===========================
Mark Fashehed2f2f92007-07-19 01:47:01 -0700659
Jan Kara730633f2021-01-28 19:19:45 +0100660->fault() is called when a previously not present pte is about to be faulted
661in. The filesystem must find and return the page associated with the passed in
662"pgoff" in the vm_fault structure. If it is possible that the page may be
663truncated and/or invalidated, then the filesystem must lock invalidate_lock,
664then ensure the page is not already truncated (invalidate_lock will block
Nick Pigginb827e492009-04-30 15:08:16 -0700665subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
666locked. The VM will unlock the page.
667
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300668->map_pages() is called when VM asks to map easy accessible pages.
Kirill A. Shutemovbae473a2016-07-26 15:25:20 -0700669Filesystem should find and map pages associated with offsets from "start_pgoff"
Matthew Wilcox (Oracle)58ef47e2023-03-27 18:45:15 +0100670till "end_pgoff". ->map_pages() is called with the RCU lock held and must
Kirill A. Shutemov8c6e50b2014-04-07 15:37:18 -0700671not block. If it's not possible to reach a page without blocking,
672the filesystem should skip it. The filesystem should use do_set_pte() to set up the
Kirill A. Shutemovbae473a2016-07-26 15:25:20 -0700673page table entry. Pointer to entry associated with the page is passed in
Jan Kara82b0f8c2016-12-14 15:06:58 -0800674"pte" field in vm_fault structure. Pointers to entries for other offsets
Kirill A. Shutemovbae473a2016-07-26 15:25:20 -0700675should be calculated relative to "pte".
Kirill A. Shutemov8c6e50b2014-04-07 15:37:18 -0700676
Jan Kara730633f2021-01-28 19:19:45 +0100677->page_mkwrite() is called when a previously read-only pte is about to become
678writeable. The filesystem again must ensure that there are no
679truncate/invalidate races or races with operations such as ->remap_file_range
680or ->copy_file_range, and then return with the page locked. Usually
681mapping->invalidate_lock is suitable for proper serialization. If the page has
682been truncated, the filesystem should not look up a new page like the ->fault()
683handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to
684retry the fault.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300686->pfn_mkwrite() is the same as page_mkwrite but when the pte is
Boaz Harroshdd906182015-04-15 16:15:11 -0700687VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
688VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
689after this call is to make the pte read-write, unless pfn_mkwrite returns
690an error.
691
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300692->access() is called when get_user_pages() fails in
Stefan Weil507da6a2013-12-05 20:34:05 +0100693access_process_vm(), typically used to debug a process through
Rik van Riel28b2ee22008-07-23 21:27:05 -0700694/proc/pid/mem or ptrace. This function is needed only for
695VM_IO | VM_PFNMAP VMAs.
696
Mauro Carvalho Chehabec23eb52019-07-26 09:51:27 -0300697--------------------------------------------------------------------------------
698
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 Dubious stuff
700
701(if you break something or notice that it is broken and do not fix it yourself
702- at least put it here)