/* SPDX-License-Identifier: GPL-2.0-or-later */
/* internal.h: mm/ internal definitions
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/tracepoint-defs.h>

struct folio_batch;

/*
 * The set of flags that only affect watermark checking and reclaim
 * behaviour. This is used by the MM to obey the caller constraints
 * about IO, FS and watermark checking while ignoring placement
 * hints such as HIGHMEM usage.
 */
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
			__GFP_NOLOCKDEP)

/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))

/* Control allocation cpuset and node placement constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)

/*
 * Unlike WARN_ON_ONCE(), no warning is issued when __GFP_NOWARN is
 * specified.
 */
#define WARN_ON_ONCE_GFP(cond, gfp)	({				\
	static bool __section(".data.once") __warned;			\
	int __ret_warn_once = !!(cond);					\
									\
	if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
		__warned = true;					\
		WARN_ON(1);						\
	}								\
	unlikely(__ret_warn_once);					\
})
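/*
 * Illustrative usage sketch (not part of this header): a GFP-aware sanity
 * check in an allocation path might look like
 *
 *	if (WARN_ON_ONCE_GFP(order > MAX_ORDER, gfp))
 *		return NULL;
 *
 * The caller still gets the boolean result of the condition, but the
 * one-shot warning is suppressed when the allocation passed __GFP_NOWARN.
 */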

void page_writeback_init(void);

/*
 * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages,
 * its nr_pages_mapped would be 0x400000: choose the COMPOUND_MAPPED bit
 * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
 * leaves nr_pages_mapped at 0, but avoid surprise if it participates later.
 */
#define COMPOUND_MAPPED		0x800000
#define FOLIO_PAGES_MAPPED	(COMPOUND_MAPPED - 1)
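/*
 * For reference: 16GB / 4kB == 1 << 22 == 0x400000 pages, so bit 23
 * (0x800000) is the first bit guaranteed to sit above any per-page mapcount
 * total, and FOLIO_PAGES_MAPPED masks off everything below it.
 */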

/*
 * Flags passed to __show_mem() and show_free_areas() to suppress output in
 * various contexts.
 */
#define SHOW_MEM_FILTER_NODES		(0x0001u)	/* disallowed nodes */

/*
 * How many individual pages have an elevated _mapcount.  Excludes
 * the folio's entire_mapcount.
 */
static inline int folio_nr_pages_mapped(struct folio *folio)
{
	return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
}

static inline void *folio_raw_mapping(struct folio *folio)
{
	unsigned long mapping = (unsigned long)folio->mapping;

	return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
}

void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
						int nr_throttled);
static inline void acct_reclaim_writeback(struct folio *folio)
{
	pg_data_t *pgdat = folio_pgdat(folio);
	int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);

	if (nr_throttled)
		__acct_reclaim_writeback(pgdat, folio, nr_throttled);
}

static inline void wake_throttle_isolated(pg_data_t *pgdat)
{
	wait_queue_head_t *wqh;

	wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
	if (waitqueue_active(wqh))
		wake_up(wqh);
}

vm_fault_t do_swap_page(struct vm_fault *vmf);
void folio_rotate_reclaimable(struct folio *folio);
bool __folio_end_writeback(struct folio *folio);
void deactivate_file_folio(struct folio *folio);
void folio_activate(struct folio *folio);

void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
		   struct vm_area_struct *start_vma, unsigned long floor,
		   unsigned long ceiling, bool mm_wr_locked);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);

struct zap_details;
void unmap_page_range(struct mmu_gather *tlb,
			     struct vm_area_struct *vma,
			     unsigned long addr, unsigned long end,
			     struct zap_details *details);

void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
		unsigned int order);
void force_page_cache_ra(struct readahead_control *, unsigned long nr);
static inline void force_page_cache_readahead(struct address_space *mapping,
		struct file *file, pgoff_t index, unsigned long nr_to_read)
{
	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
	force_page_cache_ra(&ractl, nr_to_read);
}

unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
void filemap_free_folio(struct address_space *mapping, struct folio *folio);
int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
		loff_t end);
long invalidate_inode_page(struct page *page);
unsigned long mapping_try_invalidate(struct address_space *mapping,
		pgoff_t start, pgoff_t end, unsigned long *nr_failed);

/**
 * folio_evictable - Test whether a folio is evictable.
 * @folio: The folio to test.
 *
 * Test whether @folio is evictable -- i.e., should be placed on
 * active/inactive lists vs unevictable list.
 *
 * Reasons folio might not be evictable:
 * 1. folio's mapping marked unevictable
 * 2. One of the pages in the folio is part of an mlocked VMA
 */
static inline bool folio_evictable(struct folio *folio)
{
	bool ret;

	/* Prevent address_space of inode and swap cache from being freed */
	rcu_read_lock();
	ret = !mapping_unevictable(folio_mapping(folio)) &&
			!folio_test_mlocked(folio);
	rcu_read_unlock();
	return ret;
}

/*
 * Turn a non-refcounted page (->_refcount == 0) into refcounted with
 * a count of one.
 */
static inline void set_page_refcounted(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_ref_count(page), page);
	set_page_count(page, 1);
}

/*
 * Return true if a folio needs ->release_folio() calling upon it.
 */
static inline bool folio_needs_release(struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);

	return folio_has_private(folio) ||
		(mapping && mapping_release_always(mapping));
}

extern unsigned long highest_memmap_pfn;

/*
 * Maximum number of reclaim retries without progress before the OOM
 * killer is considered the only way forward.
 */
#define MAX_RECLAIM_RETRIES 16

/*
 * in mm/vmscan.c:
 */
bool isolate_lru_page(struct page *page);
bool folio_isolate_lru(struct folio *folio);
void putback_lru_page(struct page *page);
void folio_putback_lru(struct folio *folio);
extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);

/*
 * in mm/rmap.c:
 */
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);

/*
 * in mm/page_alloc.c
 */
#define K(x) ((x) << (PAGE_SHIFT-10))

extern char * const zone_names[MAX_NR_ZONES];

/* perform sanity checks on struct pages being allocated or freed */
DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);

extern int min_free_kbytes;

void setup_per_zone_wmarks(void);
void calculate_min_free_kbytes(void);
int __meminit init_per_zone_wmark_min(void);
void page_alloc_sysctl_init(void);

/*
 * Structure for holding the mostly immutable allocation parameters passed
 * between functions involved in allocations, including the alloc_pages*
 * family of functions.
 *
 * nodemask, migratetype and highest_zoneidx are initialized only once in
 * __alloc_pages() and then never change.
 *
 * zonelist, preferred_zone and highest_zoneidx are set first in
 * __alloc_pages() for the fast path, and might be later changed
 * in __alloc_pages_slowpath(). All other functions pass the whole structure
 * by a const pointer.
 */
struct alloc_context {
	struct zonelist *zonelist;
	nodemask_t *nodemask;
	struct zoneref *preferred_zoneref;
	int migratetype;

	/*
	 * highest_zoneidx represents highest usable zone index of
	 * the allocation request. Due to the nature of the zone,
	 * memory on lower zone than the highest_zoneidx will be
	 * protected by lowmem_reserve[highest_zoneidx].
	 *
	 * highest_zoneidx is also used by reclaim/compaction to limit
	 * the target zone since higher zone than this index cannot be
	 * usable for this allocation request.
	 */
	enum zone_type highest_zoneidx;
	bool spread_dirty_pages;
};

/*
 * This function returns the order of a free page in the buddy system. In
 * general, page_zone(page)->lock must be held by the caller to prevent the
 * page from being allocated in parallel and returning garbage as the order.
 * If a caller does not hold page_zone(page)->lock, it must guarantee that the
 * page cannot be allocated or merged in parallel. Alternatively, it must
 * handle invalid values gracefully, and use buddy_order_unsafe() below.
 */
static inline unsigned int buddy_order(struct page *page)
{
	/* PageBuddy() must be checked by the caller */
	return page_private(page);
}

/*
 * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
 * PageBuddy() should be checked first by the caller to minimize race window,
 * and invalid values must be handled gracefully.
 *
 * READ_ONCE is used so that if the caller assigns the result into a local
 * variable and e.g. tests it for valid range before using, the compiler cannot
 * decide to remove the variable and inline the page_private(page) multiple
 * times, potentially observing different values in the tests and the actual
 * use of the result.
 */
#define buddy_order_unsafe(page)	READ_ONCE(page_private(page))
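/*
 * Hypothetical lockless usage sketch, mirroring the READ_ONCE rationale
 * above: read the order once into a local variable and range-check that
 * local before use, so the compiler cannot re-read page_private() and
 * observe different values in the check and in the use:
 *
 *	unsigned int order = buddy_order_unsafe(page);
 *
 *	if (order is within the valid range)
 *		... use order; it may be stale, but it is read only once ...
 */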

/*
 * This function checks whether a page is free && is the buddy.
 * We can coalesce a page and its buddy if
 * (a) the buddy is not in a hole (check before calling!) &&
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order &&
 * (d) a page and its buddy are in the same zone.
 *
 * For recording whether a page is in the buddy system, we set PageBuddy.
 * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
 */
static inline bool page_is_buddy(struct page *page, struct page *buddy,
				 unsigned int order)
{
	if (!page_is_guard(buddy) && !PageBuddy(buddy))
		return false;

	if (buddy_order(buddy) != order)
		return false;

	/*
	 * zone check is done late to avoid uselessly calculating
	 * zone/node ids for pages that could never merge.
	 */
	if (page_zone_id(page) != page_zone_id(buddy))
		return false;

	VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);

	return true;
}

/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
 */
static inline unsigned long
__find_buddy_pfn(unsigned long page_pfn, unsigned int order)
{
	return page_pfn ^ (1 << order);
}
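/*
 * Worked example of the arithmetic above: for pfn 12 at order 2,
 * __find_buddy_pfn(12, 2) == 12 ^ (1 << 2) == 8, and the order-3 parent of
 * either buddy is B & ~(1 << 2), i.e. 12 & ~4 == 8 and 8 & ~4 == 8.
 */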

/*
 * Find the buddy of @page and validate it.
 * @page: The input page
 * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the
 *       function is used in the performance-critical __free_one_page().
 * @order: The order of the page
 * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to
 *             page_to_pfn().
 *
 * The found buddy might not be PageBuddy, might be outside @page's zone, or
 * its order might differ from @page's. Validation is necessary before using it.
 *
 * Return: the found buddy page or NULL if not found.
 */
static inline struct page *find_buddy_page_pfn(struct page *page,
			unsigned long pfn, unsigned int order, unsigned long *buddy_pfn)
{
	unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order);
	struct page *buddy;

	buddy = page + (__buddy_pfn - pfn);
	if (buddy_pfn)
		*buddy_pfn = __buddy_pfn;

	if (page_is_buddy(page, buddy, order))
		return buddy;
	return NULL;
}

extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone);

static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone)
{
	if (zone->contiguous)
		return pfn_to_page(start_pfn);

	return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

void set_zone_contiguous(struct zone *zone);

static inline void clear_zone_contiguous(struct zone *zone)
{
	zone->contiguous = false;
}

extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
				    int mt);
extern void memblock_free_pages(struct page *page, unsigned long pfn,
					unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order);

/*
 * This will have no effect, other than possibly generating a warning, if the
 * caller passes in a non-large folio.
 */
static inline void folio_set_order(struct folio *folio, unsigned int order)
{
	if (WARN_ON_ONCE(!order || !folio_test_large(folio)))
		return;

	folio->_flags_1 = (folio->_flags_1 & ~0xffUL) | order;
#ifdef CONFIG_64BIT
	folio->_folio_nr_pages = 1U << order;
#endif
}
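/*
 * Worked example (assuming 4kB base pages): folio_set_order(folio, 9) stores
 * 9 in the low byte of _flags_1 and, on 64-bit, sets _folio_nr_pages to
 * 1 << 9 == 512, i.e. a 2MB folio.
 */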

void folio_undo_large_rmappable(struct folio *folio);

static inline struct folio *page_rmappable_folio(struct page *page)
{
	struct folio *folio = (struct folio *)page;

	if (folio && folio_order(folio) > 1)
		folio_prep_large_rmappable(folio);
	return folio;
}

static inline void prep_compound_head(struct page *page, unsigned int order)
{
	struct folio *folio = (struct folio *)page;

	folio_set_order(folio, order);
	atomic_set(&folio->_entire_mapcount, -1);
	atomic_set(&folio->_nr_pages_mapped, 0);
	atomic_set(&folio->_pincount, 0);
}

static inline void prep_compound_tail(struct page *head, int tail_idx)
{
	struct page *p = head + tail_idx;

	p->mapping = TAIL_MAPPING;
	set_compound_head(p, head);
	set_page_private(p, 0);
}

extern void prep_compound_page(struct page *page, unsigned int order);

extern void post_alloc_hook(struct page *page, unsigned int order,
					gfp_t gfp_flags);
extern int user_min_free_kbytes;

extern void free_unref_page(struct page *page, unsigned int order);
extern void free_unref_page_list(struct list_head *list);

extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);
extern void zone_pcp_init(struct zone *zone);

extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
			  phys_addr_t min_addr,
			  int nid, bool exact_nid);

void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
		unsigned long, enum meminit_context, struct vmem_altmap *, int);


int split_free_page(struct page *free_page,
			unsigned int order, unsigned long split_pfn_offset);

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

/*
 * in mm/compaction.c
 */
/*
 * compact_control is used to track pages being migrated and the free pages
 * they are being migrated to during memory compaction. The free_pfn starts
 * at the end of a zone and migrate_pfn begins at the start. Movable pages
 * are moved to the end of a zone during a compaction run and the run
 * completes when free_pfn <= migrate_pfn
 */
struct compact_control {
	struct list_head freepages;	/* List of free pages to migrate to */
	struct list_head migratepages;	/* List of pages being migrated */
	unsigned int nr_freepages;	/* Number of isolated free pages */
	unsigned int nr_migratepages;	/* Number of pages to migrate */
	unsigned long free_pfn;		/* isolate_freepages search base */
	/*
	 * Acts as an in/out parameter to page isolation for migration.
	 * isolate_migratepages uses it as a search base.
	 * isolate_migratepages_block will update the value to the next pfn
	 * after the last isolated one.
	 */
	unsigned long migrate_pfn;
	unsigned long fast_start_pfn;	/* a pfn to start linear scan from */
	struct zone *zone;
	unsigned long total_migrate_scanned;
	unsigned long total_free_scanned;
	unsigned short fast_search_fail;/* failures to use free list searches */
	short search_order;		/* order to start a fast search at */
	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
	int order;			/* order a direct compactor needs */
	int migratetype;		/* migratetype of direct compactor */
	const unsigned int alloc_flags;	/* alloc flags of a direct compactor */
	const int highest_zoneidx;	/* zone index of a direct compactor */
	enum migrate_mode mode;		/* Async or sync migration mode */
	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
	bool no_set_skip_hint;		/* Don't mark blocks for skipping */
	bool ignore_block_suitable;	/* Scan blocks considered unsuitable */
	bool direct_compaction;		/* False from kcompactd or /proc/... */
	bool proactive_compaction;	/* kcompactd proactive compaction */
	bool whole_zone;		/* Whole zone should/has been scanned */
	bool contended;			/* Signal lock contention */
	bool finish_pageblock;		/* Scan the remainder of a pageblock. Used
					 * when there are potentially transient
					 * isolation or migration failures to
					 * ensure forward progress.
					 */
	bool alloc_contig;		/* alloc_contig_range allocation */
};

/*
 * Used in direct compaction when a page should be taken from the freelists
 * immediately when one is created during the free path.
 */
struct capture_control {
	struct compact_control *cc;
	struct page *page;
};

unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn);
int
isolate_migratepages_range(struct compact_control *cc,
			   unsigned long low_pfn, unsigned long end_pfn);

int __alloc_contig_migrate_range(struct compact_control *cc,
					unsigned long start, unsigned long end);

/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void init_cma_reserved_pageblock(struct page *page);

#endif /* CONFIG_COMPACTION || CONFIG_CMA */

int find_suitable_fallback(struct free_area *area, unsigned int order,
			int migratetype, bool only_stealable, bool *can_steal);

static inline bool free_area_empty(struct free_area *area, int migratetype)
{
	return list_empty(&area->free_list[migratetype]);
}

/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area (including shadow stacks)
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
	return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}

/* mm/util.c */
struct anon_vma *folio_anon_vma(struct folio *folio);

#ifdef CONFIG_MMU
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *locked);
extern long faultin_vma_page_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end,
				   bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
			       unsigned long bytes);

/*
 * NOTE: This function can't tell whether the folio is "fully mapped" in the
 * range.
 * "Fully mapped" means all the pages of the folio are associated with the
 * page table of the range, while this function just checks whether the folio
 * range is within the range [start, end). The caller needs to do the page
 * table check if it cares about the page table association.
 *
 * Typical usage (like mlock or madvise) is:
 * The caller knows at least 1 page of the folio is associated with the page
 * table of the VMA and the range [start, end) intersects the VMA range. The
 * caller wants to know whether the folio is fully associated with the range.
 * It calls this function to check whether the folio is in the range first.
 * Then it checks the page table to know whether the folio is fully mapped
 * to the range.
 */
static inline bool
folio_within_range(struct folio *folio, struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
{
	pgoff_t pgoff, addr;
	unsigned long vma_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	VM_WARN_ON_FOLIO(folio_test_ksm(folio), folio);
	if (start > end)
		return false;

	if (start < vma->vm_start)
		start = vma->vm_start;

	if (end > vma->vm_end)
		end = vma->vm_end;

	pgoff = folio_pgoff(folio);

	/* if folio start address is not in vma range */
	if (!in_range(pgoff, vma->vm_pgoff, vma_pglen))
		return false;

	addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);

	return !(addr < start || end - addr < folio_size(folio));
}
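/*
 * Worked example (hypothetical values, 4kB pages): for a 16-page (64kB)
 * folio whose first page maps at addr == 0x200000, a call with
 * start == 0x1f0000 and end == 0x210000 returns true, because
 * addr >= start and end - addr == 0x10000 == folio_size(folio).
 * With end == 0x208000 it returns false: only half of the folio fits.
 */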

static inline bool
folio_within_vma(struct folio *folio, struct vm_area_struct *vma)
{
	return folio_within_range(folio, vma, vma->vm_start, vma->vm_end);
}

/*
 * mlock_vma_folio() and munlock_vma_folio():
 * should be called with vma's mmap_lock held for read or write,
 * under page table lock for the pte/pmd being added or removed.
 *
 * mlock is usually called at the end of page_add_*_rmap(), munlock at
 * the end of page_remove_rmap(); but new anon folios are managed by
 * folio_add_lru_vma() calling mlock_new_folio().
 */
void mlock_folio(struct folio *folio);
static inline void mlock_vma_folio(struct folio *folio,
				struct vm_area_struct *vma)
{
	/*
	 * The VM_SPECIAL check here serves two purposes.
	 * 1) VM_IO check prevents migration from double-counting during mlock.
	 * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
	 *    is never left set on a VM_SPECIAL vma, there is an interval while
	 *    file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
	 *    still be set while VM_SPECIAL bits are added: so ignore it then.
	 */
	if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED))
		mlock_folio(folio);
}

void munlock_folio(struct folio *folio);
static inline void munlock_vma_folio(struct folio *folio,
					struct vm_area_struct *vma)
{
	/*
	 * Always munlock when this function is called.  Ideally, we would
	 * only munlock if some page of the folio is unmapped from the VMA,
	 * leaving the folio not fully mapped to the VMA.
	 *
	 * But it's not easy to confirm that is the situation.  So we always
	 * munlock the folio and let page reclaim correct it if that was
	 * wrong.
	 */
	if (unlikely(vma->vm_flags & VM_LOCKED))
		munlock_folio(folio);
}

void mlock_new_folio(struct folio *folio);
bool need_mlock_drain(int cpu);
void mlock_drain_local(void);
void mlock_drain_remote(int cpu);

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
 * Return the start of the user virtual address at the given page offset
 * within a vma.
 */
static inline unsigned long
vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
		  struct vm_area_struct *vma)
{
	unsigned long address;

	if (pgoff >= vma->vm_pgoff) {
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		/* Check for address beyond vma (or wrapped through 0?) */
		if (address < vma->vm_start || address >= vma->vm_end)
			address = -EFAULT;
	} else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
		/* Test above avoids possibility of wrap to 0 on 32-bit */
		address = vma->vm_start;
	} else {
		address = -EFAULT;
	}
	return address;
}
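/*
 * Worked example (hypothetical values, 4kB pages): with vm_start == 0x100000
 * and vm_pgoff == 0x10, vma_pgoff_address(0x13, 1, vma) returns
 * 0x100000 + ((0x13 - 0x10) << PAGE_SHIFT) == 0x103000, provided that lies
 * below vm_end; a range entirely below vm_pgoff yields -EFAULT instead.
 */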

/*
 * Return the start of the user virtual address of a page within a vma.
 * Returns -EFAULT if all of the page is outside the range of the vma.
 * If the page is a compound head, the entire compound page is considered.
| 726 | */ |
| 727 | static inline unsigned long |
| 728 | vma_address(struct page *page, struct vm_area_struct *vma) |
| 729 | { |
| 730 | VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ |
| 731 | return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma); |
| 732 | } |
| 733 | |
| 734 | /* |
Matthew Wilcox (Oracle) | 2aff7a4 | 2022-02-03 11:40:17 -0500 | [diff] [blame] | 735 | * Then at what user virtual address will none of the range be found in vma? |
Hugh Dickins | 494334e | 2021-06-15 18:23:56 -0700 | [diff] [blame] | 736 | * Assumes that vma_address() already returned a good starting address. |
Hugh Dickins | 494334e | 2021-06-15 18:23:56 -0700 | [diff] [blame] | 737 | */ |
Matthew Wilcox (Oracle) | 2aff7a4 | 2022-02-03 11:40:17 -0500 | [diff] [blame] | 738 | static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw) |
Hugh Dickins | 494334e | 2021-06-15 18:23:56 -0700 | [diff] [blame] | 739 | { |
Matthew Wilcox (Oracle) | 2aff7a4 | 2022-02-03 11:40:17 -0500 | [diff] [blame] | 740 | struct vm_area_struct *vma = pvmw->vma; |
Hugh Dickins | 494334e | 2021-06-15 18:23:56 -0700 | [diff] [blame] | 741 | pgoff_t pgoff; |
| 742 | unsigned long address; |
Kirill A. Shutemov | e9b61f1 | 2016-01-15 16:54:10 -0800 | [diff] [blame] | 743 | |
Matthew Wilcox (Oracle) | 2aff7a4 | 2022-02-03 11:40:17 -0500 | [diff] [blame] | 744 | /* Common case, plus ->pgoff is invalid for KSM */ |
| 745 | if (pvmw->nr_pages == 1) |
| 746 | return pvmw->address + PAGE_SIZE; |
| 747 | |
| 748 | pgoff = pvmw->pgoff + pvmw->nr_pages; |
Hugh Dickins | 494334e | 2021-06-15 18:23:56 -0700 | [diff] [blame] | 749 | address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
| 750 | /* Check for address beyond vma (or wrapped through 0?) */ |
| 751 | if (address < vma->vm_start || address > vma->vm_end) |
| 752 | address = vma->vm_end; |
| 753 | return address; |
Kirill A. Shutemov | e9b61f1 | 2016-01-15 16:54:10 -0800 | [diff] [blame] | 754 | } |
| 755 | |
Johannes Weiner | 89b1533 | 2019-11-30 17:50:22 -0800 | [diff] [blame] | 756 | static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, |
| 757 | struct file *fpin) |
| 758 | { |
| 759 | int flags = vmf->flags; |
| 760 | |
| 761 | if (fpin) |
| 762 | return fpin; |
| 763 | |
| 764 | /* |
| 765 | * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or |
Michel Lespinasse | c1e8d7c | 2020-06-08 21:33:54 -0700 | [diff] [blame] | 766 | * anything, so we only pin the file and drop the mmap_lock if only |
Peter Xu | 4064b98 | 2020-04-01 21:08:45 -0700 | [diff] [blame] | 767 | * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. |
Johannes Weiner | 89b1533 | 2019-11-30 17:50:22 -0800 | [diff] [blame] | 768 | */ |
Peter Xu | 4064b98 | 2020-04-01 21:08:45 -0700 | [diff] [blame] | 769 | if (fault_flag_allow_retry_first(flags) && |
| 770 | !(flags & FAULT_FLAG_RETRY_NOWAIT)) { |
Johannes Weiner | 89b1533 | 2019-11-30 17:50:22 -0800 | [diff] [blame] | 771 | fpin = get_file(vmf->vma->vm_file); |
Matthew Wilcox (Oracle) | 0790e1e | 2023-08-12 01:20:33 +0100 | [diff] [blame] | 772 | release_fault_lock(vmf); |
Johannes Weiner | 89b1533 | 2019-11-30 17:50:22 -0800 | [diff] [blame] | 773 | } |
| 774 | return fpin; |
| 775 | } |
Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 776 | #else /* !CONFIG_MMU */ |
Matthew Wilcox (Oracle) | 3506659 | 2021-11-28 14:53:35 -0500 | [diff] [blame] | 777 | static inline void unmap_mapping_folio(struct folio *folio) { } |
Lorenzo Stoakes | 96f97c4 | 2023-01-12 12:39:31 +0000 | [diff] [blame] | 778 | static inline void mlock_new_folio(struct folio *folio) { } |
| 779 | static inline bool need_mlock_drain(int cpu) { return false; } |
| 780 | static inline void mlock_drain_local(void) { } |
| 781 | static inline void mlock_drain_remote(int cpu) { } |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 782 | static inline void vunmap_range_noflush(unsigned long start, unsigned long end) |
| 783 | { |
| 784 | } |
Hugh Dickins | af8e335 | 2009-12-14 17:58:59 -0800 | [diff] [blame] | 785 | #endif /* !CONFIG_MMU */ |
Lee Schermerhorn | 894bc31 | 2008-10-18 20:26:39 -0700 | [diff] [blame] | 786 | |
Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 787 | /* Memory initialisation debug and verification */ |
Mike Rapoport (IBM) | 9420f89 | 2023-03-21 19:05:02 +0200 | [diff] [blame] | 788 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| 789 | DECLARE_STATIC_KEY_TRUE(deferred_pages); |
| 790 | |
| 791 | bool __init deferred_grow_zone(struct zone *zone, unsigned int order); |
| 792 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ |
| 793 | |
Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 794 | enum mminit_level { |
| 795 | MMINIT_WARNING, |
| 796 | MMINIT_VERIFY, |
| 797 | MMINIT_TRACE |
| 798 | }; |
| 799 | |
| 800 | #ifdef CONFIG_DEBUG_MEMORY_INIT |
| 801 | |
| 802 | extern int mminit_loglevel; |
| 803 | |
| 804 | #define mminit_dprintk(level, prefix, fmt, arg...) \ |
| 805 | do { \ |
| 806 | if (level < mminit_loglevel) { \ |
Rasmus Villemoes | fc5199d | 2015-02-12 15:00:02 -0800 | [diff] [blame] | 807 | if (level <= MMINIT_WARNING) \ |
Joe Perches | 1170532 | 2016-03-17 14:19:50 -0700 | [diff] [blame] | 808 | pr_warn("mminit::" prefix " " fmt, ##arg); \ |
Rasmus Villemoes | fc5199d | 2015-02-12 15:00:02 -0800 | [diff] [blame] | 809 | else \ |
| 810 | printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \ |
Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 811 | } \ |
| 812 | } while (0) |
| 813 | |
Mel Gorman | 708614e | 2008-07-23 21:26:51 -0700 | [diff] [blame] | 814 | extern void mminit_verify_pageflags_layout(void); |
Mel Gorman | 68ad8df | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 815 | extern void mminit_verify_zonelist(void); |
Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 816 | #else |
| 817 | |
| 818 | static inline void mminit_dprintk(enum mminit_level level, |
| 819 | const char *prefix, const char *fmt, ...) |
| 820 | { |
| 821 | } |
| 822 | |
Mel Gorman | 708614e | 2008-07-23 21:26:51 -0700 | [diff] [blame] | 823 | static inline void mminit_verify_pageflags_layout(void) |
| 824 | { |
| 825 | } |
| 826 | |
Mel Gorman | 68ad8df | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 827 | static inline void mminit_verify_zonelist(void) |
| 828 | { |
| 829 | } |
Mel Gorman | 6b74ab9 | 2008-07-23 21:26:49 -0700 | [diff] [blame] | 830 | #endif /* CONFIG_DEBUG_MEMORY_INIT */ |
Mel Gorman | 2dbb51c | 2008-07-23 21:26:52 -0700 | [diff] [blame] | 831 | |
Mel Gorman | a5f5f91 | 2016-07-28 15:46:32 -0700 | [diff] [blame] | 832 | #define NODE_RECLAIM_NOSCAN -2 |
| 833 | #define NODE_RECLAIM_FULL -1 |
| 834 | #define NODE_RECLAIM_SOME 0 |
| 835 | #define NODE_RECLAIM_SUCCESS 1 |
Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 836 | |
Wei Yang | 8b09549 | 2018-12-28 00:34:36 -0800 | [diff] [blame] | 837 | #ifdef CONFIG_NUMA |
| 838 | extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); |
Dave Hansen | 79c28a4 | 2021-09-02 14:59:06 -0700 | [diff] [blame] | 839 | extern int find_next_best_node(int node, nodemask_t *used_node_mask); |
Wei Yang | 8b09549 | 2018-12-28 00:34:36 -0800 | [diff] [blame] | 840 | #else |
| 841 | static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, |
| 842 | unsigned int order) |
| 843 | { |
| 844 | return NODE_RECLAIM_NOSCAN; |
| 845 | } |
Dave Hansen | 79c28a4 | 2021-09-02 14:59:06 -0700 | [diff] [blame] | 846 | static inline int find_next_best_node(int node, nodemask_t *used_node_mask) |
| 847 | { |
| 848 | return NUMA_NO_NODE; |
| 849 | } |
Wei Yang | 8b09549 | 2018-12-28 00:34:36 -0800 | [diff] [blame] | 850 | #endif |
| 851 | |
zhenwei pi | 60f272f | 2022-05-12 20:23:09 -0700 | [diff] [blame] | 852 | /* |
| 853 | * mm/memory-failure.c |
| 854 | */ |
Wu Fengguang | 31d3d34 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 855 | extern int hwpoison_filter(struct page *p); |
| 856 | |
Wu Fengguang | 7c116f2 | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 857 | extern u32 hwpoison_filter_dev_major; |
| 858 | extern u32 hwpoison_filter_dev_minor; |
Wu Fengguang | 478c5ff | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 859 | extern u64 hwpoison_filter_flags_mask; |
| 860 | extern u64 hwpoison_filter_flags_value; |
Andi Kleen | 4fd466e | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 861 | extern u64 hwpoison_filter_memcg; |
Haicheng Li | 1bfe5fe | 2009-12-16 12:19:59 +0100 | [diff] [blame] | 862 | extern u32 hwpoison_filter_enable; |
Al Viro | eb36c58 | 2012-05-30 20:17:35 -0400 | [diff] [blame] | 863 | |
Michal Hocko | dc0ef0d | 2016-05-23 16:25:27 -0700 | [diff] [blame] | 864 | extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long, |
Al Viro | eb36c58 | 2012-05-30 20:17:35 -0400 | [diff] [blame] | 865 | unsigned long, unsigned long, |
Michal Hocko | 9fbeb5a | 2016-05-23 16:25:30 -0700 | [diff] [blame] | 866 | unsigned long, unsigned long); |
Xishi Qiu | ca57df7 | 2012-07-31 16:43:19 -0700 | [diff] [blame] | 867 | |
| 868 | extern void set_pageblock_order(void); |
Kefeng Wang | 4bf4f15 | 2023-04-17 19:48:07 +0800 | [diff] [blame] | 869 | unsigned long reclaim_pages(struct list_head *folio_list); |
Maninder Singh | 730ec8c | 2020-06-03 16:01:18 -0700 | [diff] [blame] | 870 | unsigned int reclaim_clean_pages_from_list(struct zone *zone, |
Kefeng Wang | 4bf4f15 | 2023-04-17 19:48:07 +0800 | [diff] [blame] | 871 | struct list_head *folio_list); |
Bartlomiej Zolnierkiewicz | d95ea5d | 2012-10-08 16:32:05 -0700 | [diff] [blame] | 872 | /* The ALLOC_WMARK bits are used as an index to zone->watermark */ |
| 873 | #define ALLOC_WMARK_MIN WMARK_MIN |
| 874 | #define ALLOC_WMARK_LOW WMARK_LOW |
| 875 | #define ALLOC_WMARK_HIGH WMARK_HIGH |
| 876 | #define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */ |
| 877 | |
| 878 | /* Mask to get the watermark bits */ |
| 879 | #define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1) |
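| | /* |
| | * Sketch of how the index is meant to be consumed: the low bits of |
| | * alloc_flags select WMARK_MIN/LOW/HIGH directly, so the threshold for a |
| | * watermark check can be derived as below. wmark_pages() is assumed to be |
| | * the usual zone watermark accessor; illustration only, not built. |
| | */ |
| | #if 0 |
| | static inline unsigned long example_alloc_wmark(struct zone *z, |
| |         unsigned int alloc_flags) |
| | { |
| |     return wmark_pages(z, alloc_flags & ALLOC_WMARK_MASK); |
| | } |
| | #endif |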
| 880 | |
Michal Hocko | cd04ae1 | 2017-09-06 16:24:50 -0700 | [diff] [blame] | 881 | /* |
| 882 | * Only MMU archs have async oom victim reclaim - aka the oom_reaper - |
| 883 | * so we cannot assume that reduced access to memory reserves is |
| 884 | * sufficient for !MMU. |
| 885 | */ |
| 886 | #ifdef CONFIG_MMU |
| 887 | #define ALLOC_OOM 0x08 |
| 888 | #else |
| 889 | #define ALLOC_OOM ALLOC_NO_WATERMARKS |
| 890 | #endif |
| 891 | |
Mel Gorman | 1ebbb21 | 2023-01-13 11:12:16 +0000 | [diff] [blame] | 892 | #define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access |
| 893 | * to 25% of the min watermark or |
| 894 | * 62.5% if __GFP_HIGH is set. |
| 895 | */ |
Mel Gorman | 524c480 | 2023-01-13 11:12:12 +0000 | [diff] [blame] | 896 | #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% |
| 897 | * of the min watermark. |
| 898 | */ |
Mel Gorman | 6bb1545 | 2018-12-28 00:35:41 -0800 | [diff] [blame] | 899 | #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ |
| 900 | #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ |
| 901 | #ifdef CONFIG_ZONE_DMA32 |
| 902 | #define ALLOC_NOFRAGMENT 0x100 /* avoid mixing pageblock types */ |
| 903 | #else |
| 904 | #define ALLOC_NOFRAGMENT 0x0 |
| 905 | #endif |
Mel Gorman | eb2e2b4 | 2023-01-13 11:12:14 +0000 | [diff] [blame] | 906 | #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ |
Mateusz Nosek | 736838e | 2020-04-01 21:09:47 -0700 | [diff] [blame] | 907 | #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ |
Bartlomiej Zolnierkiewicz | d95ea5d | 2012-10-08 16:32:05 -0700 | [diff] [blame] | 908 | |
Mel Gorman | ab35088 | 2023-01-13 11:12:15 +0000 | [diff] [blame] | 909 | /* Flags that allow allocations below the min watermark. */ |
Mel Gorman | 1ebbb21 | 2023-01-13 11:12:16 +0000 | [diff] [blame] | 910 | #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) |
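| | /* |
| | * Sketch of the reserve scaling described in the comments above, simplified |
| | * from the allocator's watermark check (which is the authoritative version): |
| | * each flag shrinks the effective min watermark, which is what grants |
| | * "access to" part of the reserve. Illustration only, not built. |
| | */ |
| | #if 0 |
| | static inline unsigned long example_scaled_min(unsigned long min, |
| |         unsigned int alloc_flags) |
| | { |
| |     if (alloc_flags & ALLOC_MIN_RESERVE)    /* __GFP_HIGH: access to 50% */ |
| |         min -= min / 2; |
| |     if (alloc_flags & ALLOC_NON_BLOCK)      /* 25%, or 62.5% with __GFP_HIGH */ |
| |         min -= min / 4; |
| |     return min; |
| | } |
| | #endif |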
Mel Gorman | ab35088 | 2023-01-13 11:12:15 +0000 | [diff] [blame] | 911 | |
Mel Gorman | 72b252a | 2015-09-04 15:47:32 -0700 | [diff] [blame] | 912 | enum ttu_flags; |
| 913 | struct tlbflush_unmap_batch; |
| 914 | |
Michal Hocko | ce61287 | 2017-04-07 16:05:05 -0700 | [diff] [blame] | 915 | |
| 916 | /* |
| 917 | * Only for MM-internal work items that do not depend on any |
| 918 | * allocations, or on locks that might themselves depend on allocations. |
| 919 | */ |
| 920 | extern struct workqueue_struct *mm_percpu_wq; |
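| | /* |
| | * Sketch of the intended usage: work queued on mm_percpu_wq must not |
| | * allocate memory or take locks that may themselves depend on allocations. |
| | * DECLARE_WORK()/queue_work() are the standard workqueue primitives; the |
| | * example names are made up. Illustration only, not built. |
| | */ |
| | #if 0 |
| | static void example_mm_work_fn(struct work_struct *work) |
| | { |
| |     /* drain per-CPU state etc.; no allocations in here */ |
| | } |
| | static DECLARE_WORK(example_mm_work, example_mm_work_fn); |
| | |
| | static void example_kick_mm_work(void) |
| | { |
| |     queue_work(mm_percpu_wq, &example_mm_work); |
| | } |
| | #endif |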
| 921 | |
Mel Gorman | 72b252a | 2015-09-04 15:47:32 -0700 | [diff] [blame] | 922 | #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH |
| 923 | void try_to_unmap_flush(void); |
Mel Gorman | d950c94 | 2015-09-04 15:47:35 -0700 | [diff] [blame] | 924 | void try_to_unmap_flush_dirty(void); |
Mel Gorman | 3ea2771 | 2017-08-02 13:31:52 -0700 | [diff] [blame] | 925 | void flush_tlb_batched_pending(struct mm_struct *mm); |
Mel Gorman | 72b252a | 2015-09-04 15:47:32 -0700 | [diff] [blame] | 926 | #else |
| 927 | static inline void try_to_unmap_flush(void) |
| 928 | { |
| 929 | } |
Mel Gorman | d950c94 | 2015-09-04 15:47:35 -0700 | [diff] [blame] | 930 | static inline void try_to_unmap_flush_dirty(void) |
| 931 | { |
| 932 | } |
Mel Gorman | 3ea2771 | 2017-08-02 13:31:52 -0700 | [diff] [blame] | 933 | static inline void flush_tlb_batched_pending(struct mm_struct *mm) |
| 934 | { |
| 935 | } |
Mel Gorman | 72b252a | 2015-09-04 15:47:32 -0700 | [diff] [blame] | 936 | #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ |
Vlastimil Babka | edf14cd | 2016-03-15 14:55:56 -0700 | [diff] [blame] | 937 | |
| 938 | extern const struct trace_print_flags pageflag_names[]; |
Hyeonggon Yoo | 4c85c0b | 2023-01-30 13:25:13 +0900 | [diff] [blame] | 939 | extern const struct trace_print_flags pagetype_names[]; |
Vlastimil Babka | edf14cd | 2016-03-15 14:55:56 -0700 | [diff] [blame] | 940 | extern const struct trace_print_flags vmaflag_names[]; |
| 941 | extern const struct trace_print_flags gfpflag_names[]; |
| 942 | |
Xishi Qiu | a6ffdc0 | 2017-05-03 14:52:52 -0700 | [diff] [blame] | 943 | static inline bool is_migrate_highatomic(enum migratetype migratetype) |
| 944 | { |
| 945 | return migratetype == MIGRATE_HIGHATOMIC; |
| 946 | } |
| 947 | |
| 948 | static inline bool is_migrate_highatomic_page(struct page *page) |
| 949 | { |
| 950 | return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC; |
| 951 | } |
| 952 | |
Michal Hocko | 72675e1 | 2017-09-06 16:20:24 -0700 | [diff] [blame] | 953 | void setup_zone_pageset(struct zone *zone); |
Joonsoo Kim | 19fc7be | 2020-08-11 18:37:25 -0700 | [diff] [blame] | 954 | |
| 955 | struct migration_target_control { |
| 956 | int nid; /* preferred node id */ |
| 957 | nodemask_t *nmask; |
| 958 | gfp_t gfp_mask; |
| 959 | }; |
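| | /* |
| | * Sketch of how the control block is handed to the migration core: it is |
| | * passed through the 'private' argument and read back by the allocation |
| | * callback. alloc_migration_target(), migrate_pages() and the MIGRATE_/MR_ |
| | * constants are assumed from mm/migrate.c. Illustration only, not built. |
| | */ |
| | #if 0 |
| | static int example_migrate_list(struct list_head *folios, int target_nid) |
| | { |
| |     struct migration_target_control mtc = { |
| |         .nid = target_nid, |
| |         .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, |
| |     }; |
| | |
| |     /* 0 on success, otherwise the number of pages not migrated or an errno */ |
| |     return migrate_pages(folios, alloc_migration_target, NULL, |
| |             (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL); |
| | } |
| | #endif |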
| 960 | |
Nicholas Piggin | b67177e | 2021-04-29 22:58:53 -0700 | [diff] [blame] | 961 | /* |
David Howells | 07073eb | 2023-02-14 15:01:42 +0000 | [diff] [blame] | 962 | * mm/filemap.c |
| 963 | */ |
| 964 | size_t splice_folio_into_pipe(struct pipe_inode_info *pipe, |
| 965 | struct folio *folio, loff_t fpos, size_t size); |
| 966 | |
| 967 | /* |
Nicholas Piggin | b67177e | 2021-04-29 22:58:53 -0700 | [diff] [blame] | 968 | * mm/vmalloc.c |
| 969 | */ |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 970 | #ifdef CONFIG_MMU |
Mike Rapoport (IBM) | b6714911 | 2023-03-21 19:05:12 +0200 | [diff] [blame] | 971 | void __init vmalloc_init(void); |
Alexander Potapenko | d905ae2 | 2023-04-13 15:12:23 +0200 | [diff] [blame] | 972 | int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, |
Nicholas Piggin | b67177e | 2021-04-29 22:58:53 -0700 | [diff] [blame] | 973 | pgprot_t prot, struct page **pages, unsigned int page_shift); |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 974 | #else |
Mike Rapoport (IBM) | b6714911 | 2023-03-21 19:05:12 +0200 | [diff] [blame] | 975 | static inline void vmalloc_init(void) |
| 976 | { |
| 977 | } |
| 978 | |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 979 | static inline |
Alexander Potapenko | d905ae2 | 2023-04-13 15:12:23 +0200 | [diff] [blame] | 980 | int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 981 | pgprot_t prot, struct page **pages, unsigned int page_shift) |
| 982 | { |
| 983 | return -EINVAL; |
| 984 | } |
| 985 | #endif |
| 986 | |
Alexander Potapenko | d905ae2 | 2023-04-13 15:12:23 +0200 | [diff] [blame] | 987 | int __must_check __vmap_pages_range_noflush(unsigned long addr, |
| 988 | unsigned long end, pgprot_t prot, |
| 989 | struct page **pages, unsigned int page_shift); |
Alexander Potapenko | b073d7f | 2022-09-15 17:03:48 +0200 | [diff] [blame] | 990 | |
Nicholas Piggin | 4ad0ae8 | 2021-04-29 22:59:01 -0700 | [diff] [blame] | 991 | void vunmap_range_noflush(unsigned long start, unsigned long end); |
Nicholas Piggin | b67177e | 2021-04-29 22:58:53 -0700 | [diff] [blame] | 992 | |
Alexander Potapenko | b073d7f | 2022-09-15 17:03:48 +0200 | [diff] [blame] | 993 | void __vunmap_range_noflush(unsigned long start, unsigned long end); |
| 994 | |
Kefeng Wang | cda6d93 | 2023-09-21 15:44:15 +0800 | [diff] [blame] | 995 | int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma, |
Yang Shi | f4c0d83 | 2021-06-30 18:51:39 -0700 | [diff] [blame] | 996 | unsigned long addr, int page_nid, int *flags); |
| 997 | |
Christoph Hellwig | 27674ef | 2022-02-16 15:31:36 +1100 | [diff] [blame] | 998 | void free_zone_device_page(struct page *page); |
Alistair Popple | b05a79d | 2022-07-15 10:05:13 -0500 | [diff] [blame] | 999 | int migrate_device_coherent_page(struct page *page); |
Christoph Hellwig | 27674ef | 2022-02-16 15:31:36 +1100 | [diff] [blame] | 1000 | |
Matthew Wilcox (Oracle) | ece1ed7 | 2022-02-04 10:27:40 -0500 | [diff] [blame] | 1001 | /* |
| 1002 | * mm/gup.c |
| 1003 | */ |
| 1004 | struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags); |
Jason Gunthorpe | 7ce154f | 2023-01-24 16:34:25 -0400 | [diff] [blame] | 1005 | int __must_check try_grab_page(struct page *page, unsigned int flags); |
Matthew Wilcox (Oracle) | ece1ed7 | 2022-02-04 10:27:40 -0500 | [diff] [blame] | 1006 | |
David Hildenbrand | 8b9c1cc | 2023-08-03 16:32:03 +0200 | [diff] [blame] | 1007 | /* |
| 1008 | * mm/huge_memory.c |
| 1009 | */ |
| 1010 | struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, |
| 1011 | unsigned long addr, pmd_t *pmd, |
| 1012 | unsigned int flags); |
| 1013 | |
Lorenzo Stoakes | adb20b0 | 2023-10-11 18:04:29 +0100 | [diff] [blame] | 1014 | /* |
| 1015 | * mm/mmap.c |
| 1016 | */ |
Lorenzo Stoakes | 93bf5d4 | 2023-10-11 18:04:31 +0100 | [diff] [blame] | 1017 | struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi, |
| 1018 | struct vm_area_struct *vma, |
| 1019 | unsigned long delta); |
Lorenzo Stoakes | adb20b0 | 2023-10-11 18:04:29 +0100 | [diff] [blame] | 1020 | |
Jason Gunthorpe | 2c22410 | 2023-01-24 16:34:34 -0400 | [diff] [blame] | 1021 | enum { |
| 1022 | /* mark page accessed */ |
| 1023 | FOLL_TOUCH = 1 << 16, |
| 1024 | /* a retry, previous pass started an IO */ |
| 1025 | FOLL_TRIED = 1 << 17, |
| 1026 | /* we are working on non-current tsk/mm */ |
| 1027 | FOLL_REMOTE = 1 << 18, |
| 1028 | /* pages must be released via unpin_user_page */ |
| 1029 | FOLL_PIN = 1 << 19, |
| 1030 | /* gup_fast: prevent fall-back to slow gup */ |
| 1031 | FOLL_FAST_ONLY = 1 << 20, |
| 1032 | /* allow unlocking the mmap lock */ |
| 1033 | FOLL_UNLOCKABLE = 1 << 21, |
| 1034 | }; |
| 1035 | |
Lorenzo Stoakes | 0f20bba | 2023-10-03 00:14:52 +0100 | [diff] [blame] | 1036 | #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \ |
| 1037 | FOLL_FAST_ONLY | FOLL_UNLOCKABLE) |
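| | /* |
| | * Sketch of the sanity check this mask exists for: the internal-only FOLL_* |
| | * bits above must never be passed in by external get_user_pages*() / |
| | * pin_user_pages*() callers. Illustration only, not built. |
| | */ |
| | #if 0 |
| | static bool example_gup_flags_valid(unsigned int gup_flags) |
| | { |
| |     return !WARN_ON_ONCE(gup_flags & INTERNAL_GUP_FLAGS); |
| | } |
| | #endif |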
| 1038 | |
Jason Gunthorpe | 63b6051 | 2023-01-24 16:34:33 -0400 | [diff] [blame] | 1039 | /* |
| 1040 | * Indicates whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE |
| 1041 | * for a page that is write-protected in the page table, such that the |
| 1042 | * GUP pin will remain consistent with the pages mapped into the page |
| 1043 | * tables of the MM. |
| 1044 | * |
| 1045 | * Temporary unmapping of PageAnonExclusive() pages or clearing of |
| 1046 | * PageAnonExclusive() has to protect against concurrent GUP: |
| 1047 | * * Ordinary GUP: Using the PT lock |
| 1048 | * * GUP-fast and fork(): mm->write_protect_seq |
| 1049 | * * GUP-fast and KSM or temporary unmapping (swap, migration): see |
| 1050 | * page_try_share_anon_rmap() |
| 1051 | * |
| 1052 | * Must be called with the (sub)page that's actually referenced via the |
| 1053 | * page table entry, which might not necessarily be the head page for a |
| 1054 | * PTE-mapped THP. |
| 1055 | * |
| 1056 | * If the vma is NULL, we're coming from the GUP-fast path and might have |
| 1057 | * to fall back to the slow path just to look up the vma. |
| 1058 | */ |
| 1059 | static inline bool gup_must_unshare(struct vm_area_struct *vma, |
| 1060 | unsigned int flags, struct page *page) |
| 1061 | { |
| 1062 | /* |
| 1063 | * FOLL_WRITE is implicitly handled correctly as the page table entry |
| 1064 | * has to be writable -- and if it references (part of) an anonymous |
| 1065 | * folio, that part is required to be marked exclusive. |
| 1066 | */ |
| 1067 | if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) |
| 1068 | return false; |
| 1069 | /* |
| 1070 | * Note: PageAnon(page) is stable until the page is actually getting |
| 1071 | * freed. |
| 1072 | */ |
| 1073 | if (!PageAnon(page)) { |
| 1074 | /* |
| 1075 | * We only care about R/O long-term pinning: R/O short-term |
| 1076 | * pinning does not have the semantics to observe successive |
| 1077 | * changes through the process page tables. |
| 1078 | */ |
| 1079 | if (!(flags & FOLL_LONGTERM)) |
| 1080 | return false; |
| 1081 | |
| 1082 | /* We really need the vma ... */ |
| 1083 | if (!vma) |
| 1084 | return true; |
| 1085 | |
| 1086 | /* |
| 1087 | * ... because we only care about writable private ("COW") |
| 1088 | * mappings where we have to break COW early. |
| 1089 | */ |
| 1090 | return is_cow_mapping(vma->vm_flags); |
| 1091 | } |
| 1092 | |
| 1093 | /* Paired with a memory barrier in page_try_share_anon_rmap(). */ |
| 1094 | if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) |
| 1095 | smp_rmb(); |
| 1096 | |
| 1097 | /* |
David Hildenbrand | 5805192 | 2023-08-05 12:12:56 +0200 | [diff] [blame] | 1098 | * During GUP-fast we might not get called on the head page for a |
| 1099 | * hugetlb page that is mapped using cont-PTE, because GUP-fast does |
| 1100 | * not work with the abstracted hugetlb PTEs that always point at the |
| 1101 | * head page. For hugetlb, PageAnonExclusive only applies on the head |
| 1102 | * page (as it cannot be partially COW-shared), so look up the head page. |
| 1103 | */ |
| 1104 | if (unlikely(!PageHead(page) && PageHuge(page))) |
| 1105 | page = compound_head(page); |
| 1106 | |
| 1107 | /* |
Jason Gunthorpe | 63b6051 | 2023-01-24 16:34:33 -0400 | [diff] [blame] | 1108 | * Note that PageKsm() pages cannot be exclusive, and consequently, |
| 1109 | * cannot get pinned. |
| 1110 | */ |
| 1111 | return !PageAnonExclusive(page); |
| 1112 | } |
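| | /* |
| | * Sketch of how a page-table walker is expected to consume the helper: a |
| | * read-only PTE whose page must be unshared is rejected with -EMLINK so |
| | * the caller can retry the fault with FAULT_FLAG_UNSHARE (pattern assumed |
| | * from the GUP slow path). Fragment, illustration only, not built. |
| | */ |
| | #if 0 |
| |     if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) { |
| |         /* drop locks/refs here, then let the caller unshare and retry */ |
| |         return ERR_PTR(-EMLINK); |
| |     } |
| | #endif |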
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1113 | |
Ma Wupeng | 902c2d9 | 2022-06-14 17:21:56 +0800 | [diff] [blame] | 1114 | extern bool mirrored_kernelcore; |
Ma Wupeng | 0db31d6 | 2023-08-02 15:23:28 +0800 | [diff] [blame] | 1115 | extern bool memblock_has_mirror(void); |
Ma Wupeng | 902c2d9 | 2022-06-14 17:21:56 +0800 | [diff] [blame] | 1116 | |
Peter Xu | 76aefad | 2022-07-25 10:20:46 -0400 | [diff] [blame] | 1117 | static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) |
| 1118 | { |
| 1119 | /* |
| 1120 | * NOTE: the config check must come before the VM_SOFTDIRTY test, |
| 1121 | * because when soft-dirty is not compiled in, VM_SOFTDIRTY is |
| 1122 | * defined as 0x0, so !(vm_flags & VM_SOFTDIRTY) would always be |
| 1123 | * true. |
| 1124 | */ |
| 1125 | if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) |
| 1126 | return false; |
| 1127 | |
| 1128 | /* |
| 1129 | * Soft-dirty is kind of special: its tracking is enabled when the |
| 1130 | * VM_SOFTDIRTY flag is *not* set on the vma. |
| 1131 | */ |
| 1132 | return !(vma->vm_flags & VM_SOFTDIRTY); |
| 1133 | } |
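| | /* |
| | * Sketch of a typical caller: when soft-dirty tracking is live, writable |
| | * shared mappings need write notification so that the first write after a |
| | * clear is observed (pattern assumed from vma_wants_writenotify()). |
| | * Fragment, illustration only, not built. |
| | */ |
| | #if 0 |
| |     /* Do we need to track softdirty? */ |
| |     if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma)) |
| |         return true; |
| | #endif |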
| 1134 | |
Liam R. Howlett | 53bee98 | 2023-07-24 14:31:48 -0400 | [diff] [blame] | 1135 | static inline void vma_iter_config(struct vma_iterator *vmi, |
| 1136 | unsigned long index, unsigned long last) |
| 1137 | { |
| 1138 | MAS_BUG_ON(&vmi->mas, vmi->mas.node != MAS_START && |
| 1139 | (vmi->mas.index > index || vmi->mas.last < index)); |
| 1140 | __mas_set_range(&vmi->mas, index, last - 1); |
| 1141 | } |
| 1142 | |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1143 | /* |
| 1144 | * VMA Iterator functions shared between nommu and mmap |
| 1145 | */ |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1146 | static inline int vma_iter_prealloc(struct vma_iterator *vmi, |
| 1147 | struct vm_area_struct *vma) |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1148 | { |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1149 | return mas_preallocate(&vmi->mas, vma, GFP_KERNEL); |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1150 | } |
| 1151 | |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1152 | static inline void vma_iter_clear(struct vma_iterator *vmi) |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1153 | { |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1154 | mas_store_prealloc(&vmi->mas, NULL); |
| 1155 | } |
| 1156 | |
Liam R. Howlett | f72cf24 | 2023-07-24 14:31:51 -0400 | [diff] [blame] | 1157 | static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, |
| 1158 | unsigned long start, unsigned long end, gfp_t gfp) |
| 1159 | { |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1160 | __mas_set_range(&vmi->mas, start, end - 1); |
Liam R. Howlett | f72cf24 | 2023-07-24 14:31:51 -0400 | [diff] [blame] | 1161 | mas_store_gfp(&vmi->mas, NULL, gfp); |
| 1162 | if (unlikely(mas_is_err(&vmi->mas))) |
| 1163 | return -ENOMEM; |
| 1164 | |
| 1165 | return 0; |
| 1166 | } |
| 1167 | |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1168 | static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi) |
| 1169 | { |
| 1170 | return mas_walk(&vmi->mas); |
| 1171 | } |
| 1172 | |
| 1173 | /* Store a VMA with preallocated memory */ |
| 1174 | static inline void vma_iter_store(struct vma_iterator *vmi, |
| 1175 | struct vm_area_struct *vma) |
| 1176 | { |
| 1178 | #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) |
Liam R. Howlett | 36bd931 | 2023-05-18 10:55:27 -0400 | [diff] [blame] | 1179 | if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && |
| 1180 | vmi->mas.index > vma->vm_start)) { |
| 1181 | pr_warn("%lx > %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", |
| 1182 | vmi->mas.index, vma->vm_start, vma->vm_start, |
| 1183 | vma->vm_end, vmi->mas.index, vmi->mas.last); |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1184 | } |
Liam R. Howlett | 36bd931 | 2023-05-18 10:55:27 -0400 | [diff] [blame] | 1185 | if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && |
| 1186 | vmi->mas.last < vma->vm_start)) { |
| 1187 | pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", |
| 1188 | vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end, |
| 1189 | vmi->mas.index, vmi->mas.last); |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1190 | } |
| 1191 | #endif |
| 1192 | |
| 1193 | if (vmi->mas.node != MAS_START && |
| 1194 | ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) |
| 1195 | vma_iter_invalidate(vmi); |
| 1196 | |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1197 | __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1198 | mas_store_prealloc(&vmi->mas, vma); |
| 1199 | } |
| 1200 | |
| 1201 | static inline int vma_iter_store_gfp(struct vma_iterator *vmi, |
| 1202 | struct vm_area_struct *vma, gfp_t gfp) |
| 1203 | { |
| 1204 | if (vmi->mas.node != MAS_START && |
| 1205 | ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) |
| 1206 | vma_iter_invalidate(vmi); |
| 1207 | |
Liam R. Howlett | b5df092 | 2023-07-24 14:31:52 -0400 | [diff] [blame] | 1208 | __mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1); |
Liam R. Howlett | b62b633 | 2023-01-20 11:26:08 -0500 | [diff] [blame] | 1209 | mas_store_gfp(&vmi->mas, vma, gfp); |
| 1210 | if (unlikely(mas_is_err(&vmi->mas))) |
| 1211 | return -ENOMEM; |
| 1212 | |
| 1213 | return 0; |
| 1214 | } |
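| | /* |
| | * Sketch of the preallocate-then-store pattern these helpers are built for: |
| | * reserve maple tree nodes while failure is still easy to unwind, then do |
| | * the store that must not fail. VMA_ITERATOR() is assumed from linux/mm.h. |
| | * Fragment, illustration only, not built. |
| | */ |
| | #if 0 |
| |     VMA_ITERATOR(vmi, mm, vma->vm_start); |
| | |
| |     vma_iter_config(&vmi, vma->vm_start, vma->vm_end); |
| |     if (vma_iter_prealloc(&vmi, vma)) |
| |         return -ENOMEM;         /* nothing has been modified yet */ |
| |     /* ... link file and anon_vma state, update counters ... */ |
| |     vma_iter_store(&vmi, vma);  /* cannot fail; uses the preallocation */ |
| | #endif |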
Liam R. Howlett | 440703e | 2023-01-20 11:26:41 -0500 | [diff] [blame] | 1215 | |
| 1216 | /* |
| 1217 | * VMA lock generalization |
| 1218 | */ |
| 1219 | struct vma_prepare { |
| 1220 | struct vm_area_struct *vma; |
| 1221 | struct vm_area_struct *adj_next; |
| 1222 | struct file *file; |
| 1223 | struct address_space *mapping; |
| 1224 | struct anon_vma *anon_vma; |
| 1225 | struct vm_area_struct *insert; |
| 1226 | struct vm_area_struct *remove; |
| 1227 | struct vm_area_struct *remove2; |
| 1228 | }; |
Qi Zheng | 3ee0aa9 | 2023-09-11 17:25:14 +0800 | [diff] [blame] | 1229 | |
Usama Arif | fde1c4e | 2023-09-13 11:54:01 +0100 | [diff] [blame] | 1230 | void __meminit __init_single_page(struct page *page, unsigned long pfn, |
| 1231 | unsigned long zone, int nid); |
| 1232 | |
Qi Zheng | 3ee0aa9 | 2023-09-11 17:25:14 +0800 | [diff] [blame] | 1233 | /* shrinker related functions */ |
Qi Zheng | 96f7b2b | 2023-09-11 17:25:15 +0800 | [diff] [blame] | 1234 | unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, |
| 1235 | int priority); |
Qi Zheng | 3ee0aa9 | 2023-09-11 17:25:14 +0800 | [diff] [blame] | 1236 | |
| 1237 | #ifdef CONFIG_SHRINKER_DEBUG |
Lucy Mielke | f04eba1 | 2023-10-06 22:30:51 +0200 | [diff] [blame] | 1238 | static inline __printf(2, 0) int shrinker_debugfs_name_alloc( |
| 1239 | struct shrinker *shrinker, const char *fmt, va_list ap) |
Qi Zheng | c42d50a | 2023-09-11 17:44:00 +0800 | [diff] [blame] | 1240 | { |
| 1241 | shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap); |
| 1242 | |
| 1243 | return shrinker->name ? 0 : -ENOMEM; |
| 1244 | } |
| 1245 | |
| 1246 | static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) |
| 1247 | { |
| 1248 | kfree_const(shrinker->name); |
| 1249 | shrinker->name = NULL; |
| 1250 | } |
| 1251 | |
Qi Zheng | 3ee0aa9 | 2023-09-11 17:25:14 +0800 | [diff] [blame] | 1252 | extern int shrinker_debugfs_add(struct shrinker *shrinker); |
| 1253 | extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, |
| 1254 | int *debugfs_id); |
| 1255 | extern void shrinker_debugfs_remove(struct dentry *debugfs_entry, |
| 1256 | int debugfs_id); |
| 1257 | #else /* CONFIG_SHRINKER_DEBUG */ |
| 1258 | static inline int shrinker_debugfs_add(struct shrinker *shrinker) |
| 1259 | { |
| 1260 | return 0; |
| 1261 | } |
Qi Zheng | c42d50a | 2023-09-11 17:44:00 +0800 | [diff] [blame] | 1262 | static inline int shrinker_debugfs_name_alloc(struct shrinker *shrinker, |
| 1263 | const char *fmt, va_list ap) |
| 1264 | { |
| 1265 | return 0; |
| 1266 | } |
| 1267 | static inline void shrinker_debugfs_name_free(struct shrinker *shrinker) |
| 1268 | { |
| 1269 | } |
Qi Zheng | 3ee0aa9 | 2023-09-11 17:25:14 +0800 | [diff] [blame] | 1270 | static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, |
| 1271 | int *debugfs_id) |
| 1272 | { |
| 1273 | *debugfs_id = -1; |
| 1274 | return NULL; |
| 1275 | } |
| 1276 | static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, |
| 1277 | int debugfs_id) |
| 1278 | { |
| 1279 | } |
| 1280 | #endif /* CONFIG_SHRINKER_DEBUG */ |
| 1281 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1282 | #endif /* __MM_INTERNAL_H */ |