/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>

#include "internal.h"

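/*
 * Can this task lock pages into memory?  Allowed if the caller has
 * CAP_IPC_LOCK or a non-zero RLIMIT_MEMLOCK soft limit.
 */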
int can_do_mlock(void)
{
	if (capable(CAP_IPC_LOCK))
		return 1;
	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
		return 1;
	return 0;
}
EXPORT_SYMBOL(can_do_mlock);

#ifdef CONFIG_UNEVICTABLE_LRU
/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 *
 * When lazy mlocking via vmscan, it is important to ensure that the
 * vma's VM_LOCKED status is not concurrently being modified, otherwise we
 * may have mlocked a page that is being munlocked.  So lazy mlock must take
 * the mmap_sem for read, and verify that the vma really is locked
 * (see mm/rmap.c).
 */

/*
 * LRU accounting for clear_page_mlock()
 */
void __clear_page_mlock(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	if (!page->mapping) {	/* truncated ? */
		return;
	}

	if (!isolate_lru_page(page)) {
		putback_lru_page(page);
	} else {
		/*
		 * Page not on the LRU yet.  Flush all pagevecs and retry.
		 */
		lru_add_drain_all();
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}

/*
 * Mark page as mlocked if not already.
 * If page on LRU, isolate and putback to move to unevictable list.
 */
void mlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (!TestSetPageMlocked(page) && !isolate_lru_page(page))
		putback_lru_page(page);
}

/*
 * called from munlock()/munmap() path with page supposedly on the LRU.
 *
 * Note:  unlike mlock_vma_page(), we can't just clear the PageMlocked
 * [in try_to_munlock()] and then attempt to isolate the page.  We must
 * isolate the page to keep others from messing with its unevictable
 * and mlocked state while trying to munlock.  However, we pre-clear the
 * mlocked state anyway as we might lose the isolation race and we might
 * not get another chance to clear PageMlocked.  If we successfully
 * isolate the page and try_to_munlock() detects other VM_LOCKED vmas
 * mapping the page, it will restore the PageMlocked state, unless the page
 * is mapped in a non-linear vma.  So, we go ahead and SetPageMlocked(),
 * perhaps redundantly.
 * If we lose the isolation race, and the page is mapped by other VM_LOCKED
 * vmas, we'll detect this in vmscan--via try_to_munlock() or try_to_unmap()
 * either of which will restore the PageMlocked state by calling
 * mlock_vma_page() above, if it can grab the vma's mmap sem.
 */
static void munlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (TestClearPageMlocked(page) && !isolate_lru_page(page)) {
		try_to_munlock(page);
		putback_lru_page(page);
	}
}

/*
 * mlock a range of pages in the vma.
 *
 * This takes care of making the pages present too.
 *
 * vma->vm_mm->mmap_sem must be held for at least read; callers below
 * downgrade the write lock before faulting in the pages.
 */
static int __mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start;
	struct page *pages[16]; /* 16 gives a reasonable batch */
	int write = !!(vma->vm_flags & VM_WRITE);
	int nr_pages = (end - start) / PAGE_SIZE;
	int ret;

	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
	VM_BUG_ON(start < vma->vm_start || end > vma->vm_end);
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));

	lru_add_drain_all();	/* push cached pages to LRU */

	while (nr_pages > 0) {
		int i;

		cond_resched();

		/*
		 * get_user_pages makes pages present if we are
		 * setting mlock, and this extra reference count will
		 * disable migration of this page.  However, page may
		 * still be truncated out from under us.
		 */
		ret = get_user_pages(current, mm, addr,
				min_t(int, nr_pages, ARRAY_SIZE(pages)),
				write, 0, pages, NULL);
		/*
		 * This can happen for, e.g., VM_NONLINEAR regions before
		 * a page has been allocated and mapped at a given offset,
		 * or for addresses that map beyond end of a file.
		 * We'll mlock the pages if/when they get faulted in.
		 */
		if (ret < 0)
			break;
		if (ret == 0) {
			/*
			 * We know the vma is there, so the only time
			 * we cannot get a single page should be an
			 * error (ret < 0) case.
			 */
			WARN_ON(1);
			break;
		}

		lru_add_drain();	/* push cached pages to LRU */

		for (i = 0; i < ret; i++) {
			struct page *page = pages[i];

			lock_page(page);
			/*
			 * Because we lock page here and migration is blocked
			 * by the elevated reference, we need only check for
			 * page truncation (file-cache only).
			 */
			if (page->mapping)
				mlock_vma_page(page);
			unlock_page(page);
			put_page(page);		/* ref from get_user_pages() */

			/*
			 * here we assume that get_user_pages() has given us
			 * a list of virtually contiguous pages.
			 */
			addr += PAGE_SIZE;	/* for next get_user_pages() */
			nr_pages--;
		}
	}

	lru_add_drain_all();	/* to update stats */

	return 0;	/* count entire vma as locked_vm */
}

/*
 * private structure for munlock page table walk
 */
struct munlock_page_walk {
	struct vm_area_struct *vma;
	pmd_t *pmd;	/* for migration_entry_wait() */
};

/*
 * munlock normal pages for present ptes
 */
static int __munlock_pte_handler(pte_t *ptep, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	struct munlock_page_walk *mpw = walk->private;
	swp_entry_t entry;
	struct page *page;
	pte_t pte;

retry:
	pte = *ptep;
	/*
	 * If it's a swap pte, we might be racing with page migration.
	 */
	if (unlikely(!pte_present(pte))) {
		if (!is_swap_pte(pte))
			goto out;
		entry = pte_to_swp_entry(pte);
		if (is_migration_entry(entry)) {
			migration_entry_wait(mpw->vma->vm_mm, mpw->pmd, addr);
			goto retry;
		}
		goto out;
	}

	page = vm_normal_page(mpw->vma, addr, pte);
	if (!page)
		goto out;

	lock_page(page);
	if (!page->mapping) {
		unlock_page(page);
		goto retry;
	}
	munlock_vma_page(page);
	unlock_page(page);

out:
	return 0;
}

/*
 * Save pmd for pte handler for waiting on migration entries
 */
static int __munlock_pmd_handler(pmd_t *pmd, unsigned long addr,
				 unsigned long end, struct mm_walk *walk)
{
	struct munlock_page_walk *mpw = walk->private;

	mpw->pmd = pmd;
	return 0;
}


/*
 * munlock a range of pages in the vma using standard page table walk.
 *
 * vma->vm_mm->mmap_sem must be held for write.
 */
static void __munlock_vma_pages_range(struct vm_area_struct *vma,
			      unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	struct munlock_page_walk mpw = {
		.vma = vma,
	};
	struct mm_walk munlock_page_walk = {
		.pmd_entry = __munlock_pmd_handler,
		.pte_entry = __munlock_pte_handler,
		.private = &mpw,
		.mm = mm,
	};

	VM_BUG_ON(start & ~PAGE_MASK || end & ~PAGE_MASK);
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
	VM_BUG_ON(start < vma->vm_start);
	VM_BUG_ON(end > vma->vm_end);

	lru_add_drain_all();	/* push cached pages to LRU */
	walk_page_range(start, end, &munlock_page_walk);
	lru_add_drain_all();	/* to update stats */
}

#else /* CONFIG_UNEVICTABLE_LRU */

/*
 * Just make pages present if VM_LOCKED.  No-op if unlocking.
 */
static int __mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	if (vma->vm_flags & VM_LOCKED)
		make_pages_present(start, end);
	return 0;
}

/*
 * munlock a range of pages in the vma -- no-op.
 */
static void __munlock_vma_pages_range(struct vm_area_struct *vma,
			      unsigned long start, unsigned long end)
{
}
#endif /* CONFIG_UNEVICTABLE_LRU */

/*
 * mlock all pages in this vma range.  For mmap()/mremap()/...
 */
int mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	int nr_pages = (end - start) / PAGE_SIZE;
	BUG_ON(!(vma->vm_flags & VM_LOCKED));

	/*
	 * filter unlockable vmas
	 */
	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
		goto no_mlock;

	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current))) {
		downgrade_write(&mm->mmap_sem);
		nr_pages = __mlock_vma_pages_range(vma, start, end);

		up_read(&mm->mmap_sem);
		/* vma can change or disappear */
		down_write(&mm->mmap_sem);
		vma = find_vma(mm, start);
		/* non-NULL vma must contain @start, but need to check @end */
		if (!vma || end > vma->vm_end)
			return -EAGAIN;
		return nr_pages;
	}

	/*
	 * User mapped kernel pages or huge pages:
	 * make these pages present to populate the ptes, but
	 * fall thru' to reset VM_LOCKED--no need to unlock, and
	 * return nr_pages so these don't get counted against task's
	 * locked limit.  huge pages are already counted against
	 * locked vm limit.
	 */
	make_pages_present(start, end);

no_mlock:
	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
	return nr_pages;		/* pages NOT mlocked */
}


/*
 * munlock all pages in vma.   For munmap() and exit().
 */
void munlock_vma_pages_all(struct vm_area_struct *vma)
{
	vma->vm_flags &= ~VM_LOCKED;
	__munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
}

/*
 * mlock_fixup  - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes via make_pages_present().
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, unsigned int newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	int lock = newflags & VM_LOCKED;

	if (newflags == vma->vm_flags ||
			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		goto out;	/* don't set VM_LOCKED,  don't count */

	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current)) {
		if (lock)
			make_pages_present(start, end);
		goto out;	/* don't set VM_LOCKED,  don't count */
	}

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!lock)
		nr_pages = -nr_pages;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
	 */
	vma->vm_flags = newflags;

	if (lock) {
		/*
		 * mmap_sem is currently held for write.  Downgrade the write
		 * lock to a read lock so that other faults, mmap scans, ...
		 * can proceed while we fault in all pages.
		 */
		downgrade_write(&mm->mmap_sem);

		ret = __mlock_vma_pages_range(vma, start, end);
		if (ret > 0) {
			mm->locked_vm -= ret;
			ret = 0;
		}
		/*
		 * Need to reacquire mmap sem in write mode, as our callers
		 * expect this.  We have no support for atomically upgrading
		 * a sem to write, so we need to check for ranges while sem
		 * is unlocked.
		 */
		up_read(&mm->mmap_sem);
		/* vma can change or disappear */
		down_write(&mm->mmap_sem);
		*prev = find_vma(mm, start);
		/* non-NULL *prev must contain @start, but need to check @end */
		if (!(*prev) || end > (*prev)->vm_end)
			ret = -EAGAIN;
	} else {
		/*
		 * TODO:  for unlocking, pages will already be resident, so
		 * we don't need to wait for allocations/reclaim/pagein, ...
		 * However, unlocking a very large region can still take a
		 * while.  Should we downgrade the semaphore for both lock
		 * AND unlock ?
		 */
		__munlock_vma_pages_range(vma, start, end);
	}

out:
	*prev = vma;
	return ret;
}

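/*
 * Walk the vmas covering [start, start+len) and set or clear VM_LOCKED
 * on each via mlock_fixup().  Returns -ENOMEM if the range contains an
 * unmapped gap.  Caller must hold mmap_sem for write.
 */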
static int do_mlock(unsigned long start, size_t len, int on)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * prev;
	int error;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma_prev(current->mm, start, &prev);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned int newflags;

		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */

		newflags = vma->vm_flags | VM_LOCKED;
		if (!on)
			newflags &= ~VM_LOCKED;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

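/*
 * mlock(2):  lock the range [start, start+len) into memory, after
 * checking the new total of locked pages against the caller's
 * RLIMIT_MEMLOCK (bypassed with CAP_IPC_LOCK).
 */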
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	if (!can_do_mlock())
		return -EPERM;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;

	locked = len >> PAGE_SHIFT;
	locked += current->mm->locked_vm;

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = do_mlock(start, len, 1);
	up_write(&current->mm->mmap_sem);
	return error;
}

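/*
 * munlock(2):  clear VM_LOCKED on the range [start, start+len).
 * No privilege or rlimit check is needed to unlock.
 */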
asmlinkage long sys_munlock(unsigned long start, size_t len)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;
	ret = do_mlock(start, len, 0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

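/*
 * Apply mlockall()/munlockall() flags:  MCL_FUTURE is recorded in
 * mm->def_flags for future mappings; unless only MCL_FUTURE was
 * requested, existing vmas are walked and VM_LOCKED is set or cleared
 * according to MCL_CURRENT.
 */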
static int do_mlockall(int flags)
{
	struct vm_area_struct * vma, * prev = NULL;
	unsigned int def_flags = 0;

	if (flags & MCL_FUTURE)
		def_flags = VM_LOCKED;
	current->mm->def_flags = def_flags;
	if (flags == MCL_FUTURE)
		goto out;

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		unsigned int newflags;

		newflags = vma->vm_flags | VM_LOCKED;
		if (!(flags & MCL_CURRENT))
			newflags &= ~VM_LOCKED;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
	}
out:
	return 0;
}

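/*
 * mlockall(2):  lock all current and/or future mappings of the process,
 * subject to RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK.
 */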
asmlinkage long sys_mlockall(int flags)
{
	unsigned long lock_limit;
	int ret = -EINVAL;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
		goto out;

	ret = -EPERM;
	if (!can_do_mlock())
		goto out;

	down_write(&current->mm->mmap_sem);

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = do_mlockall(flags);
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}

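/*
 * munlockall(2):  undo mlockall() -- clear def_flags and VM_LOCKED on
 * every vma.
 */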
asmlinkage long sys_munlockall(void)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mlockall(0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

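/*
 * Charge "size" bytes of locked shm against the owning user_struct,
 * checking RLIMIT_MEMLOCK unless it is RLIM_INFINITY or the caller has
 * CAP_IPC_LOCK.  Returns 1 if the charge was allowed, 0 otherwise.
 */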
int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	if (!allowed &&
	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

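/*
 * Uncharge a previously user_shm_lock()ed region and drop the uid
 * reference taken when it was charged.
 */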
void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}