readahead: fault retry breaks mmap file read random detection
.fault now can retry. The retry can break state machine of .fault. In
filemap_fault, if page is miss, ra->mmap_miss is increased. In the second
try, since the page is in page cache now, ra->mmap_miss is decreased. And
these are done in one fault, so we can't detect random mmap file access.
Add a new flag to indicate .fault is tried once. In the second try, skip
ra->mmap_miss decreasing. The filemap_fault state machine is ok with it.
I only tested x86, didn't test other archs, but looks the change for other
archs is obvious, but who knows :)
Signed-off-by: Shaohua Li <shaohua.li@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd8345..5dbf13f 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b92e609..b2f2d2d 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -152,6 +152,7 @@
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would have
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 45fd542..73312ab 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -186,6 +186,7 @@
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 06695cc..513b74c 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -113,6 +113,7 @@
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 8443daf..6cf0341 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -184,6 +184,7 @@
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index aeebbb7..a563727 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -170,6 +170,7 @@
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index eb365d67..714b35a 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -233,6 +233,7 @@
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index c14f6df..9f51348 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,6 +171,7 @@
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 40f850e..e2bfafc 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -183,6 +183,7 @@
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5495ebe9..0a6b283 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -451,6 +451,7 @@
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122c..04ad400 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
down_read(&mm->mmap_sem);
goto retry;
}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 3bdc1ad..cbbdcad8 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -504,6 +504,7 @@
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917..e98bfda 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429..413d292 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@
}
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 84ce7ab..fe811fa 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -454,6 +454,7 @@
tsk->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/*
* No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98..0f00e9c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -89,6 +89,7 @@
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b23..8e13ecb 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1220,6 +1220,7 @@
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
* of starvation. */
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 5a74c53..2c2f710 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -126,6 +126,7 @@
current->min_flt++;
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
/* No need to up_read(&mm->mmap_sem) as we would
* have already released it in __lock_page_or_retry
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b01e585..bcaab4e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -161,6 +161,7 @@
#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED 0x40 /* second try */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
diff --git a/mm/filemap.c b/mm/filemap.c
index a9827b4..83efee7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1607,13 +1607,13 @@
* Do we have something in the page cache already?
*/
page = find_get_page(mapping, offset);
- if (likely(page)) {
+ if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
/*
* We found the page, so try async readahead before
* waiting for the lock.
*/
do_async_mmap_readahead(vma, ra, file, page, offset);
- } else {
+ } else if (!page) {
/* No page in the page cache at all */
do_sync_mmap_readahead(vma, ra, file, offset);
count_vm_event(PGMAJFAULT);