vmscan: stop kswapd waiting on congestion when the min watermark is not being met If reclaim fails to make sufficient progress, the priority is raised. Once the priority is higher, kswapd starts waiting on congestion. However, if the zone is below the min watermark then kswapd needs to continue working without delay as there is a danger of an increased rate of GFP_ATOMIC allocation failure. This patch changes the conditions under which kswapd waits on congestion by only going to sleep if the min watermarks are being met. [mel@csn.ul.ie: add stats to track how relevant the logic is] [mel@csn.ul.ie: make kswapd only check its own zones and rename the relevant counters] Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: bb3ab596832b920c703d1aea1ce76d69c0f71fb7 [log] [tgz]
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Mon Dec 14 17:58:55 2009 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Tue Dec 15 08:53:16 2009 -0800
tree: 048162bf5081e7bd9802eba012e8b680a4444da8
parent: f50de2d3811081957156b5d736778799379c29de [diff] [blame]
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e176bd3..cb69f71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -1905,19 +1905,25 @@
 #endif
 
 /* is kswapd sleeping prematurely? */
-static int sleeping_prematurely(int order, long remaining)
+static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 {
-	struct zone *zone;
+	int i;
 
 	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
 	if (remaining)
 		return 1;
 
 	/* If after HZ/10, a zone is below the high mark, it's premature */
-	for_each_populated_zone(zone)
+	for (i = 0; i < pgdat->nr_zones; i++) {
+		struct zone *zone = pgdat->node_zones + i;
+
+		if (!populated_zone(zone))
+			continue;
+
 		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
 								0, 0))
 			return 1;
+	}
 
 	return 0;
 }
@@ -1979,6 +1985,7 @@
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+		int has_under_min_watermark_zone = 0;
 
 		/* The swap token gets in the way of swapout... */
 		if (!priority)
@@ -2085,6 +2092,15 @@
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
+
+			/*
+			 * We are still under min water mark. it mean we have
+			 * GFP_ATOMIC allocation failure risk. Hurry up!
+			 */
+			if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
+					      end_zone, 0))
+				has_under_min_watermark_zone = 1;
+
 		}
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
@@ -2092,8 +2108,12 @@
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+			if (has_under_min_watermark_zone)
+				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
+			else
+				congestion_wait(BLK_RW_ASYNC, HZ/10);
+		}
 
 		/*
 		 * We do this so kswapd doesn't build up large priorities for
@@ -2207,7 +2227,7 @@
 				long remaining = 0;
 
 				/* Try to sleep for a short interval */
-				if (!sleeping_prematurely(order, remaining)) {
+				if (!sleeping_prematurely(pgdat, order, remaining)) {
 					remaining = schedule_timeout(HZ/10);
 					finish_wait(&pgdat->kswapd_wait, &wait);
 					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2218,13 +2238,13 @@
 				 * premature sleep. If not, then go fully
 				 * to sleep until explicitly woken up
 				 */
-				if (!sleeping_prematurely(order, remaining))
+				if (!sleeping_prematurely(pgdat, order, remaining))
 					schedule();
 				else {
 					if (remaining)
-						count_vm_event(KSWAPD_PREMATURE_FAST);
+						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
 					else
-						count_vm_event(KSWAPD_PREMATURE_SLOW);
+						count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
 				}
 			}
commit	bb3ab596832b920c703d1aea1ce76d69c0f71fb7	[log] [tgz]
author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	Mon Dec 14 17:58:55 2009 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue Dec 15 08:53:16 2009 -0800
tree	048162bf5081e7bd9802eba012e8b680a4444da8
parent	f50de2d3811081957156b5d736778799379c29de [diff] [blame]