mm/vmscan.c: avoid throttling reclaim for loop-back nfsd threads

When a loopback NFS mount is active and the backing device for the NFS
mount becomes congested, that can impose throttling delays on the nfsd
threads.

These delays significantly reduce throughput and so the NFS mount remains
congested.

This results in a livelock and the reduced throughput persists.

This livelock has been found in testing with the 'wait_iff_congested'
call, and could possibly be caused by the 'congestion_wait' call.

This livelock is similar to the deadlock which justified the introduction
of PF_LESS_THROTTLE, and the same flag can be used to remove this
livelock.

To minimise the impact of the change, we still throttle nfsd when the
filesystem it is writing to is congested, but not when some separate
filesystem (e.g. the NFS filesystem) is congested.

Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: 399ba0b95670c70aaaa3f4f1623ea9e76c391681 [log] [tgz]
author: NeilBrown <neilb@suse.de> Wed Jun 04 16:07:42 2014 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Wed Jun 04 16:54:01 2014 -0700
tree: 6db21b9d0b97a6015654f6806eb4101c61ea949d
parent: 11de9927f9dd3cb0a0f18064fa4b6976fc37e79c [diff] [blame]
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 53e4534..5a8776e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -1439,6 +1439,19 @@
 }
 
 /*
+ * If a kernel thread (such as nfsd for loop-back mounts) services
+ * a backing device by writing to the page cache it sets PF_LESS_THROTTLE.
+ * In that case we should only throttle if the backing device it is
+ * writing to is congested.  In other cases it is safe to throttle.
+ */
+static int current_may_throttle(void)
+{
+	return !(current->flags & PF_LESS_THROTTLE) ||
+		current->backing_dev_info == NULL ||
+		bdi_write_congested(current->backing_dev_info);
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -1566,7 +1579,8 @@
 		 * implies that pages are cycling through the LRU faster than
 		 * they are written so also forcibly stall.
 		 */
-		if (nr_unqueued_dirty == nr_taken || nr_immediate)
+		if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
+		    current_may_throttle())
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 
@@ -1575,7 +1589,8 @@
 	 * is congested. Allow kswapd to continue until it starts encountering
 	 * unqueued dirty pages or cycling through the LRU too quickly.
 	 */
-	if (!sc->hibernation_mode && !current_is_kswapd())
+	if (!sc->hibernation_mode && !current_is_kswapd() &&
+	    current_may_throttle())
 		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
 
 	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
commit	399ba0b95670c70aaaa3f4f1623ea9e76c391681	[log] [tgz]
author	NeilBrown <neilb@suse.de>	Wed Jun 04 16:07:42 2014 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Wed Jun 04 16:54:01 2014 -0700
tree	6db21b9d0b97a6015654f6806eb4101c61ea949d
parent	11de9927f9dd3cb0a0f18064fa4b6976fc37e79c [diff] [blame]