Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net Pull networking fixes from David Miller: 1) Netlink socket dumping had several missing verifications and checks. In particular, address comparisons in the request byte code interpreter could access past the end of the address in the inet_request_sock. Also, address family and address prefix lengths were not validated properly at all. This means arbitrary applications can read past the end of certain kernel data structures. Fixes from Neal Cardwell. 2) ip_check_defrag() operates in contexts where we're in the process of, or about to, input the packet into the real protocols (specifically macvlan and AF_PACKET snooping). Unfortunately, it does a pskb_may_pull() which can modify the backing packet data which is not legal if the SKB is shared. It very much can be shared in this context. Deal with the possibility that the SKB is segmented by using skb_copy_bits(). Fix from Johannes Berg based upon a report by Eric Leblond. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: ipv4: ip_check_defrag must not modify skb before unsharing inet_diag: validate port comparison byte code to prevent unsafe reads inet_diag: avoid unsafe and nonsensical prefix matches in inet_diag_bc_run() inet_diag: validate byte code to prevent oops in inet_diag_bc_run() inet_diag: fix oops for IPv4 AF_INET6 TCP SYN-RECV state

commit: 2c68bc72dc77be3e8c32cd7ad6c7714ee21efd79 [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Mon Dec 10 16:07:11 2012 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Mon Dec 10 16:07:11 2012 -0800
tree: 7b0f9cd6ac0b4dd8cac207d36799a336062d664f
parent: caf491916b1c1e939a2c7575efb7a77f11fc9bdf [diff]
parent: 1bf3751ec90cc3174e01f0d701e8449ce163d113 [diff]
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 107f09b..9b285da 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -1796,7 +1796,7 @@
 	 */
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	for_each_sg(st->sgl, sg, page_count, i) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
@@ -1809,7 +1809,7 @@
 			 * our own buffer, now let the real VM do its job and
 			 * go down in flames if truly OOM.
 			 */
-			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
+			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
 			gfp |= __GFP_IO | __GFP_WAIT;
 
 			i915_gem_shrink_all(dev_priv);
@@ -1817,7 +1817,7 @@
 			if (IS_ERR(page))
 				goto err_pages;
 
-			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
 

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 374c46d..ec794a72 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c

@@ -1077,7 +1077,8 @@
  * until the request succeeds or until the allocation size falls below
  * the system page size. This attempts to make sure it does not adversely
  * impact system performance, so when allocating more than one page, we
- * ask the memory allocator to avoid re-trying.
+ * ask the memory allocator to avoid re-trying, swapping, writing back
+ * or performing I/O.
  *
  * Note, this function also makes sure that the allocated buffer is aligned to
  * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value.
@@ -1091,7 +1092,8 @@
  */
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
 {
-	gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY;
+	gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
+		       __GFP_NORETRY | __GFP_NO_KSWAPD;
 	size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
 	void *kbuf;
 

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76e1aa2..d0a7967 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h

@@ -30,9 +30,10 @@
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_NOTRACK		0x100000u
-#define ___GFP_OTHER_NODE	0x200000u
-#define ___GFP_WRITE		0x400000u
+#define ___GFP_NOTRACK		0x200000u
+#define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
+#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
@@ -94,7 +96,7 @@
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 23	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
@@ -114,7 +116,8 @@
 				 __GFP_MOVABLE)
 #define GFP_IOFS	(__GFP_IO | __GFP_FS)
 #define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+			 __GFP_NO_KSWAPD)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)

diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 9391706..d6fd8e5 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h

@@ -36,6 +36,7 @@
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
+	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
 	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
 	) : "GFP_NOWAIT"
 

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a8f2c87..7e208f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -2378,15 +2378,6 @@
 	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
 }
 
-/* Returns true if the allocation is likely for THP */
-static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
-{
-	if (order == pageblock_order &&
-	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
-		return true;
-	return false;
-}
-
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2425,10 +2416,9 @@
 		goto nopage;
 
 restart:
-	/* The decision whether to wake kswapd for THP is made later */
-	if (!is_thp_alloc(gfp_mask, order))
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
 		wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2498,21 +2488,15 @@
 		goto got_pg;
 	sync_migration = true;
 
-	if (is_thp_alloc(gfp_mask, order)) {
-		/*
-		 * If compaction is deferred for high-order allocations, it is
-		 * because sync compaction recently failed. If this is the case
-		 * and the caller requested a movable allocation that does not
-		 * heavily disrupt the system then fail the allocation instead
-		 * of entering direct reclaim.
-		 */
-		if (deferred_compaction || contended_compaction)
-			goto nopage;
-
-		/* If process is willing to reclaim/compact then wake kswapd */
-		wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
-	}
+	/*
+	 * If compaction is deferred for high-order allocations, it is because
+	 * sync compaction recently failed. In this is the case and the caller
+	 * requested a movable allocation that does not heavily disrupt the
+	 * system then fail the allocation instead of entering direct reclaim.
+	 */
+	if ((deferred_compaction || contended_compaction) &&
+						(gfp_mask & __GFP_NO_KSWAPD))
+		goto nopage;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
commit	2c68bc72dc77be3e8c32cd7ad6c7714ee21efd79	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Mon Dec 10 16:07:11 2012 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Mon Dec 10 16:07:11 2012 -0800
tree	7b0f9cd6ac0b4dd8cac207d36799a336062d664f
parent	caf491916b1c1e939a2c7575efb7a77f11fc9bdf [diff]
parent	1bf3751ec90cc3174e01f0d701e8449ce163d113 [diff]