Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Netlink socket dumping had several missing verifications and checks.
In particular, address comparisons in the request byte code
interpreter could access past the end of the address in the
inet_request_sock.
Also, address family and address prefix lengths were not validated
properly at all.
This means arbitrary applications can read past the end of certain
kernel data structures.
Fixes from Neal Cardwell.
2) ip_check_defrag() operates in contexts where we're in the process
of, or about to, input the packet into the real protocols
(specifically macvlan and AF_PACKET snooping).
Unfortunately, it does a pskb_may_pull() which can modify the
backing packet data which is not legal if the SKB is shared. It
very much can be shared in this context.
Deal with the possibility that the SKB is segmented by using
skb_copy_bits().
Fix from Johannes Berg based upon a report by Eric Leblond.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
ipv4: ip_check_defrag must not modify skb before unsharing
inet_diag: validate port comparison byte code to prevent unsafe reads
inet_diag: avoid unsafe and nonsensical prefix matches in inet_diag_bc_run()
inet_diag: validate byte code to prevent oops in inet_diag_bc_run()
inet_diag: fix oops for IPv4 AF_INET6 TCP SYN-RECV state
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 107f09b..9b285da 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1796,7 +1796,7 @@
*/
mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
gfp = mapping_gfp_mask(mapping);
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
gfp &= ~(__GFP_IO | __GFP_WAIT);
for_each_sg(st->sgl, sg, page_count, i) {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
@@ -1809,7 +1809,7 @@
* our own buffer, now let the real VM do its job and
* go down in flames if truly OOM.
*/
- gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
+ gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
gfp |= __GFP_IO | __GFP_WAIT;
i915_gem_shrink_all(dev_priv);
@@ -1817,7 +1817,7 @@
if (IS_ERR(page))
goto err_pages;
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
gfp &= ~(__GFP_IO | __GFP_WAIT);
}
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 374c46d..ec794a72 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1077,7 +1077,8 @@
* until the request succeeds or until the allocation size falls below
* the system page size. This attempts to make sure it does not adversely
* impact system performance, so when allocating more than one page, we
- * ask the memory allocator to avoid re-trying.
+ * ask the memory allocator to avoid re-trying, swapping, writing back
+ * or performing I/O.
*
* Note, this function also makes sure that the allocated buffer is aligned to
* the MTD device's min. I/O unit, i.e. the "mtd->writesize" value.
@@ -1091,7 +1092,8 @@
*/
void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
{
- gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY;
+ gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
+ __GFP_NORETRY | __GFP_NO_KSWAPD;
size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
void *kbuf;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76e1aa2..d0a7967 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,9 +30,10 @@
#define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u
-#define ___GFP_NOTRACK 0x100000u
-#define ___GFP_OTHER_NODE 0x200000u
-#define ___GFP_WRITE 0x400000u
+#define ___GFP_NOTRACK 0x200000u
+#define ___GFP_NO_KSWAPD 0x400000u
+#define ___GFP_OTHER_NODE 0x800000u
+#define ___GFP_WRITE 0x1000000u
/*
* GFP bitmasks..
@@ -85,6 +86,7 @@
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
+#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
@@ -94,7 +96,7 @@
*/
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_BITS_SHIFT 23 /* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* This equals 0, but use constants in case they ever change */
@@ -114,7 +116,8 @@
__GFP_MOVABLE)
#define GFP_IOFS (__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
- __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+ __GFP_NO_KSWAPD)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 9391706..d6fd8e5 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -36,6 +36,7 @@
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
+ {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
{(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \
) : "GFP_NOWAIT"
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a8f2c87..7e208f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2378,15 +2378,6 @@
return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
}
-/* Returns true if the allocation is likely for THP */
-static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
-{
- if (order == pageblock_order &&
- (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
- return true;
- return false;
-}
-
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2425,10 +2416,9 @@
goto nopage;
restart:
- /* The decision whether to wake kswapd for THP is made later */
- if (!is_thp_alloc(gfp_mask, order))
+ if (!(gfp_mask & __GFP_NO_KSWAPD))
wake_all_kswapd(order, zonelist, high_zoneidx,
- zone_idx(preferred_zone));
+ zone_idx(preferred_zone));
/*
* OK, we're below the kswapd watermark and have kicked background
@@ -2498,21 +2488,15 @@
goto got_pg;
sync_migration = true;
- if (is_thp_alloc(gfp_mask, order)) {
- /*
- * If compaction is deferred for high-order allocations, it is
- * because sync compaction recently failed. If this is the case
- * and the caller requested a movable allocation that does not
- * heavily disrupt the system then fail the allocation instead
- * of entering direct reclaim.
- */
- if (deferred_compaction || contended_compaction)
- goto nopage;
-
- /* If process is willing to reclaim/compact then wake kswapd */
- wake_all_kswapd(order, zonelist, high_zoneidx,
- zone_idx(preferred_zone));
- }
+ /*
+ * If compaction is deferred for high-order allocations, it is because
+ * sync compaction recently failed. In this is the case and the caller
+ * requested a movable allocation that does not heavily disrupt the
+ * system then fail the allocation instead of entering direct reclaim.
+ */
+ if ((deferred_compaction || contended_compaction) &&
+ (gfp_mask & __GFP_NO_KSWAPD))
+ goto nopage;
/* Try direct reclaim and then allocating */
page = __alloc_pages_direct_reclaim(gfp_mask, order,