userns: Add a limit on the number of user namespaces

Export the maximum number of user namespaces as
/proc/sys/userns/max_user_namespaces.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4..daa6a82 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -321,6 +321,8 @@
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	init_user_ns.max_user_namespaces = max_threads;
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
diff --git a/kernel/ucount.c b/kernel/ucount.c
index cbde1dc8..6c2205c 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -43,7 +43,18 @@
 	.permissions = set_permissions,
 };
 
+static int zero = 0;	/* referenced by .extra1 below */
+static int int_max = INT_MAX;	/* referenced by .extra2 below */
 static struct ctl_table userns_table[] = {
+	{	/* entry 0: setup code repoints its .data per namespace */
+		.procname	= "max_user_namespaces",
+		.data		= &init_user_ns.max_user_namespaces,
+		.maxlen		= sizeof(init_user_ns.max_user_namespaces),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,	/* minimum accepted value */
+		.extra2		= &int_max,	/* maximum accepted value */
+	},
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -55,6 +66,8 @@
 	setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 	tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL);
 	if (tbl) {
+		tbl[0].data = &ns->max_user_namespaces;
+
 		ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl);
 	}
 	if (!ns->sysctls) {
@@ -78,6 +91,46 @@
 #endif
 }
 
+static inline bool atomic_inc_below(atomic_t *v, int u)
+{	/* Add 1 to *v only while it is below u; false means no increment. */
+	int c, old;
+	c = atomic_read(v);
+	for (;;) {
+		if (unlikely(c >= u))	/* at or above the limit: give up */
+			return false;
+		old = atomic_cmpxchg(v, c, c+1);
+		if (likely(old == c))	/* value was still c, so c+1 is stored */
+			return true;
+		c = old;	/* lost a race: retry from the value observed */
+	}
+}
+
+bool inc_user_namespaces(struct user_namespace *ns)
+{	/* Charge one namespace to ns and each ancestor; all or nothing. */
+	struct user_namespace *pos, *bad;
+	for (pos = ns; pos; pos = pos->parent) {
+		int max = READ_ONCE(pos->max_user_namespaces);
+		if (!atomic_inc_below(&pos->user_namespaces, max))
+			goto fail;
+	}
+	return true;	/* every level accepted the charge */
+fail:
+	bad = pos;	/* first level that refused; it was not incremented */
+	for (pos = ns; pos != bad; pos = pos->parent)
+		atomic_dec(&pos->user_namespaces);	/* undo levels below bad */
+
+	return false;
+}
+
+void dec_user_namespaces(struct user_namespace *ns)
+{	/* Drop one namespace charge from ns and from each ancestor. */
+	struct user_namespace *pos;
+	for (pos = ns; pos; pos = pos->parent) {
+		int dec = atomic_dec_if_positive(&pos->user_namespaces);
+		WARN_ON_ONCE(dec < 0);	/* presumably an unbalanced decrement */
+	}
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a633322..7d87017 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -23,9 +23,6 @@
 #include <linux/projid.h>
 #include <linux/fs_struct.h>
 
-extern bool setup_userns_sysctls(struct user_namespace *ns);
-extern void retire_userns_sysctls(struct user_namespace *ns);
-
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
 
@@ -34,6 +31,7 @@
 				struct uid_gid_map *map);
 static void free_user_ns(struct work_struct *work);
 
+
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
 {
 	/* Start with the same capabilities as init but useless for doing
@@ -68,8 +66,12 @@
 	kgid_t group = new->egid;
 	int ret;
 
+	ret = -EUSERS;
 	if (parent_ns->level > 32)
-		return -EUSERS;
+		goto fail;
+
+	if (!inc_user_namespaces(parent_ns))
+		goto fail;
 
 	/*
 	 * Verify that we can not violate the policy of which files
@@ -77,26 +79,27 @@
 	 * by verifing that the root directory is at the root of the
 	 * mount namespace which allows all files to be accessed.
 	 */
+	ret = -EPERM;
 	if (current_chrooted())
-		return -EPERM;
+		goto fail_dec;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
 	 * created a user_namespace.
 	 */
+	ret = -EPERM;
 	if (!kuid_has_mapping(parent_ns, owner) ||
 	    !kgid_has_mapping(parent_ns, group))
-		return -EPERM;
+		goto fail_dec;
 
+	ret = -ENOMEM;
 	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
 	if (!ns)
-		return -ENOMEM;
+		goto fail_dec;
 
 	ret = ns_alloc_inum(&ns->ns);
-	if (ret) {
-		kmem_cache_free(user_ns_cachep, ns);
-		return ret;
-	}
+	if (ret)
+		goto fail_free;
 	ns->ns.ops = &userns_operations;
 
 	atomic_set(&ns->count, 1);
@@ -106,6 +109,7 @@
 	ns->owner = owner;
 	ns->group = group;
 	INIT_WORK(&ns->work, free_user_ns);
+	ns->max_user_namespaces = INT_MAX;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 	mutex_lock(&userns_state_mutex);
@@ -126,7 +130,11 @@
 	key_put(ns->persistent_keyring_register);
 #endif
 	ns_free_inum(&ns->ns);
+fail_free:
 	kmem_cache_free(user_ns_cachep, ns);
+fail_dec:
+	dec_user_namespaces(parent_ns);
+fail:
 	return ret;
 }
 
@@ -163,6 +171,7 @@
 #endif
 		ns_free_inum(&ns->ns);
 		kmem_cache_free(user_ns_cachep, ns);
+		dec_user_namespaces(parent);
 		ns = parent;
 	} while (atomic_dec_and_test(&parent->count));
 }