net: Introduce net_rwsem to protect net_namespace_list

rtnl_lock() is used everywhere, and contention on it is very high.
When someone wants to iterate over alive net namespaces, there is
no way to do that without taking an exclusive lock. But the
exclusive rtnl_lock() in such places is overkill, and it just
increases the contention. Yes, there is already for_each_net_rcu()
in the kernel, but it requires rcu_read_lock(), so the iteration
can't sleep. Also, sometimes it is necessary to really prevent
net_namespace_list from growing, so for_each_net_rcu() does not
fit there.

This patch introduces a new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows non-exclusive iteration over the net
namespaces list. It allows us to stop using rtnl_lock() in
several places (which is done in the next patches) and reduces
the time we hold rtnl_mutex. Here we just add the new lock,
while the explanation of why rtnl_lock() can be removed in each
place is given in the next patches.

Fine-grained locks are generally better than one big lock,
so let's do that with net_namespace_list while the situation
allows it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b5796d1..7fdf321 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
+/* Protects net_namespace_list. Nests inside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
 struct net init_net = {
 	.count		= REFCOUNT_INIT(1),
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 		if (error < 0)
 			goto out_undo;
 	}
-	rtnl_lock();
+	down_write(&net_rwsem);
 	list_add_tail_rcu(&net->list, &net_namespace_list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 out:
 	return error;
 
@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
 	 * and this work is the only process, that may delete
 	 * a net from net_namespace_list. So, when the below
 	 * is executing, the list may only grow. Thus, we do not
-	 * use for_each_net_rcu() or rtnl_lock().
+	 * use for_each_net_rcu() or net_rwsem.
 	 */
 	for_each_net(tmp) {
 		int id;
@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
 	down_read(&pernet_ops_rwsem);
 
 	/* Don't let anyone else find us. */
-	rtnl_lock();
+	down_write(&net_rwsem);
 	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
 	 * useless anyway, as netns_ids are destroyed there.
 	 */
 	last = list_last_entry(&net_namespace_list, struct net, list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 
 	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
 
 	list_add_tail(&ops->list, list);
 	if (ops->init || (ops->id && ops->size)) {
+		/* We hold pernet_ops_rwsem write-locked, so parallel
+		 * setup_net() and cleanup_net() are not possible.
+		 */
 		for_each_net(net) {
 			error = ops_init(ops, net);
 			if (error)
@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
 	LIST_HEAD(net_exit_list);
 
 	list_del(&ops->list);
+	/* See comment in __register_pernet_operations() */
 	for_each_net(net)
 		list_add_tail(&net->exit_list, &net_exit_list);
 	ops_exit_list(ops, &net_exit_list);