net: openvswitch: reorder masks array based on usage

This patch reorders the masks array every 4 seconds based on each
mask's usage count. Because lookups walk the masks array in order,
placing the most-used masks first greatly reduces the number of masks
checked per packet, and hence improves the overall performance,
especially in the OVS/OVN case for OpenShift.
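
For illustration, a condensed sketch of the rebalance pass follows. It
is an approximation of the flow_table.c logic, assuming the struct
fields added below (masks_usage_cntr, masks_usage_zero_cntr, syncp);
allocation-failure handling and the RCU swap of the reordered array are
elided. It needs <linux/sort.h>, <linux/percpu.h> and
<linux/u64_stats_sync.h>.

static int compare_mask_and_count(const void *a, const void *b)
{
	const struct mask_count *mc_a = a;
	const struct mask_count *mc_b = b;

	/* Descending order: most-used masks sort first. */
	if (mc_b->counter > mc_a->counter)
		return 1;
	if (mc_b->counter < mc_a->counter)
		return -1;
	return 0;
}

void ovs_flow_masks_rebalance(struct flow_table *table)
{
	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
	struct mask_count *masks_and_count;
	int i, cpu;

	masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count),
					GFP_KERNEL);
	if (!masks_and_count)
		return;

	/* Sum the per-CPU usage counters for every mask in use. */
	for (i = 0; i < ma->max; i++) {
		if (!rcu_dereference_ovsl(ma->masks[i]))
			break;

		masks_and_count[i].index = i;
		masks_and_count[i].counter = 0;

		for_each_possible_cpu(cpu) {
			u64 *counters = per_cpu_ptr(ma->masks_usage_cntr,
						    cpu);
			unsigned int start;
			u64 counter;

			do {
				start = u64_stats_fetch_begin_irq(&ma->syncp);
				counter = counters[i];
			} while (u64_stats_fetch_retry_irq(&ma->syncp,
							   start));

			masks_and_count[i].counter += counter;
		}

		/* Only count usage since the previous rebalance; the
		 * zero-counter baseline lets us "reset" without writing
		 * to the per-CPU counters from this context.
		 */
		masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i];
		ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter;
	}

	sort(masks_and_count, i, sizeof(*masks_and_count),
	     compare_mask_and_count, NULL);

	/* A new mask_array is built in this order and RCU-swapped
	 * into table->mask_array (omitted here).
	 */
	kfree(masks_and_count);
}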

Here are some results from the OVS/OVN OpenShift test, which uses
8 pods, each pod having 512 uperf connections; each connection
sends a 64-byte request and gets a 1024-byte response (TCP).
All uperf clients are on one worker node while all uperf servers are
on the other worker node.

Kernel without this patch     :  7.71 Gbps
Kernel with this patch applied: 14.52 Gbps

We also ran some tests to verify that the rebalance activity does not
lower the flow insertion rate, and it does not.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Tested-by: Andrew Theurer <atheurer@redhat.com>
Reviewed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 8a5cea6..1f664b0 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -27,9 +27,17 @@ struct mask_cache_entry {
 	u32 mask_index;
 };
 
+struct mask_count {
+	int index;
+	u64 counter;
+};
+
 struct mask_array {
 	struct rcu_head rcu;
 	int count, max;
+	u64 __percpu *masks_usage_cntr;
+	u64 *masks_usage_zero_cntr;
+	struct u64_stats_sync syncp;
 	struct sw_flow_mask __rcu *masks[];
 };
 
@@ -86,4 +94,7 @@ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
 
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       bool full, const struct sw_flow_mask *mask);
+
+void ovs_flow_masks_rebalance(struct flow_table *table);
+
 #endif /* flow_table.h */
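
As a rough sketch of how the new fields and the
ovs_flow_masks_rebalance() declaration above fit together: the lookup
hot path bumps a per-CPU counter under the u64_stats seqcount, and a
self-rearming delayed work item drives the 4-second rebalance. The
mask_hit_account() helper name and the masks_rebalance member of
ovs_net are illustrative assumptions, simplified from the corresponding
flow_table.c and datapath.c changes.

/* Hot path (flow lookup): account a hit on ma->masks[index].
 * mask_hit_account() is a hypothetical helper for illustration.
 */
static void mask_hit_account(struct mask_array *ma, u32 index)
{
	u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);

	u64_stats_update_begin(&ma->syncp);
	usage_counters[index]++;
	u64_stats_update_end(&ma->syncp);
}

#define MASKS_REBALANCE_INTERVAL	4000	/* ms, "every 4 seconds" */

/* Assumes a struct delayed_work masks_rebalance member in ovs_net. */
static void masks_rebalance_work(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);
	ovs_unlock();

	/* Re-arm so the reordering keeps tracking traffic changes. */
	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(MASKS_REBALANCE_INTERVAL));
}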