[INET]: local port range robustness
Expansion of original idea from Denis V. Lunev <den@openvz.org>
Add robustness and locking to the local_port_range sysctl.
1. Enforce that low < high when setting.
2. Use seqlock to ensure atomic update.
The locking might seem like overkill, but there are
cases where sysadmin might want to change value in the
middle of a DoS attack.
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fbe7714..3cef128 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -33,6 +33,19 @@
* This array holds the first and last local port number.
*/
int sysctl_local_port_range[2] = { 32768, 61000 };
+DEFINE_SEQLOCK(sysctl_port_range_lock);
+
+void inet_get_local_port_range(int *low, int *high)
+{
+ unsigned seq;
+ do {
+ seq = read_seqbegin(&sysctl_port_range_lock);
+
+ *low = sysctl_local_port_range[0];
+ *high = sysctl_local_port_range[1];
+ } while (read_seqretry(&sysctl_port_range_lock, seq));
+}
+EXPORT_SYMBOL(inet_get_local_port_range);
int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb)
@@ -77,10 +90,11 @@
local_bh_disable();
if (!snum) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- int rover = net_random() % (high - low) + low;
+ int remaining, rover, low, high;
+
+ inet_get_local_port_range(&low, &high);
+ remaining = high - low;
+ rover = net_random() % remaining + low;
do {
head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb66262..fac6398 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -279,19 +279,18 @@
int ret;
if (!snum) {
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int range = high - low;
- int i;
- int port;
+ int i, remaining, low, high, port;
static u32 hint;
u32 offset = hint + inet_sk_port_offset(sk);
struct hlist_node *node;
struct inet_timewait_sock *tw = NULL;
+ inet_get_local_port_range(&low, &high);
+ remaining = high - low;
+
local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
+ for (i = 1; i <= remaining; i++) {
+ port = low + (i + offset) % remaining;
head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 53ef0f4..eb286ab 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
#include <linux/sysctl.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
+#include <linux/seqlock.h>
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -89,6 +90,74 @@
return 1;
}
+extern seqlock_t sysctl_port_range_lock;
+extern int sysctl_local_port_range[2];
+
+/* Update system visible IP port range */
+static void set_local_port_range(int range[2])
+{
+ write_seqlock(&sysctl_port_range_lock);
+ sysctl_local_port_range[0] = range[0];
+ sysctl_local_port_range[1] = range[1];
+ write_sequnlock(&sysctl_port_range_lock);
+}
+
+/* Validate changes from /proc interface. */
+static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+ int range[2] = { sysctl_local_port_range[0],
+ sysctl_local_port_range[1] };
+ ctl_table tmp = {
+ .data = &range,
+ .maxlen = sizeof(range),
+ .mode = table->mode,
+ .extra1 = &ip_local_port_range_min,
+ .extra2 = &ip_local_port_range_max,
+ };
+
+ ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ if (range[1] <= range[0])
+ ret = -EINVAL;
+ else
+ set_local_port_range(range);
+ }
+
+ return ret;
+}
+
+/* Validate changes from sysctl interface. */
+static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
+ int nlen, void __user *oldval,
+ size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int ret;
+ int range[2] = { sysctl_local_port_range[0],
+ sysctl_local_port_range[1] };
+ ctl_table tmp = {
+ .data = &range,
+ .maxlen = sizeof(range),
+ .mode = table->mode,
+ .extra1 = &ip_local_port_range_min,
+ .extra2 = &ip_local_port_range_max,
+ };
+
+ ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+ if (ret == 0 && newval && newlen) {
+ if (range[1] <= range[0])
+ ret = -EINVAL;
+ else
+ set_local_port_range(range);
+ }
+ return ret;
+}
+
+
static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -427,10 +496,8 @@
.data = &sysctl_local_port_range,
.maxlen = sizeof(sysctl_local_port_range),
.mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = ip_local_port_range_min,
- .extra2 = ip_local_port_range_max
+ .proc_handler = &ipv4_local_port_range,
+ .strategy = &ipv4_sysctl_local_port_range,
},
{
.ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8855e64..38cf73a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2470,6 +2470,5 @@
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
-EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ef4d901..cb9fc58 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -147,11 +147,11 @@
write_lock_bh(&udp_hash_lock);
if (!snum) {
- int i;
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
+ int i, low, high;
unsigned rover, best, best_size_so_far;
+ inet_get_local_port_range(&low, &high);
+
best_size_so_far = UINT_MAX;
best = rover = net_random() % (high - low) + low;