blob: c0851fec11d46532f87423e1ab38988bf4418172 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
#include <linux/rhashtable.h>
#include <linux/audit.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_offload.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static u64 table_handle;
enum {
NFT_VALIDATE_SKIP = 0,
NFT_VALIDATE_NEED,
NFT_VALIDATE_DO,
};
static struct rhltable nft_objname_ht;
static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);
static const struct rhashtable_params nft_chain_ht_params = {
.head_offset = offsetof(struct nft_chain, rhlhead),
.key_offset = offsetof(struct nft_chain, name),
.hashfn = nft_chain_hash,
.obj_hashfn = nft_chain_hash_obj,
.obj_cmpfn = nft_chain_hash_cmp,
.automatic_shrinking = true,
};
static const struct rhashtable_params nft_objname_ht_params = {
.head_offset = offsetof(struct nft_object, rhlhead),
.key_offset = offsetof(struct nft_object, key),
.hashfn = nft_objname_hash,
.obj_hashfn = nft_objname_hash_obj,
.obj_cmpfn = nft_objname_hash_cmp,
.automatic_shrinking = true,
};
struct nft_audit_data {
struct nft_table *table;
int entries;
int op;
struct list_head list;
};
static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
[NFT_MSG_NEWTABLE] = AUDIT_NFT_OP_TABLE_REGISTER,
[NFT_MSG_GETTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELTABLE] = AUDIT_NFT_OP_TABLE_UNREGISTER,
[NFT_MSG_NEWCHAIN] = AUDIT_NFT_OP_CHAIN_REGISTER,
[NFT_MSG_GETCHAIN] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELCHAIN] = AUDIT_NFT_OP_CHAIN_UNREGISTER,
[NFT_MSG_NEWRULE] = AUDIT_NFT_OP_RULE_REGISTER,
[NFT_MSG_GETRULE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELRULE] = AUDIT_NFT_OP_RULE_UNREGISTER,
[NFT_MSG_NEWSET] = AUDIT_NFT_OP_SET_REGISTER,
[NFT_MSG_GETSET] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELSET] = AUDIT_NFT_OP_SET_UNREGISTER,
[NFT_MSG_NEWSETELEM] = AUDIT_NFT_OP_SETELEM_REGISTER,
[NFT_MSG_GETSETELEM] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELSETELEM] = AUDIT_NFT_OP_SETELEM_UNREGISTER,
[NFT_MSG_NEWGEN] = AUDIT_NFT_OP_GEN_REGISTER,
[NFT_MSG_GETGEN] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_TRACE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_NEWOBJ] = AUDIT_NFT_OP_OBJ_REGISTER,
[NFT_MSG_GETOBJ] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELOBJ] = AUDIT_NFT_OP_OBJ_UNREGISTER,
[NFT_MSG_GETOBJ_RESET] = AUDIT_NFT_OP_OBJ_RESET,
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
};
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
struct nftables_pernet *nft_net = nft_pernet(net);
switch (nft_net->validate_state) {
case NFT_VALIDATE_SKIP:
WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);
break;
case NFT_VALIDATE_NEED:
break;
case NFT_VALIDATE_DO:
if (new_validate_state == NFT_VALIDATE_NEED)
return;
}
nft_net->validate_state = new_validate_state;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
u8 family,
struct nft_table *table,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
ctx->net = net;
ctx->family = family;
ctx->level = 0;
ctx->table = table;
ctx->chain = chain;
ctx->nla = nla;
ctx->portid = NETLINK_CB(skb).portid;
ctx->report = nlmsg_report(nlh);
ctx->flags = nlh->nlmsg_flags;
ctx->seq = nlh->nlmsg_seq;
}
static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
int msg_type, u32 size, gfp_t gfp)
{
struct nft_trans *trans;
trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
if (trans == NULL)
return NULL;
trans->msg_type = msg_type;
trans->ctx = *ctx;
return trans;
}
static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
int msg_type, u32 size)
{
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
static void nft_trans_destroy(struct nft_trans *trans)
{
list_del(&trans->list);
kfree(trans);
}
static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
struct nft_trans *trans;
if (!nft_set_is_anonymous(set))
return;
nft_net = nft_pernet(net);
list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
nft_trans_set_bound(trans) = true;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
nft_trans_elem_set_bound(trans) = true;
break;
}
}
}
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
struct nft_hook *hook;
int err, j;
j = 0;
list_for_each_entry(hook, hook_list, list) {
err = nf_register_net_hook(net, &hook->ops);
if (err < 0)
goto err_register;
j++;
}
return 0;
err_register:
list_for_each_entry(hook, hook_list, list) {
if (j-- <= 0)
break;
nf_unregister_net_hook(net, &hook->ops);
}
return err;
}
static void nft_netdev_unregister_hooks(struct net *net,
struct list_head *hook_list)
{
struct nft_hook *hook;
list_for_each_entry(hook, hook_list, list)
nf_unregister_net_hook(net, &hook->ops);
}
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return 0;
basechain = nft_base_chain(chain);
ops = &basechain->ops;
if (basechain->type->ops_register)
return basechain->type->ops_register(net, ops);
if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
return nft_netdev_register_hooks(net, &basechain->hook_list);
return nf_register_net_hook(net, &basechain->ops);
}
static void nf_tables_unregister_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
{
struct nft_base_chain *basechain;
const struct nf_hook_ops *ops;
if (table->flags & NFT_TABLE_F_DORMANT ||
!nft_is_base_chain(chain))
return;
basechain = nft_base_chain(chain);
ops = &basechain->ops;
if (basechain->type->ops_unregister)
return basechain->type->ops_unregister(net, ops);
if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
nft_netdev_unregister_hooks(net, &basechain->hook_list);
else
nf_unregister_net_hook(net, &basechain->ops);
}
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
list_add_tail(&trans->list, &nft_net->commit_list);
}
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
if (trans == NULL)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWTABLE)
nft_activate_next(ctx->net, ctx->table);
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
static int nft_deltable(struct nft_ctx *ctx)
{
int err;
err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
if (err < 0)
return err;
nft_deactivate_next(ctx->net, ctx->table);
return err;
}
static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
if (trans == NULL)
return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWCHAIN) {
nft_activate_next(ctx->net, ctx->chain);
if (ctx->nla[NFTA_CHAIN_ID]) {
nft_trans_chain_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
}
static int nft_delchain(struct nft_ctx *ctx)
{
struct nft_trans *trans;
trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
if (IS_ERR(trans))
return PTR_ERR(trans);
ctx->table->use--;
nft_deactivate_next(ctx->net, ctx->chain);
return 0;
}
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
struct nft_expr *expr;
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
if (expr->ops->activate)
expr->ops->activate(ctx, expr);
expr = nft_expr_next(expr);
}
}
static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
struct nft_rule *rule,
enum nft_trans_phase phase)
{
struct nft_expr *expr;
expr = nft_expr_first(rule);
while (nft_expr_more(rule, expr)) {
if (expr->ops->deactivate)
expr->ops->deactivate(ctx, expr, phase);
expr = nft_expr_next(expr);
}
}
static int
nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
ctx->chain->use--;
return 0;
}
return -ENOENT;
}
static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
struct nft_rule *rule)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
if (trans == NULL)
return NULL;
if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) {
nft_trans_rule_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
}
nft_trans_rule(trans) = rule;
nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
}
static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_flow_rule *flow;
struct nft_trans *trans;
int err;
trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
if (trans == NULL)
return -ENOMEM;
if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
flow = nft_flow_rule_create(ctx->net, rule);
if (IS_ERR(flow)) {
nft_trans_destroy(trans);
return PTR_ERR(flow);
}
nft_trans_flow_rule(trans) = flow;
}
err = nf_tables_delrule_deactivate(ctx, rule);
if (err < 0) {
nft_trans_destroy(trans);
return err;
}
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
return 0;
}
static int nft_delrule_by_chain(struct nft_ctx *ctx)
{
struct nft_rule *rule;
int err;
list_for_each_entry(rule, &ctx->chain->rules, list) {
if (!nft_is_active_next(ctx->net, rule))
continue;
err = nft_delrule(ctx, rule);
if (err < 0)
return err;
}
return 0;
}
static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
if (trans == NULL)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
if (err < 0)
return err;
nft_deactivate_next(ctx->net, set);
ctx->table->use--;
return err;
}
static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
struct nft_object *obj)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj));
if (trans == NULL)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWOBJ)
nft_activate_next(ctx->net, obj);
nft_trans_obj(trans) = obj;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
{
int err;
err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj);
if (err < 0)
return err;
nft_deactivate_next(ctx->net, obj);
ctx->table->use--;
return err;
}
static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
int err;
err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
if (err < 0)
return err;
nft_deactivate_next(ctx->net, flowtable);
ctx->table->use--;
return err;
}
/*
* Tables
*/
static struct nft_table *nft_table_lookup(const struct net *net,
const struct nlattr *nla,
u8 family, u8 genmask, u32 nlpid)
{
struct nftables_pernet *nft_net;
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
nft_net = nft_pernet(net);
list_for_each_entry_rcu(table, &nft_net->tables, list,
lockdep_is_held(&nft_net->commit_mutex)) {
if (!nla_strcmp(nla, table->name) &&
table->family == family &&
nft_active_genmask(table, genmask)) {
if (nft_table_has_owner(table) &&
nlpid && table->nlpid != nlpid)
return ERR_PTR(-EPERM);
return table;
}
}
return ERR_PTR(-ENOENT);
}
static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
const struct nlattr *nla,
u8 genmask, u32 nlpid)
{
struct nftables_pernet *nft_net;
struct nft_table *table;
nft_net = nft_pernet(net);
list_for_each_entry(table, &nft_net->tables, list) {
if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
nft_active_genmask(table, genmask)) {
if (nft_table_has_owner(table) &&
nlpid && table->nlpid != nlpid)
return ERR_PTR(-EPERM);
return table;
}
}
return ERR_PTR(-ENOENT);
}
static inline u64 nf_tables_alloc_handle(struct nft_table *table)
{
return ++table->hgenerator;
}
static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nft_chain_type *
__nft_chain_type_get(u8 family, enum nft_chain_types type)
{
if (family >= NFPROTO_NUMPROTO ||
type >= NFT_CHAIN_T_MAX)
return NULL;
return chain_type[family][type];
}
static const struct nft_chain_type *
__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
const struct nft_chain_type *type;
int i;
for (i = 0; i < NFT_CHAIN_T_MAX; i++) {
type = __nft_chain_type_get(family, i);
if (!type)
continue;
if (!nla_strcmp(nla, type->name))
return type;
}
return NULL;
}
struct nft_module_request {
struct list_head list;
char module[MODULE_NAME_LEN];
bool done;
};
#ifdef CONFIG_MODULES
__printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
...)
{
char module_name[MODULE_NAME_LEN];
struct nftables_pernet *nft_net;
struct nft_module_request *req;
va_list args;
int ret;
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return 0;
nft_net = nft_pernet(net);
list_for_each_entry(req, &nft_net->module_list, list) {
if (!strcmp(req->module, module_name)) {
if (req->done)
return 0;
/* A request to load this module already exists. */
return -EAGAIN;
}
}
req = kmalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
req->done = false;
strlcpy(req->module, module_name, MODULE_NAME_LEN);
list_add_tail(&req->list, &nft_net->module_list);
return -EAGAIN;
}
EXPORT_SYMBOL_GPL(nft_request_module);
#endif
static void lockdep_nfnl_nft_mutex_not_held(void)
{
#ifdef CONFIG_PROVE_LOCKING
if (debug_locks)
WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
#endif
}
static const struct nft_chain_type *
nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
u8 family, bool autoload)
{
const struct nft_chain_type *type;
type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (autoload) {
if (nft_request_module(net, "nft-chain-%u-%.*s", family,
nla_len(nla),
(const char *)nla_data(nla)) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
return ERR_PTR(-ENOENT);
}
static __be16 nft_base_seq(const struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
return htons(nft_net->base_seq & 0xffff);
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
[NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
[NFTA_TABLE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN }
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS,
htonl(table->flags & NFT_TABLE_F_MASK)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
NFTA_TABLE_PAD))
goto nla_put_failure;
if (nft_table_has_owner(table) &&
nla_put_be32(skb, NFTA_TABLE_OWNER, htonl(table->nlpid)))
goto nla_put_failure;
if (table->udata) {
if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata))
goto nla_put_failure;
}
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_trim(skb, nlh);
return -1;
}
struct nftnl_skb_parms {
bool report;
};
#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb))
static void nft_notify_enqueue(struct sk_buff *skb, bool report,
struct list_head *notify_list)
{
NFT_CB(skb).report = report;
list_add_tail(&skb->list, notify_list);
}
static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
u16 flags = 0;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
event, flags, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
}
nft_net = nft_pernet(ctx->net);
nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
return;
err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static int nf_tables_dump_tables(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct nftables_pernet *nft_net;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
if (idx < s_idx)
goto cont;
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (!nft_is_active(net, table))
continue;
if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWTABLE, NLM_F_MULTI,
table->family, table) < 0)
goto done;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
done:
rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *c)
{
int err;
if (!try_module_get(THIS_MODULE))
return -EINVAL;
rcu_read_unlock();
err = netlink_dump_start(nlsk, skb, nlh, c);
rcu_read_lock();
module_put(THIS_MODULE);
return err;
}
/* called with rcu_read_lock held */
static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_table *table;
struct net *net = info->net;
struct sk_buff *skb2;
int err;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_tables,
.module = THIS_MODULE,
};
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
return PTR_ERR(table);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWTABLE,
0, family, table);
if (err < 0)
goto err_fill_table_info;
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
err_fill_table_info:
kfree_skb(skb2);
return err;
}
static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
{
struct nft_chain *chain;
u32 i = 0;
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
if (!nft_is_base_chain(chain))
continue;
if (cnt && i++ == cnt)
break;
nf_tables_unregister_hook(net, table, chain);
}
}
static int nf_tables_table_enable(struct net *net, struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
if (!nft_is_base_chain(chain))
continue;
err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err_register_hooks;
i++;
}
return 0;
err_register_hooks:
if (i)
nft_table_disable(net, table, i);
return err;
}
static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
table->flags &= ~NFT_TABLE_F_DORMANT;
nft_table_disable(net, table, 0);
table->flags |= NFT_TABLE_F_DORMANT;
}
#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
__NFT_TABLE_F_WAS_AWAKEN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
u32 flags;
int ret;
if (!ctx->nla[NFTA_TABLE_FLAGS])
return 0;
flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
if (flags & ~NFT_TABLE_F_MASK)
return -EOPNOTSUPP;
if (flags == ctx->table->flags)
return 0;
if ((nft_table_has_owner(ctx->table) &&
!(flags & NFT_TABLE_F_OWNER)) ||
(!nft_table_has_owner(ctx->table) &&
flags & NFT_TABLE_F_OWNER))
return -EOPNOTSUPP;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
return -ENOMEM;
if ((flags & NFT_TABLE_F_DORMANT) &&
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
ctx->table->flags |= NFT_TABLE_F_DORMANT;
if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret < 0)
goto err_register_hooks;
ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
}
}
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err_register_hooks:
nft_trans_destroy(trans);
return ret;
}
static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
{
const char *name = data;
return jhash(name, strlen(name), seed);
}
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
{
const struct nft_chain *chain = data;
return nft_chain_hash(chain->name, 0, seed);
}
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct nft_chain *chain = ptr;
const char *name = arg->key;
return strcmp(chain->name, name);
}
static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
{
const struct nft_object_hash_key *k = data;
seed ^= hash_ptr(k->table, 32);
return jhash(k->name, strlen(k->name), seed);
}
static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
{
const struct nft_object *obj = data;
return nft_objname_hash(&obj->key, 0, seed);
}
static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct nft_object_hash_key *k = arg->key;
const struct nft_object *obj = ptr;
if (obj->key.table != k->table)
return -1;
return strcmp(obj->key.name, k->name);
}
static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
u32 flags = 0;
int err;
lockdep_assert_held(&nft_net->commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask,
NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
} else {
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
if (nla[NFTA_TABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
if (flags & ~NFT_TABLE_F_MASK)
return -EOPNOTSUPP;
}
err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL)
goto err_kzalloc;
table->name = nla_strdup(attr, GFP_KERNEL);
if (table->name == NULL)
goto err_strdup;
if (nla[NFTA_TABLE_USERDATA]) {
table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL);
if (table->udata == NULL)
goto err_table_udata;
table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]);
}
err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
if (err)
goto err_chain_ht;
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
table->handle = ++table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err_trans;
list_add_tail_rcu(&table->list, &nft_net->tables);
return 0;
err_trans:
rhltable_destroy(&table->chains_ht);
err_chain_ht:
kfree(table->udata);
err_table_udata:
kfree(table->name);
err_strdup:
kfree(table);
err_kzalloc:
return err;
}
static int nft_flush_table(struct nft_ctx *ctx)
{
struct nft_flowtable *flowtable, *nft;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
struct nft_set *set, *ns;
int err;
list_for_each_entry(chain, &ctx->table->chains, list) {
if (!nft_is_active_next(ctx->net, chain))
continue;
if (nft_chain_is_bound(chain))
continue;
ctx->chain = chain;
err = nft_delrule_by_chain(ctx);
if (err < 0)
goto out;
}
list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, set))
continue;
if (nft_set_is_anonymous(set) &&
!list_empty(&set->bindings))
continue;
err = nft_delset(ctx, set);
if (err < 0)
goto out;
}
list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
if (!nft_is_active_next(ctx->net, flowtable))
continue;
err = nft_delflowtable(ctx, flowtable);
if (err < 0)
goto out;
}
list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
if (!nft_is_active_next(ctx->net, obj))
continue;
err = nft_delobj(ctx, obj);
if (err < 0)
goto out;
}
list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
if (!nft_is_active_next(ctx->net, chain))
continue;
if (nft_chain_is_bound(chain))
continue;
ctx->chain = chain;
err = nft_delchain(ctx);
if (err < 0)
goto out;
}
err = nft_deltable(ctx);
out:
return err;
}
static int nft_flush(struct nft_ctx *ctx, int family)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table, *nt;
int err = 0;
list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
if (family != AF_UNSPEC && table->family != family)
continue;
ctx->family = table->family;
if (!nft_is_active_next(ctx->net, table))
continue;
if (nft_table_has_owner(table) && table->nlpid != ctx->portid)
continue;
if (nla[NFTA_TABLE_NAME] &&
nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
continue;
ctx->table = table;
err = nft_flush_table(ctx);
if (err < 0)
goto out;
}
out:
return err;
}
static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_ctx ctx;
nft_ctx_init(&ctx, net, skb, info->nlh, 0, NULL, NULL, nla);
if (family == AF_UNSPEC ||
(!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
return nft_flush(&ctx, family);
if (nla[NFTA_TABLE_HANDLE]) {
attr = nla[NFTA_TABLE_HANDLE];
table = nft_table_lookup_byhandle(net, attr, genmask,
NETLINK_CB(skb).portid);
} else {
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask,
NETLINK_CB(skb).portid);
}
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(table);
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
table->use > 0)
return -EBUSY;
ctx.family = family;
ctx.table = table;
return nft_flush_table(&ctx);
}
static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
if (WARN_ON(ctx->table->use > 0))
return;
rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name);
kfree(ctx->table->udata);
kfree(ctx->table);
}
void nft_register_chain_type(const struct nft_chain_type *ctype)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return;
}
chain_type[ctype->family][ctype->type] = ctype;
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_register_chain_type);
void nft_unregister_chain_type(const struct nft_chain_type *ctype)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
chain_type[ctype->family][ctype->type] = NULL;
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
/*
* Chains
*/
static struct nft_chain *
nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
if (chain->handle == handle &&
nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT);
}
static bool lockdep_commit_lock_is_held(const struct net *net)
{
#ifdef CONFIG_PROVE_LOCKING
struct nftables_pernet *nft_net = nft_pernet(net);
return lockdep_is_held(&nft_net->commit_mutex);
#else
return true;
#endif
}
static struct nft_chain *nft_chain_lookup(struct net *net,
struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
char search[NFT_CHAIN_MAXNAMELEN + 1];
struct rhlist_head *tmp, *list;
struct nft_chain *chain;
if (nla == NULL)
return ERR_PTR(-EINVAL);
nla_strscpy(search, nla, sizeof(search));
WARN_ON(!rcu_read_lock_held() &&
!lockdep_commit_lock_is_held(net));
chain = ERR_PTR(-ENOENT);
rcu_read_lock();
list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
if (!list)
goto out_unlock;
rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
if (nft_active_genmask(chain, genmask))
goto out_unlock;
}
chain = ERR_PTR(-ENOENT);
out_unlock:
rcu_read_unlock();
return chain;
}
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
[NFTA_CHAIN_TABLE] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
[NFTA_CHAIN_NAME] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
[NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
[NFTA_CHAIN_TYPE] = { .type = NLA_STRING,
.len = NFT_MODULE_AUTOLOAD_LIMIT },
[NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
[NFTA_CHAIN_FLAGS] = { .type = NLA_U32 },
[NFTA_CHAIN_ID] = { .type = NLA_U32 },
[NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
[NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
[NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
[NFTA_HOOK_DEV] = { .type = NLA_STRING,
.len = IFNAMSIZ - 1 },
};
static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
{
struct nft_stats *cpu_stats, total;
struct nlattr *nest;
unsigned int seq;
u64 pkts, bytes;
int cpu;
if (!stats)
return 0;
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(stats, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
pkts = cpu_stats->pkts;
bytes = cpu_stats->bytes;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
total.pkts += pkts;
total.bytes += bytes;
}
nest = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts),
NFTA_COUNTER_PAD) ||
nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes),
NFTA_COUNTER_PAD))
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
nla_put_failure:
return -ENOSPC;
}
static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
const struct nft_base_chain *basechain)
{
const struct nf_hook_ops *ops = &basechain->ops;
struct nft_hook *hook, *first = NULL;
struct nlattr *nest, *nest_devs;
int n = 0;
nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
goto nla_put_failure;
if (nft_base_chain_netdev(family, ops->hooknum)) {
nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
list_for_each_entry(hook, &basechain->hook_list, list) {
if (!first)
first = hook;
if (nla_put_string(skb, NFTA_DEVICE_NAME,
hook->ops.dev->name))
goto nla_put_failure;
n++;
}
nla_nest_end(skb, nest_devs);
if (n == 1 &&
nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
return 0;
nla_put_failure:
return -1;
}
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event, u32 flags,
int family, const struct nft_table *table,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
NFTA_CHAIN_PAD))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
goto nla_put_failure;
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
struct nft_stats __percpu *stats;
if (nft_dump_basechain_hook(skb, family, basechain))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
htonl(basechain->policy)))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
stats = rcu_dereference_check(basechain->stats,
lockdep_commit_lock_is_held(net));
if (nft_dump_stats(skb, stats))
goto nla_put_failure;
}
if (chain->flags &&
nla_put_be32(skb, NFTA_CHAIN_FLAGS, htonl(chain->flags)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
goto nla_put_failure;
if (chain->udata &&
nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_trim(skb, nlh);
return -1;
}
static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
u16 flags = 0;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
event, flags, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
goto err;
}
nft_net = nft_pernet(ctx->net);
nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
return;
err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static int nf_tables_dump_chains(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nftables_pernet *nft_net;
const struct nft_table *table;
const struct nft_chain *chain;
rcu_read_lock();
nft_net = nft_pernet(net);
cb->seq = nft_net->base_seq;
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
list_for_each_entry_rcu(chain, &table->chains, list) {
if (idx < s_idx)
goto cont;
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (!nft_is_active(net, chain))
continue;
if (nf_tables_fill_chain_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWCHAIN,
NLM_F_MULTI,
table->family, table,
chain) < 0)
goto done;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
}
done:
rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
/* called with rcu_read_lock held */
static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_chain *chain;
struct net *net = info->net;
struct nft_table *table;
struct sk_buff *skb2;
int err;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_chains,
.module = THIS_MODULE,
};
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
}
chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return PTR_ERR(chain);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN,
0, family, table, chain);
if (err < 0)
goto err_fill_chain_info;
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
err_fill_chain_info:
kfree_skb(skb2);
return err;
}
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
struct nlattr *tb[NFTA_COUNTER_MAX+1];
struct nft_stats __percpu *newstats;
struct nft_stats *stats;
int err;
err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
nft_counter_policy, NULL);
if (err < 0)
return ERR_PTR(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
return ERR_PTR(-EINVAL);
newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
return ERR_PTR(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
*/
preempt_disable();
stats = this_cpu_ptr(newstats);
stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
preempt_enable();
return newstats;
}
static void nft_chain_stats_replace(struct nft_trans *trans)
{
struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
if (!nft_trans_chain_stats(trans))
return;
nft_trans_chain_stats(trans) =
rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans),
lockdep_commit_lock_is_held(trans->ctx.net));
if (!nft_trans_chain_stats(trans))
static_branch_inc(&nft_counters_enabled);
}
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
{
struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
if (g0 != g1)
kvfree(g1);
kvfree(g0);
/* should be NULL either via abort or via successful commit */
WARN_ON_ONCE(chain->rules_next);
kvfree(chain->rules_next);
}
void nf_tables_chain_destroy(struct nft_ctx *ctx)
{
struct nft_chain *chain = ctx->chain;
struct nft_hook *hook, *next;
if (WARN_ON(chain->use > 0))
return;
/* no concurrent access possible anymore */
nf_tables_chain_free_chain_rules(chain);
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) {
list_for_each_entry_safe(hook, next,
&basechain->hook_list, list) {
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
}
}
module_put(basechain->type->owner);
if (rcu_access_pointer(basechain->stats)) {
static_branch_dec(&nft_counters_enabled);
free_percpu(rcu_dereference_raw(basechain->stats));
}
kfree(chain->name);
kfree(chain->udata);
kfree(basechain);
} else {
kfree(chain->name);
kfree(chain->udata);
kfree(chain);
}
}
static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
const struct nlattr *attr)
{
struct net_device *dev;
char ifname[IFNAMSIZ];
struct nft_hook *hook;
int err;
hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
}
nla_strscpy(ifname, attr, IFNAMSIZ);
/* nf_tables_netdev_event() is called under rtnl_mutex, this is
* indirectly serializing all the other holders of the commit_mutex with
* the rtnl_mutex.
*/
dev = __dev_get_by_name(net, ifname);
if (!dev) {
err = -ENOENT;
goto err_hook_dev;
}
hook->ops.dev = dev;
hook->inactive = false;
return hook;
err_hook_dev:
kfree(hook);
err_hook_alloc:
return ERR_PTR(err);
}
static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
const struct nft_hook *this)
{
struct nft_hook *hook;
list_for_each_entry(hook, hook_list, list) {
if (this->ops.dev == hook->ops.dev)
return hook;
}
return NULL;
}
static int nf_tables_parse_netdev_hooks(struct net *net,
const struct nlattr *attr,
struct list_head *hook_list)
{
struct nft_hook *hook, *next;
const struct nlattr *tmp;
int rem, n = 0, err;
nla_for_each_nested(tmp, attr, rem) {
if (nla_type(tmp) != NFTA_DEVICE_NAME) {
err = -EINVAL;
goto err_hook;
}
hook = nft_netdev_hook_alloc(net, tmp);
if (IS_ERR(hook)) {
err = PTR_ERR(hook);
goto err_hook;
}
if (nft_hook_list_find(hook_list, hook)) {
kfree(hook);
err = -EEXIST;
goto err_hook;
}
list_add_tail(&hook->list, hook_list);
n++;
if (n == NFT_NETDEVICE_MAX) {
err = -EFBIG;
goto err_hook;
}
}
return 0;
err_hook:
list_for_each_entry_safe(hook, next, hook_list, list) {
list_del(&hook->list);
kfree(hook);
}
return err;
}
struct nft_chain_hook {
u32 num;
s32 priority;
const struct nft_chain_type *type;
struct list_head list;
};
static int nft_chain_parse_netdev(struct net *net,
struct nlattr *tb[],
struct list_head *hook_list)
{
struct nft_hook *hook;
int err;
if (tb[NFTA_HOOK_DEV]) {
hook = nft_netdev_hook_alloc(net, tb[NFTA_HOOK_DEV]);
if (IS_ERR(hook))
return PTR_ERR(hook);
list_add_tail(&hook->list, hook_list);
} else if (tb[NFTA_HOOK_DEVS]) {
err = nf_tables_parse_netdev_hooks(net, tb[NFTA_HOOK_DEVS],
hook_list);
if (err < 0)
return err;
if (list_empty(hook_list))
return -EINVAL;
} else {
return -EINVAL;
}
return 0;
}
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
struct netlink_ext_ack *extack, bool autoload)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
int err;
lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX,
nla[NFTA_CHAIN_HOOK],
nft_hook_policy, NULL);
if (err < 0)
return err;
if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
ha[NFTA_HOOK_PRIORITY] == NULL)
return -EINVAL;
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT);
if (!type)
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_TYPE]) {
type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
family, autoload);
if (IS_ERR(type)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
return PTR_ERR(type);
}
}
if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
hook->priority <= NF_IP_PRI_CONNTRACK)
return -EOPNOTSUPP;
if (!try_module_get(type->owner)) {
if (nla[NFTA_CHAIN_TYPE])
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
return -ENOENT;
}
hook->type = type;
INIT_LIST_HEAD(&hook->list);
if (nft_base_chain_netdev(family, hook->num)) {
err = nft_chain_parse_netdev(net, ha, &hook->list);
if (err < 0) {
module_put(type->owner);
return err;
}
} else if (ha[NFTA_HOOK_DEV] || ha[NFTA_HOOK_DEVS]) {
module_put(type->owner);
return -EOPNOTSUPP;
}
return 0;
}
static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
struct nft_hook *h, *next;
list_for_each_entry_safe(h, next, &hook->list, list) {
list_del(&h->list);
kfree(h);
}
module_put(hook->type->owner);
}
struct nft_rules_old {
struct rcu_head h;
struct nft_rule **start;
};
static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
unsigned int alloc)
{
if (alloc > INT_MAX)
return NULL;
alloc += 1; /* NULL, ends rules */
if (sizeof(struct nft_rule *) > INT_MAX / alloc)
return NULL;
alloc *= sizeof(struct nft_rule *);
alloc += sizeof(struct nft_rules_old);
return kvmalloc(alloc, GFP_KERNEL);
}
static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
const struct nft_chain_hook *hook,
struct nft_chain *chain)
{
ops->pf = family;
ops->hooknum = hook->num;
ops->priority = hook->priority;
ops->priv = chain;
ops->hook = hook->type->hooks[ops->hooknum];
ops->hook_ops_type = NF_HOOK_OP_NF_TABLES;
}
static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
struct nft_chain_hook *hook, u32 flags)
{
struct nft_chain *chain;
struct nft_hook *h;
basechain->type = hook->type;
INIT_LIST_HEAD(&basechain->hook_list);
chain = &basechain->chain;
if (nft_base_chain_netdev(family, hook->num)) {
list_splice_init(&hook->list, &basechain->hook_list);
list_for_each_entry(h, &basechain->hook_list, list)
nft_basechain_hook_init(&h->ops, family, hook, chain);
basechain->ops.hooknum = hook->num;
basechain->ops.priority = hook->priority;
} else {
nft_basechain_hook_init(&basechain->ops, family, hook, chain);
}
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
nft_chain_offload_priority(basechain) < 0)
return -EOPNOTSUPP;
flow_block_init(&basechain->flow_block);
return 0;
}
static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
{
int err;
err = rhltable_insert_key(&table->chains_ht, chain->name,
&chain->rhlhead, nft_chain_ht_params);
if (err)
return err;
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
}
static u64 chain_id;
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, u32 flags,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_trans *trans;
struct nft_chain *chain;
struct nft_rule **rules;
int err;
if (table->use == UINT_MAX)
return -EOVERFLOW;
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
err = nft_chain_parse_hook(net, nla, &hook, family, extack,
true);
if (err < 0)
return err;
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
nft_chain_release_hook(&hook);
return -ENOMEM;
}
chain = &basechain->chain;
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
nft_chain_release_hook(&hook);
kfree(basechain);
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
if (err < 0) {
nft_chain_release_hook(&hook);
kfree(basechain);
return err;
}
} else {
if (flags & NFT_CHAIN_BASE)
return -EINVAL;
if (flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
return -ENOMEM;
chain->flags = flags;
}
ctx->chain = chain;
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
if (nla[NFTA_CHAIN_NAME]) {
chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
} else {
if (!(flags & NFT_CHAIN_BINDING)) {
err = -EINVAL;
goto err_destroy_chain;
}
snprintf(name, sizeof(name), "__chain%llu", ++chain_id);
chain->name = kstrdup(name, GFP_KERNEL);
}
if (!chain->name) {
err = -ENOMEM;
goto err_destroy_chain;
}
if (nla[NFTA_CHAIN_USERDATA]) {
chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL);
if (chain->udata == NULL) {
err = -ENOMEM;
goto err_destroy_chain;
}
chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
}
rules = nf_tables_chain_alloc_rules(chain, 0);
if (!rules) {
err = -ENOMEM;
goto err_destroy_chain;
}
*rules = NULL;
rcu_assign_pointer(chain->rules_gen_0, rules);
rcu_assign_pointer(chain->rules_gen_1, rules);
err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err_destroy_chain;
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto err_unregister_hook;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
if (nft_is_base_chain(chain))
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
if (err < 0) {
nft_trans_destroy(trans);
goto err_unregister_hook;
}
table->use++;
return 0;
err_unregister_hook:
nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
return err;
}
static bool nft_hook_list_equal(struct list_head *hook_list1,
struct list_head *hook_list2)
{
struct nft_hook *hook;
int n = 0, m = 0;
n = 0;
list_for_each_entry(hook, hook_list2, list) {
if (!nft_hook_list_find(hook_list1, hook))
return false;
n++;
}
list_for_each_entry(hook, hook_list1, list)
m++;
return n == m;
}
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags, const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
struct nft_trans *trans;
int err;
if (chain->flags ^ flags)
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
extack, false);
if (err < 0)
return err;
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (nft_base_chain_netdev(ctx->family, hook.num)) {
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
} else {
ops = &basechain->ops;
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
}
nft_chain_release_hook(&hook);
}
if (nla[NFTA_CHAIN_HANDLE] &&
nla[NFTA_CHAIN_NAME]) {
struct nft_chain *chain2;
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST;
}
}
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats))
return PTR_ERR(stats);
}
err = -ENOMEM;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
if (trans == NULL)
goto err;
nft_trans_chain_stats(trans) = stats;
nft_trans_chain_update(trans) = true;
if (nla[NFTA_CHAIN_POLICY])
nft_trans_chain_policy(trans) = policy;
else
nft_trans_chain_policy(trans) = -1;
if (nla[NFTA_CHAIN_HANDLE] &&
nla[NFTA_CHAIN_NAME]) {
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct nft_trans *tmp;
char *name;
err = -ENOMEM;
name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
if (!name)
goto err;
err = -EEXIST;
list_for_each_entry(tmp, &nft_net->commit_list, list) {
if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
tmp->ctx.table == table &&
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name);
goto err;
}
}
nft_trans_chain_name(trans) = name;
}
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err:
free_percpu(stats);
kfree(trans);
return err;
}
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nlattr *nla)
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
struct nft_trans *trans;
list_for_each_entry(trans, &nft_net->commit_list, list) {
struct nft_chain *chain = trans->ctx.chain;
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
id == nft_trans_chain_id(trans))
return chain;
}
return ERR_PTR(-ENOENT);
}
static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct nft_chain *chain = NULL;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
u8 policy = NF_ACCEPT;
struct nft_ctx ctx;
u64 handle = 0;
u32 flags = 0;
lockdep_assert_held(&nft_net->commit_mutex);
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
}
chain = NULL;
attr = nla[NFTA_CHAIN_NAME];
if (nla[NFTA_CHAIN_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
chain = nft_chain_lookup_byhandle(table, handle, genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
return PTR_ERR(chain);
}
attr = nla[NFTA_CHAIN_HANDLE];
} else if (nla[NFTA_CHAIN_NAME]) {
chain = nft_chain_lookup(net, table, attr, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT) {
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
chain = NULL;
}
} else if (!nla[NFTA_CHAIN_ID]) {
return -EINVAL;
}
if (nla[NFTA_CHAIN_POLICY]) {
if (chain != NULL &&
!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}
if (chain == NULL &&
nla[NFTA_CHAIN_HOOK] == NULL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
return -EOPNOTSUPP;
}
policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
switch (policy) {
case NF_DROP:
case NF_ACCEPT:
break;
default:
return -EINVAL;
}
}
if (nla[NFTA_CHAIN_FLAGS])
flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
else if (chain)
flags = chain->flags;
if (flags & ~NFT_CHAIN_FLAGS)
return -EOPNOTSUPP;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
extack);
}
return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule;
struct nft_ctx ctx;
u64 handle;
u32 use;
int err;
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
}
if (nla[NFTA_CHAIN_HANDLE]) {
attr = nla[NFTA_CHAIN_HANDLE];
handle = be64_to_cpu(nla_get_be64(attr));
chain = nft_chain_lookup_byhandle(table, handle, genmask);
} else {
attr = nla[NFTA_CHAIN_NAME];
chain = nft_chain_lookup(net, table, attr, genmask);
}
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
chain->use > 0)
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
use = chain->use;
list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
use--;
err = nft_delrule(&ctx, rule);
if (err < 0)
return err;
}
/* There are rules and elements that are still holding references to us,
* we cannot do a recursive removal in this case.
*/
if (use > 0) {
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
}
return nft_delchain(&ctx);
}
/*
* Expressions
*/
/**
* nft_register_expr - register nf_tables expr type
* @type: expr type
*
* Registers the expr type for use with nf_tables. Returns zero on
* success or a negative errno code otherwise.
*/
int nft_register_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
else
list_add_rcu(&type->list, &nf_tables_expressions);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
return 0;
}
EXPORT_SYMBOL_GPL(nft_register_expr);
/**
* nft_unregister_expr - unregister nf_tables expr type
* @type: expr type
*
* Unregisters the expr typefor use with nf_tables.
*/
void nft_unregister_expr(struct nft_expr_type *type)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_del_rcu(&type->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
static const struct nft_expr_type *__nft_expr_type_get(u8 family,
struct nlattr *nla)
{
const struct nft_expr_type *type, *candidate = NULL;
list_for_each_entry(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
else if (type->family == family)
candidate = type;
}
}
return candidate;
}
#ifdef CONFIG_MODULES
static int nft_expr_type_request_module(struct net *net, u8 family,
struct nlattr *nla)
{
if (nft_request_module(net, "nft-expr-%u-%.*s", family,
nla_len(nla), (char *)nla_data(nla)) == -EAGAIN)
return -EAGAIN;
return 0;
}
#endif
static const struct nft_expr_type *nft_expr_type_get(struct net *net,
u8 family,
struct nlattr *nla)
{
const struct nft_expr_type *type;
if (nla == NULL)
return ERR_PTR(-EINVAL);
type = __nft_expr_type_get(family, nla);
if (type != NULL && try_module_get(type->owner))
return type;
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (type == NULL) {
if (nft_expr_type_request_module(net, family, nla) == -EAGAIN)
return ERR_PTR(-EAGAIN);
if (nft_request_module(net, "nft-expr-%.*s",
nla_len(nla),
(char *)nla_data(nla)) == -EAGAIN)
return ERR_PTR(-EAGAIN);
}
#endif
return ERR_PTR(-ENOENT);
}
static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
[NFTA_EXPR_NAME] = { .type = NLA_STRING,
.len = NFT_MODULE_AUTOLOAD_LIMIT },
[NFTA_EXPR_DATA] = { .type = NLA_NESTED },
};
static int nf_tables_fill_expr_info(struct sk_buff *skb,
const struct nft_expr *expr)
{
if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
goto nla_put_failure;
if (expr->ops->dump) {
struct nlattr *data = nla_nest_start_noflag(skb,
NFTA_EXPR_DATA);
if (data == NULL)
goto nla_put_failure;
if (expr->ops->dump(skb, expr) < 0)
goto nla_put_failure;
nla_nest_end(skb, data);
}
return skb->len;
nla_put_failure:
return -1;
};
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr)
{
struct nlattr *nest;
nest = nla_nest_start_noflag(skb, attr);
if (!nest)
goto nla_put_failure;
if (nf_tables_fill_expr_info(skb, expr) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
nla_put_failure:
return -1;
}
struct nft_expr_info {
const struct nft_expr_ops *ops;
const struct nlattr *attr;
struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
};
static int nf_tables_expr_parse(const struct nft_ctx *ctx,
const struct nlattr *nla,
struct nft_expr_info *info)
{
const struct nft_expr_type *type;
const struct nft_expr_ops *ops;
struct nlattr *tb[NFTA_EXPR_MAX + 1];
int err;
err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla,
nft_expr_policy, NULL);
if (err < 0)
return err;
type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
if (tb[NFTA_EXPR_DATA]) {
err = nla_parse_nested_deprecated(info->tb, type->maxattr,
tb[NFTA_EXPR_DATA],
type->policy, NULL);
if (err < 0)
goto err1;
} else
memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
if (type->select_ops != NULL) {
ops = type->select_ops(ctx,
(const struct nlattr * const *)info->tb);
if (IS_ERR(ops)) {
err = PTR_ERR(ops);
#ifdef CONFIG_MODULES
if (err == -EAGAIN)
if (nft_expr_type_request_module(ctx->net,
ctx->family,
tb[NFTA_EXPR_NAME]) != -EAGAIN)
err = -ENOENT;
#endif
goto err1;
}
} else
ops = type->ops;
info->attr = nla;
info->ops = ops;
return 0;
err1:
module_put(type->owner);
return err;
}
static int nf_tables_newexpr(const struct nft_ctx *ctx,
const struct nft_expr_info *expr_info,
struct nft_expr *expr)
{
const struct nft_expr_ops *ops = expr_info->ops;
int err;
expr->ops = ops;
if (ops->init) {
err = ops->init(ctx, expr, (const struct nlattr **)expr_info->tb);
if (err < 0)
goto err1;
}
return 0;
err1:
expr->ops = NULL;
return err;
}
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{
const struct nft_expr_type *type = expr->ops->type;
if (expr->ops->destroy)
expr->ops->destroy(ctx, expr);
module_put(type->owner);
}
static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
const struct nlattr *nla)
{
struct nft_expr_info expr_info;
struct nft_expr *expr;
struct module *owner;
int err;
err = nf_tables_expr_parse(ctx, nla, &expr_info);
if (err < 0)
goto err1;
err = -ENOMEM;
expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
if (expr == NULL)
goto err2;
err = nf_tables_newexpr(ctx, &expr_info, expr);
if (err < 0)
goto err3;
return expr;
err3:
kfree(expr);
err2:
owner = expr_info.ops->type->owner;
if (expr_info.ops->type->release_ops)
expr_info.ops->type->release_ops(expr_info.ops);
module_put(owner);
err1:
return ERR_PTR(err);
}
int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
int err;
if (src->ops->clone) {
dst->ops = src->ops;
err = src->ops->clone(dst, src);
if (err < 0)
return err;
} else {
memcpy(dst, src, src->ops->size);
}
__module_get(src->ops->type->owner);
return 0;
}
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
{
nf_tables_expr_destroy(ctx, expr);
kfree(expr);
}
/*
* Rules
*/
static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
u64 handle)
{
struct nft_rule *rule;
// FIXME: this sucks
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (handle == rule->handle)
return rule;
}
return ERR_PTR(-ENOENT);
}
static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
const struct nlattr *nla)
{
if (nla == NULL)
return ERR_PTR(-EINVAL);
return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_TABLE] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_RULE_CHAIN] = { .type = NLA_STRING,
.len = NFT_CHAIN_MAXNAMELEN - 1 },
[NFTA_RULE_HANDLE] = { .type = NLA_U64 },
[NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
[NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
[NFTA_RULE_POSITION] = { .type = NLA_U64 },
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_RULE_ID] = { .type = NLA_U32 },
[NFTA_RULE_POSITION_ID] = { .type = NLA_U32 },
[NFTA_RULE_CHAIN_ID] = { .type = NLA_U32 },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq, int event,
u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
const struct nft_rule *rule, u64 handle)
{
struct nlmsghdr *nlh;
const struct nft_expr *expr, *next;
struct nlattr *list;
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle),
NFTA_RULE_PAD))
goto nla_put_failure;
if (event != NFT_MSG_DELRULE && handle) {
if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle),
NFTA_RULE_PAD))
goto nla_put_failure;
}
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
nft_flow_rule_stats(chain, rule);
list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
if (list == NULL)
goto nla_put_failure;
nft_rule_for_each_expr(expr, next, rule) {
if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, list);
if (rule->udata) {
struct nft_userdata *udata = nft_userdata(rule);
if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
udata->data) < 0)
goto nla_put_failure;
}
nlmsg_end(skb, nlh);
return 0;
nla_put_failure:
nlmsg_trim(skb, nlh);
return -1;
}
static void nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule, int event)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_rule *prule;
struct sk_buff *skb;
u64 handle = 0;
u16 flags = 0;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
return;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
if (event == NFT_MSG_NEWRULE &&
!list_is_first(&rule->list, &ctx->chain->rules) &&
!list_is_last(&rule->list, &ctx->chain->rules)) {
prule = list_prev_entry(rule, list);
handle = prule->handle;
}
if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE</