bpf: Change uapi for bpf iterator map elements
Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, the attr->link_create.target_fd is used
to carry map_fd and an enum bpf_iter_link_info
is added to uapi to specify the target_fd actually
representing a map_fd:
enum bpf_iter_link_info {
BPF_ITER_LINK_UNSPEC = 0,
BPF_ITER_LINK_MAP_FD = 1,
MAX_BPF_ITER_LINK_INFO,
};
This is an extensible approach as we can grow
enumerator for pid, cgroup_id, etc. and we can
unionize target_fd for pid, cgroup_id, etc.
But in the future, there are chances that
more complex customization may happen, e.g.,
for tasks, it could be filtered based on
both cgroup_id and user_id.
This patch changed the uapi to have fields
__aligned_u64 iter_info;
__u32 iter_info_len;
for additional iter_info for link_create.
The iter_info is defined as
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
};
So future extension for additional customization
will be easier. The bpf_iter_link_info will be
passed to target callback to validate and generic
bpf_iter framework does not need to deal it any
more.
Note that map_fd = 0 will be considered invalid
and -EBADF will be returned to user space.
Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index fbe1f55..af86048 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
.seq_info = &bpf_map_seq_info,
};
-static int bpf_iter_check_map(struct bpf_prog *prog,
- struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
{
u32 key_acc_size, value_acc_size, key_size, value_size;
- struct bpf_map *map = aux->map;
+ struct bpf_map *map;
bool is_percpu = false;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
else if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY)
- return -EINVAL;
+ goto put_map;
key_acc_size = prog->aux->max_rdonly_access;
value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
else
value_size = round_up(map->value_size, 8) * num_possible_cpus();
- if (key_acc_size > key_size || value_acc_size > value_size)
- return -EACCES;
+ if (key_acc_size > key_size || value_acc_size > value_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+ aux->map = map;
return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
}
DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
static const struct bpf_iter_reg bpf_map_elem_reg_info = {
.target = "bpf_map_elem",
- .check_target = bpf_iter_check_map,
- .req_linfo = BPF_ITER_LINK_MAP_FD,
+ .attach_target = bpf_iter_attach_map,
+ .detach_target = bpf_iter_detach_map,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map_elem, key),