blob: 546113430049d63ec178160d5a8b57e19e210505 [file] [log] [blame]
Daniel Mack30070982016-11-23 16:52:26 +01001/*
2 * Functions to manage eBPF programs attached to cgroups
3 *
4 * Copyright (c) 2016 Daniel Mack
5 *
6 * This file is subject to the terms and conditions of version 2 of the GNU
7 * General Public License. See the file COPYING in the main directory of the
8 * Linux distribution for more details.
9 */
10
11#include <linux/kernel.h>
12#include <linux/atomic.h>
13#include <linux/cgroup.h>
14#include <linux/slab.h>
15#include <linux/bpf.h>
16#include <linux/bpf-cgroup.h>
17#include <net/sock.h>
18
/*
 * Static key defined with DEFINE_STATIC_KEY_FALSE and exported to other
 * code via EXPORT_SYMBOL.  Within this file it is incremented each time
 * __cgroup_bpf_update() attaches a program and decremented each time an
 * attached program is released (in __cgroup_bpf_update() and
 * cgroup_bpf_put()).
 */
19DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
20EXPORT_SYMBOL(cgroup_bpf_enabled_key);
21
22/**
23 * cgroup_bpf_put() - put references of all bpf programs
24 * @cgrp: the cgroup to modify
25 */
26void cgroup_bpf_put(struct cgroup *cgrp)
27{
28 unsigned int type;
29
30 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
31 struct bpf_prog *prog = cgrp->bpf.prog[type];
32
33 if (prog) {
34 bpf_prog_put(prog);
35 static_branch_dec(&cgroup_bpf_enabled_key);
36 }
37 }
38}
39
40/**
41 * cgroup_bpf_inherit() - inherit effective programs from parent
42 * @cgrp: the cgroup to modify
43 * @parent: the parent to inherit from
44 */
45void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
46{
47 unsigned int type;
48
49 for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
50 struct bpf_prog *e;
51
52 e = rcu_dereference_protected(parent->bpf.effective[type],
53 lockdep_is_held(&cgroup_mutex));
54 rcu_assign_pointer(cgrp->bpf.effective[type], e);
Alexei Starovoitov7f677632017-02-10 20:28:24 -080055 cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
Daniel Mack30070982016-11-23 16:52:26 +010056 }
57}
58
59/**
60 * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
61 * propagate the change to descendants
62 * @cgrp: The cgroup which descendants to traverse
63 * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
64 * @prog: A new program to pin
65 * @type: Type of pinning operation (ingress/egress)
66 *
67 * Each cgroup has a set of two pointers for bpf programs; one for eBPF
68 * programs it owns, and which is effective for execution.
69 *
Daniel Mack01ae87e2016-11-28 14:11:04 +010070 * If @prog is not %NULL, this function attaches a new program to the cgroup
71 * and releases the one that is currently attached, if any. @prog is then made
Daniel Mack30070982016-11-23 16:52:26 +010072 * the effective program of type @type in that cgroup.
73 *
74 * If @prog is %NULL, the currently attached program of type @type is released,
75 * and the effective program of the parent cgroup (if any) is inherited to
76 * @cgrp.
77 *
78 * Then, the descendants of @cgrp are walked and the effective program for
79 * each of them is set to the effective program of @cgrp unless the
80 * descendant has its own program attached, in which case the subbranch is
81 * skipped. This ensures that delegated subcgroups with own programs are left
82 * untouched.
83 *
84 * Must be called with cgroup_mutex held.
85 */
Alexei Starovoitov7f677632017-02-10 20:28:24 -080086int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
87 struct bpf_prog *prog, enum bpf_attach_type type,
88 bool new_overridable)
Daniel Mack30070982016-11-23 16:52:26 +010089{
Alexei Starovoitov7f677632017-02-10 20:28:24 -080090 struct bpf_prog *old_prog, *effective = NULL;
Daniel Mack30070982016-11-23 16:52:26 +010091 struct cgroup_subsys_state *pos;
Alexei Starovoitov7f677632017-02-10 20:28:24 -080092 bool overridable = true;
Daniel Mack30070982016-11-23 16:52:26 +010093
Alexei Starovoitov7f677632017-02-10 20:28:24 -080094 if (parent) {
95 overridable = !parent->bpf.disallow_override[type];
96 effective = rcu_dereference_protected(parent->bpf.effective[type],
97 lockdep_is_held(&cgroup_mutex));
98 }
Daniel Mack30070982016-11-23 16:52:26 +010099
Alexei Starovoitov7f677632017-02-10 20:28:24 -0800100 if (prog && effective && !overridable)
101 /* if parent has non-overridable prog attached, disallow
102 * attaching new programs to descendent cgroup
103 */
104 return -EPERM;
105
106 if (prog && effective && overridable != new_overridable)
107 /* if parent has overridable prog attached, only
108 * allow overridable programs in descendent cgroup
109 */
110 return -EPERM;
111
112 old_prog = cgrp->bpf.prog[type];
113
114 if (prog) {
115 overridable = new_overridable;
116 effective = prog;
117 if (old_prog &&
118 cgrp->bpf.disallow_override[type] == new_overridable)
119 /* disallow attaching non-overridable on top
120 * of existing overridable in this cgroup
121 * and vice versa
122 */
123 return -EPERM;
124 }
125
126 if (!prog && !old_prog)
127 /* report error when trying to detach and nothing is attached */
128 return -ENOENT;
129
130 cgrp->bpf.prog[type] = prog;
Daniel Mack30070982016-11-23 16:52:26 +0100131
132 css_for_each_descendant_pre(pos, &cgrp->self) {
133 struct cgroup *desc = container_of(pos, struct cgroup, self);
134
135 /* skip the subtree if the descendant has its own program */
Alexei Starovoitov7f677632017-02-10 20:28:24 -0800136 if (desc->bpf.prog[type] && desc != cgrp) {
Daniel Mack30070982016-11-23 16:52:26 +0100137 pos = css_rightmost_descendant(pos);
Alexei Starovoitov7f677632017-02-10 20:28:24 -0800138 } else {
Daniel Mack30070982016-11-23 16:52:26 +0100139 rcu_assign_pointer(desc->bpf.effective[type],
140 effective);
Alexei Starovoitov7f677632017-02-10 20:28:24 -0800141 desc->bpf.disallow_override[type] = !overridable;
142 }
Daniel Mack30070982016-11-23 16:52:26 +0100143 }
144
145 if (prog)
146 static_branch_inc(&cgroup_bpf_enabled_key);
147
148 if (old_prog) {
149 bpf_prog_put(old_prog);
150 static_branch_dec(&cgroup_bpf_enabled_key);
151 }
Alexei Starovoitov7f677632017-02-10 20:28:24 -0800152 return 0;
Daniel Mack30070982016-11-23 16:52:26 +0100153}
154
155/**
David Ahernb2cd1252016-12-01 08:48:03 -0800156 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
Willem de Bruijn8f917bb2017-04-11 14:08:08 -0400157 * @sk: The socket sending or receiving traffic
Daniel Mack30070982016-11-23 16:52:26 +0100158 * @skb: The skb that is being sent or received
159 * @type: The type of program to be exectuted
160 *
161 * If no socket is passed, or the socket is not of type INET or INET6,
162 * this function does nothing and returns 0.
163 *
164 * The program type passed in via @type must be suitable for network
165 * filtering. No further check is performed to assert that.
166 *
167 * This function will return %-EPERM if any if an attached program was found
168 * and if it returned != 1 during execution. In all other cases, 0 is returned.
169 */
David Ahernb2cd1252016-12-01 08:48:03 -0800170int __cgroup_bpf_run_filter_skb(struct sock *sk,
171 struct sk_buff *skb,
172 enum bpf_attach_type type)
Daniel Mack30070982016-11-23 16:52:26 +0100173{
174 struct bpf_prog *prog;
175 struct cgroup *cgrp;
176 int ret = 0;
177
178 if (!sk || !sk_fullsock(sk))
179 return 0;
180
181 if (sk->sk_family != AF_INET &&
182 sk->sk_family != AF_INET6)
183 return 0;
184
185 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
186
187 rcu_read_lock();
188
189 prog = rcu_dereference(cgrp->bpf.effective[type]);
190 if (prog) {
191 unsigned int offset = skb->data - skb_network_header(skb);
Willem de Bruijn8f917bb2017-04-11 14:08:08 -0400192 struct sock *save_sk = skb->sk;
Daniel Mack30070982016-11-23 16:52:26 +0100193
Willem de Bruijn8f917bb2017-04-11 14:08:08 -0400194 skb->sk = sk;
Daniel Mack30070982016-11-23 16:52:26 +0100195 __skb_push(skb, offset);
196 ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
197 __skb_pull(skb, offset);
Willem de Bruijn8f917bb2017-04-11 14:08:08 -0400198 skb->sk = save_sk;
Daniel Mack30070982016-11-23 16:52:26 +0100199 }
200
201 rcu_read_unlock();
202
203 return ret;
204}
David Ahernb2cd1252016-12-01 08:48:03 -0800205EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
David Ahern610236582016-12-01 08:48:04 -0800206
207/**
208 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
209 * @sk: sock structure to manipulate
210 * @type: The type of program to be exectuted
211 *
212 * socket is passed is expected to be of type INET or INET6.
213 *
214 * The program type passed in via @type must be suitable for sock
215 * filtering. No further check is performed to assert that.
216 *
217 * This function will return %-EPERM if any if an attached program was found
218 * and if it returned != 1 during execution. In all other cases, 0 is returned.
219 */
220int __cgroup_bpf_run_filter_sk(struct sock *sk,
221 enum bpf_attach_type type)
222{
223 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
224 struct bpf_prog *prog;
225 int ret = 0;
226
227
228 rcu_read_lock();
229
230 prog = rcu_dereference(cgrp->bpf.effective[type]);
231 if (prog)
232 ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
233
234 rcu_read_unlock();
235
236 return ret;
237}
238EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
Lawrence Brakmo40304b22017-06-30 20:02:40 -0700239
240/**
241 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
242 * @sk: socket to get cgroup from
243 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
244 * sk with connection information (IP addresses, etc.) May not contain
245 * cgroup info if it is a req sock.
246 * @type: The type of program to be exectuted
247 *
248 * socket passed is expected to be of type INET or INET6.
249 *
250 * The program type passed in via @type must be suitable for sock_ops
251 * filtering. No further check is performed to assert that.
252 *
253 * This function will return %-EPERM if any if an attached program was found
254 * and if it returned != 1 during execution. In all other cases, 0 is returned.
255 */
256int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
257 struct bpf_sock_ops_kern *sock_ops,
258 enum bpf_attach_type type)
259{
260 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
261 struct bpf_prog *prog;
262 int ret = 0;
263
264
265 rcu_read_lock();
266
267 prog = rcu_dereference(cgrp->bpf.effective[type]);
268 if (prog)
269 ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
270
271 rcu_read_unlock();
272
273 return ret;
274}
275EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);