blob: bca2d86ba97d8d428d63d912cc109397fc4bb122 [file] [log] [blame]
Cong Wangc6382912021-07-04 12:02:47 -07001// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
3
4#include <linux/skmsg.h>
5#include <linux/bpf.h>
6#include <net/sock.h>
7#include <net/af_unix.h>
8
/*
 * unix_sk_has_data - test whether anything is queued for a reader.
 * @__sk:    socket whose sk_receive_queue is inspected
 * @__psock: psock whose ingress_skb queue and ingress_msg list are inspected
 *
 * Evaluates to non-zero when at least one of the three containers is
 * non-empty.  The arguments are parenthesized so that any pointer-valued
 * expression can be passed, not only a plain identifier.
 */
#define unix_sk_has_data(__sk, __psock)					\
		({	!skb_queue_empty(&(__sk)->sk_receive_queue) ||	\
			!skb_queue_empty(&(__psock)->ingress_skb) ||	\
			!list_empty(&(__psock)->ingress_msg);		\
		})
14
15static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
16 long timeo)
17{
18 DEFINE_WAIT_FUNC(wait, woken_wake_function);
19 struct unix_sock *u = unix_sk(sk);
20 int ret = 0;
21
22 if (sk->sk_shutdown & RCV_SHUTDOWN)
23 return 1;
24
25 if (!timeo)
26 return ret;
27
28 add_wait_queue(sk_sleep(sk), &wait);
29 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
30 if (!unix_sk_has_data(sk, psock)) {
31 mutex_unlock(&u->iolock);
32 wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
33 mutex_lock(&u->iolock);
34 ret = unix_sk_has_data(sk, psock);
35 }
36 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
37 remove_wait_queue(sk_sleep(sk), &wait);
38 return ret;
39}
40
Jiang Wang94531cf2021-08-16 19:03:21 +000041static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
42 size_t len, int flags)
43{
44 if (sk->sk_type == SOCK_DGRAM)
45 return __unix_dgram_recvmsg(sk, msg, len, flags);
46 else
47 return __unix_stream_recvmsg(sk, msg, len, flags);
48}
49
50static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
Oliver Hartkoppec095262022-04-11 14:49:55 +020051 size_t len, int flags, int *addr_len)
Cong Wang9825d862021-07-04 12:02:48 -070052{
53 struct unix_sock *u = unix_sk(sk);
54 struct sk_psock *psock;
Cong Wang0b846442021-07-23 11:36:30 -070055 int copied;
Cong Wang9825d862021-07-04 12:02:48 -070056
Michal Luczaj638f3262024-07-13 21:41:38 +020057 if (flags & MSG_OOB)
58 return -EOPNOTSUPP;
59
Liu Jiand900f3d2023-03-03 16:09:46 +080060 if (!len)
61 return 0;
62
Cong Wang9825d862021-07-04 12:02:48 -070063 psock = sk_psock_get(sk);
64 if (unlikely(!psock))
Jiang Wang94531cf2021-08-16 19:03:21 +000065 return __unix_recvmsg(sk, msg, len, flags);
Cong Wang9825d862021-07-04 12:02:48 -070066
67 mutex_lock(&u->iolock);
68 if (!skb_queue_empty(&sk->sk_receive_queue) &&
69 sk_psock_queue_empty(psock)) {
Cong Wang0b846442021-07-23 11:36:30 -070070 mutex_unlock(&u->iolock);
71 sk_psock_put(sk, psock);
Jiang Wang94531cf2021-08-16 19:03:21 +000072 return __unix_recvmsg(sk, msg, len, flags);
Cong Wang9825d862021-07-04 12:02:48 -070073 }
74
75msg_bytes_ready:
76 copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
77 if (!copied) {
78 long timeo;
79 int data;
80
Oliver Hartkoppec095262022-04-11 14:49:55 +020081 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
Cong Wang9825d862021-07-04 12:02:48 -070082 data = unix_msg_wait_data(sk, psock, timeo);
83 if (data) {
84 if (!sk_psock_queue_empty(psock))
85 goto msg_bytes_ready;
Cong Wang0b846442021-07-23 11:36:30 -070086 mutex_unlock(&u->iolock);
87 sk_psock_put(sk, psock);
Jiang Wang94531cf2021-08-16 19:03:21 +000088 return __unix_recvmsg(sk, msg, len, flags);
Cong Wang9825d862021-07-04 12:02:48 -070089 }
90 copied = -EAGAIN;
91 }
Cong Wang9825d862021-07-04 12:02:48 -070092 mutex_unlock(&u->iolock);
93 sk_psock_put(sk, psock);
Cong Wang0b846442021-07-23 11:36:30 -070094 return copied;
Cong Wang9825d862021-07-04 12:02:48 -070095}
96
Jiang Wang94531cf2021-08-16 19:03:21 +000097static struct proto *unix_dgram_prot_saved __read_mostly;
98static DEFINE_SPINLOCK(unix_dgram_prot_lock);
99static struct proto unix_dgram_bpf_prot;
Cong Wangc6382912021-07-04 12:02:47 -0700100
Jiang Wang94531cf2021-08-16 19:03:21 +0000101static struct proto *unix_stream_prot_saved __read_mostly;
102static DEFINE_SPINLOCK(unix_stream_prot_lock);
103static struct proto unix_stream_bpf_prot;
104
105static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
Cong Wangc6382912021-07-04 12:02:47 -0700106{
107 *prot = *base;
108 prot->close = sock_map_close;
Jiang Wang94531cf2021-08-16 19:03:21 +0000109 prot->recvmsg = unix_bpf_recvmsg;
Cong Wangaf493382021-10-08 13:33:05 -0700110 prot->sock_is_readable = sk_msg_is_readable;
Cong Wangc6382912021-07-04 12:02:47 -0700111}
112
Jiang Wang94531cf2021-08-16 19:03:21 +0000113static void unix_stream_bpf_rebuild_protos(struct proto *prot,
114 const struct proto *base)
Cong Wangc6382912021-07-04 12:02:47 -0700115{
Jiang Wang94531cf2021-08-16 19:03:21 +0000116 *prot = *base;
117 prot->close = sock_map_close;
118 prot->recvmsg = unix_bpf_recvmsg;
Cong Wangaf493382021-10-08 13:33:05 -0700119 prot->sock_is_readable = sk_msg_is_readable;
Jiang Wang94531cf2021-08-16 19:03:21 +0000120 prot->unhash = sock_map_unhash;
121}
122
123static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
124{
125 if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
126 spin_lock_bh(&unix_dgram_prot_lock);
127 if (likely(ops != unix_dgram_prot_saved)) {
128 unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
129 smp_store_release(&unix_dgram_prot_saved, ops);
Cong Wangc6382912021-07-04 12:02:47 -0700130 }
Jiang Wang94531cf2021-08-16 19:03:21 +0000131 spin_unlock_bh(&unix_dgram_prot_lock);
Cong Wangc6382912021-07-04 12:02:47 -0700132 }
133}
134
Jiang Wang94531cf2021-08-16 19:03:21 +0000135static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
136{
137 if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
138 spin_lock_bh(&unix_stream_prot_lock);
139 if (likely(ops != unix_stream_prot_saved)) {
140 unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
141 smp_store_release(&unix_stream_prot_saved, ops);
142 }
143 spin_unlock_bh(&unix_stream_prot_lock);
144 }
145}
146
147int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
Cong Wangc6382912021-07-04 12:02:47 -0700148{
Cong Wang83f31532021-07-31 12:50:38 -0700149 if (sk->sk_type != SOCK_DGRAM)
150 return -EOPNOTSUPP;
151
Cong Wangc6382912021-07-04 12:02:47 -0700152 if (restore) {
153 sk->sk_write_space = psock->saved_write_space;
Pavel Begunkovfee9ac02022-10-27 00:25:57 +0100154 sock_replace_proto(sk, psock->sk_proto);
Cong Wangc6382912021-07-04 12:02:47 -0700155 return 0;
156 }
157
Jiang Wang94531cf2021-08-16 19:03:21 +0000158 unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
Pavel Begunkovfee9ac02022-10-27 00:25:57 +0100159 sock_replace_proto(sk, &unix_dgram_bpf_prot);
Jiang Wang94531cf2021-08-16 19:03:21 +0000160 return 0;
161}
162
163int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
164{
John Fastabend88667302023-11-28 17:25:56 -0800165 struct sock *sk_pair;
166
John Fastabend16b2f262023-12-21 15:23:23 -0800167 /* Restore does not decrement the sk_pair reference yet because we must
168 * keep the a reference to the socket until after an RCU grace period
169 * and any pending sends have completed.
170 */
Jiang Wang94531cf2021-08-16 19:03:21 +0000171 if (restore) {
172 sk->sk_write_space = psock->saved_write_space;
Pavel Begunkovfee9ac02022-10-27 00:25:57 +0100173 sock_replace_proto(sk, psock->sk_proto);
Jiang Wang94531cf2021-08-16 19:03:21 +0000174 return 0;
175 }
176
John Fastabend16b2f262023-12-21 15:23:23 -0800177 /* psock_update_sk_prot can be called multiple times if psock is
178 * added to multiple maps and/or slots in the same map. There is
179 * also an edge case where replacing a psock with itself can trigger
180 * an extra psock_update_sk_prot during the insert process. So it
181 * must be safe to do multiple calls. Here we need to ensure we don't
182 * increment the refcnt through sock_hold many times. There will only
183 * be a single matching destroy operation.
184 */
185 if (!psock->sk_pair) {
186 sk_pair = unix_peer(sk);
187 sock_hold(sk_pair);
188 psock->sk_pair = sk_pair;
189 }
190
Jiang Wang94531cf2021-08-16 19:03:21 +0000191 unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
Pavel Begunkovfee9ac02022-10-27 00:25:57 +0100192 sock_replace_proto(sk, &unix_stream_bpf_prot);
Cong Wangc6382912021-07-04 12:02:47 -0700193 return 0;
194}
195
196void __init unix_bpf_build_proto(void)
197{
Jiang Wang94531cf2021-08-16 19:03:21 +0000198 unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
199 unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
200
Cong Wangc6382912021-07-04 12:02:47 -0700201}