Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */ |
| 3 | |
| 4 | #include <linux/skmsg.h> |
| 5 | #include <linux/bpf.h> |
| 6 | #include <net/sock.h> |
| 7 | #include <net/af_unix.h> |
| 8 | |
/*
 * Evaluates to non-zero when there is anything pending for a reader: an skb
 * on @__sk's sk_receive_queue, an skb on @__psock's ingress_skb queue, or an
 * entry on @__psock's ingress_msg list.
 *
 * Both arguments are parenthesized so that any pointer expression can be
 * passed, not just a plain identifier. Each argument may be evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define unix_sk_has_data(__sk, __psock)					\
		({	!skb_queue_empty(&(__sk)->sk_receive_queue) ||	\
			!skb_queue_empty(&(__psock)->ingress_skb) ||	\
			!list_empty(&(__psock)->ingress_msg);		\
		})
| 14 | |
| 15 | static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock, |
| 16 | long timeo) |
| 17 | { |
| 18 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 19 | struct unix_sock *u = unix_sk(sk); |
| 20 | int ret = 0; |
| 21 | |
| 22 | if (sk->sk_shutdown & RCV_SHUTDOWN) |
| 23 | return 1; |
| 24 | |
| 25 | if (!timeo) |
| 26 | return ret; |
| 27 | |
| 28 | add_wait_queue(sk_sleep(sk), &wait); |
| 29 | sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); |
| 30 | if (!unix_sk_has_data(sk, psock)) { |
| 31 | mutex_unlock(&u->iolock); |
| 32 | wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); |
| 33 | mutex_lock(&u->iolock); |
| 34 | ret = unix_sk_has_data(sk, psock); |
| 35 | } |
| 36 | sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); |
| 37 | remove_wait_queue(sk_sleep(sk), &wait); |
| 38 | return ret; |
| 39 | } |
| 40 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 41 | static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, |
| 42 | size_t len, int flags) |
| 43 | { |
| 44 | if (sk->sk_type == SOCK_DGRAM) |
| 45 | return __unix_dgram_recvmsg(sk, msg, len, flags); |
| 46 | else |
| 47 | return __unix_stream_recvmsg(sk, msg, len, flags); |
| 48 | } |
| 49 | |
| 50 | static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, |
Oliver Hartkopp | ec09526 | 2022-04-11 14:49:55 +0200 | [diff] [blame] | 51 | size_t len, int flags, int *addr_len) |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 52 | { |
| 53 | struct unix_sock *u = unix_sk(sk); |
| 54 | struct sk_psock *psock; |
Cong Wang | 0b84644 | 2021-07-23 11:36:30 -0700 | [diff] [blame] | 55 | int copied; |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 56 | |
Michal Luczaj | 638f326 | 2024-07-13 21:41:38 +0200 | [diff] [blame] | 57 | if (flags & MSG_OOB) |
| 58 | return -EOPNOTSUPP; |
| 59 | |
Liu Jian | d900f3d | 2023-03-03 16:09:46 +0800 | [diff] [blame] | 60 | if (!len) |
| 61 | return 0; |
| 62 | |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 63 | psock = sk_psock_get(sk); |
| 64 | if (unlikely(!psock)) |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 65 | return __unix_recvmsg(sk, msg, len, flags); |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 66 | |
| 67 | mutex_lock(&u->iolock); |
| 68 | if (!skb_queue_empty(&sk->sk_receive_queue) && |
| 69 | sk_psock_queue_empty(psock)) { |
Cong Wang | 0b84644 | 2021-07-23 11:36:30 -0700 | [diff] [blame] | 70 | mutex_unlock(&u->iolock); |
| 71 | sk_psock_put(sk, psock); |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 72 | return __unix_recvmsg(sk, msg, len, flags); |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 73 | } |
| 74 | |
| 75 | msg_bytes_ready: |
| 76 | copied = sk_msg_recvmsg(sk, psock, msg, len, flags); |
| 77 | if (!copied) { |
| 78 | long timeo; |
| 79 | int data; |
| 80 | |
Oliver Hartkopp | ec09526 | 2022-04-11 14:49:55 +0200 | [diff] [blame] | 81 | timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 82 | data = unix_msg_wait_data(sk, psock, timeo); |
| 83 | if (data) { |
| 84 | if (!sk_psock_queue_empty(psock)) |
| 85 | goto msg_bytes_ready; |
Cong Wang | 0b84644 | 2021-07-23 11:36:30 -0700 | [diff] [blame] | 86 | mutex_unlock(&u->iolock); |
| 87 | sk_psock_put(sk, psock); |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 88 | return __unix_recvmsg(sk, msg, len, flags); |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 89 | } |
| 90 | copied = -EAGAIN; |
| 91 | } |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 92 | mutex_unlock(&u->iolock); |
| 93 | sk_psock_put(sk, psock); |
Cong Wang | 0b84644 | 2021-07-23 11:36:30 -0700 | [diff] [blame] | 94 | return copied; |
Cong Wang | 9825d86 | 2021-07-04 12:02:48 -0700 | [diff] [blame] | 95 | } |
| 96 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 97 | static struct proto *unix_dgram_prot_saved __read_mostly; |
| 98 | static DEFINE_SPINLOCK(unix_dgram_prot_lock); |
| 99 | static struct proto unix_dgram_bpf_prot; |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 100 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 101 | static struct proto *unix_stream_prot_saved __read_mostly; |
| 102 | static DEFINE_SPINLOCK(unix_stream_prot_lock); |
| 103 | static struct proto unix_stream_bpf_prot; |
| 104 | |
| 105 | static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base) |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 106 | { |
| 107 | *prot = *base; |
| 108 | prot->close = sock_map_close; |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 109 | prot->recvmsg = unix_bpf_recvmsg; |
Cong Wang | af49338 | 2021-10-08 13:33:05 -0700 | [diff] [blame] | 110 | prot->sock_is_readable = sk_msg_is_readable; |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 111 | } |
| 112 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 113 | static void unix_stream_bpf_rebuild_protos(struct proto *prot, |
| 114 | const struct proto *base) |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 115 | { |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 116 | *prot = *base; |
| 117 | prot->close = sock_map_close; |
| 118 | prot->recvmsg = unix_bpf_recvmsg; |
Cong Wang | af49338 | 2021-10-08 13:33:05 -0700 | [diff] [blame] | 119 | prot->sock_is_readable = sk_msg_is_readable; |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 120 | prot->unhash = sock_map_unhash; |
| 121 | } |
| 122 | |
| 123 | static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops) |
| 124 | { |
| 125 | if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) { |
| 126 | spin_lock_bh(&unix_dgram_prot_lock); |
| 127 | if (likely(ops != unix_dgram_prot_saved)) { |
| 128 | unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops); |
| 129 | smp_store_release(&unix_dgram_prot_saved, ops); |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 130 | } |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 131 | spin_unlock_bh(&unix_dgram_prot_lock); |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 132 | } |
| 133 | } |
| 134 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 135 | static void unix_stream_bpf_check_needs_rebuild(struct proto *ops) |
| 136 | { |
| 137 | if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) { |
| 138 | spin_lock_bh(&unix_stream_prot_lock); |
| 139 | if (likely(ops != unix_stream_prot_saved)) { |
| 140 | unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops); |
| 141 | smp_store_release(&unix_stream_prot_saved, ops); |
| 142 | } |
| 143 | spin_unlock_bh(&unix_stream_prot_lock); |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 148 | { |
Cong Wang | 83f3153 | 2021-07-31 12:50:38 -0700 | [diff] [blame] | 149 | if (sk->sk_type != SOCK_DGRAM) |
| 150 | return -EOPNOTSUPP; |
| 151 | |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 152 | if (restore) { |
| 153 | sk->sk_write_space = psock->saved_write_space; |
Pavel Begunkov | fee9ac0 | 2022-10-27 00:25:57 +0100 | [diff] [blame] | 154 | sock_replace_proto(sk, psock->sk_proto); |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 155 | return 0; |
| 156 | } |
| 157 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 158 | unix_dgram_bpf_check_needs_rebuild(psock->sk_proto); |
Pavel Begunkov | fee9ac0 | 2022-10-27 00:25:57 +0100 | [diff] [blame] | 159 | sock_replace_proto(sk, &unix_dgram_bpf_prot); |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 160 | return 0; |
| 161 | } |
| 162 | |
| 163 | int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) |
| 164 | { |
John Fastabend | 8866730 | 2023-11-28 17:25:56 -0800 | [diff] [blame] | 165 | struct sock *sk_pair; |
| 166 | |
John Fastabend | 16b2f26 | 2023-12-21 15:23:23 -0800 | [diff] [blame] | 167 | /* Restore does not decrement the sk_pair reference yet because we must |
| 168 | * keep the a reference to the socket until after an RCU grace period |
| 169 | * and any pending sends have completed. |
| 170 | */ |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 171 | if (restore) { |
| 172 | sk->sk_write_space = psock->saved_write_space; |
Pavel Begunkov | fee9ac0 | 2022-10-27 00:25:57 +0100 | [diff] [blame] | 173 | sock_replace_proto(sk, psock->sk_proto); |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 174 | return 0; |
| 175 | } |
| 176 | |
John Fastabend | 16b2f26 | 2023-12-21 15:23:23 -0800 | [diff] [blame] | 177 | /* psock_update_sk_prot can be called multiple times if psock is |
| 178 | * added to multiple maps and/or slots in the same map. There is |
| 179 | * also an edge case where replacing a psock with itself can trigger |
| 180 | * an extra psock_update_sk_prot during the insert process. So it |
| 181 | * must be safe to do multiple calls. Here we need to ensure we don't |
| 182 | * increment the refcnt through sock_hold many times. There will only |
| 183 | * be a single matching destroy operation. |
| 184 | */ |
| 185 | if (!psock->sk_pair) { |
| 186 | sk_pair = unix_peer(sk); |
| 187 | sock_hold(sk_pair); |
| 188 | psock->sk_pair = sk_pair; |
| 189 | } |
| 190 | |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 191 | unix_stream_bpf_check_needs_rebuild(psock->sk_proto); |
Pavel Begunkov | fee9ac0 | 2022-10-27 00:25:57 +0100 | [diff] [blame] | 192 | sock_replace_proto(sk, &unix_stream_bpf_prot); |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 193 | return 0; |
| 194 | } |
| 195 | |
| 196 | void __init unix_bpf_build_proto(void) |
| 197 | { |
Jiang Wang | 94531cf | 2021-08-16 19:03:21 +0000 | [diff] [blame] | 198 | unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto); |
| 199 | unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto); |
| 200 | |
Cong Wang | c638291 | 2021-07-04 12:02:47 -0700 | [diff] [blame] | 201 | } |