// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Manage RMBE
 * copy new RMBE data into user space
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/net.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
#include <linux/splice.h>

#include <net/sock.h>
#include <trace/events/sock.h>

#include "smc.h"
#include "smc_core.h"
#include "smc_cdc.h"
#include "smc_tx.h" /* smc_tx_consumer_update() */
#include "smc_rx.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"

/* callback implementation to wake up consumers blocked with smc_rx_wait().
 * indirectly called by smc_cdc_msg_recv_action().
 */
static void smc_rx_wake_up(struct sock *sk)
{
	struct socket_wq *wq;

	trace_sk_data_ready(sk);

	/* derived from sock_def_readable() */
	/* called already in smc_listen_work() */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
						EPOLLRDNORM | EPOLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
	    (sk->sk_state == SMC_CLOSED))
		sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
	rcu_read_unlock();
}

/* Update consumer cursor
 * @conn   connection to update
 * @cons   consumer cursor
 * @len    number of Bytes consumed
 * Returns:
 * 1 if we should end our receive, 0 otherwise
 */
static int smc_rx_update_consumer(struct smc_sock *smc,
				  union smc_host_cursor cons, size_t len)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	bool force = false;
	int diff, rc = 0;

	smc_curs_add(conn->rmb_desc->len, &cons, len);

	/* did we process urgent data? */
	if (conn->urg_state == SMC_URG_VALID || conn->urg_rx_skip_pend) {
		diff = smc_curs_comp(conn->rmb_desc->len, &cons,
				     &conn->urg_curs);
		if (sock_flag(sk, SOCK_URGINLINE)) {
			if (diff == 0) {
				force = true;
				rc = 1;
				conn->urg_state = SMC_URG_READ;
			}
		} else {
			if (diff == 1) {
				/* skip urgent byte */
				force = true;
				smc_curs_add(conn->rmb_desc->len, &cons, 1);
				conn->urg_rx_skip_pend = false;
			} else if (diff < -1)
				/* we read past urgent byte */
				conn->urg_state = SMC_URG_READ;
		}
	}

	smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);

	/* send consumer cursor update if required */
	/* similar to advertising new TCP rcv_wnd if required */
	smc_tx_consumer_update(conn, force);

	return rc;
}

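/* bump the connection's consumer cursor by @len bytes from its current
 * position and send a cursor update to the peer if required
 */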
static void smc_rx_update_cons(struct smc_sock *smc, size_t len)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons;

	smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
	smc_rx_update_consumer(smc, cons, len);
}

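/* per pipe_buffer context for the splice() path: remembers which socket the
 * buffer belongs to and how many bytes it covers, so the consumer cursor can
 * be advanced when the pipe buffer is released
 */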
struct smc_spd_priv {
	struct smc_sock *smc;
	size_t len;
};

static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private;
	struct smc_sock *smc = priv->smc;
	struct smc_connection *conn;
	struct sock *sk = &smc->sk;

	if (sk->sk_state == SMC_CLOSED ||
	    sk->sk_state == SMC_PEERFINCLOSEWAIT ||
	    sk->sk_state == SMC_APPFINCLOSEWAIT)
		goto out;
	conn = &smc->conn;
	lock_sock(sk);
	smc_rx_update_cons(smc, priv->len);
	release_sock(sk);
	if (atomic_sub_and_test(priv->len, &conn->splice_pending))
		smc_rx_wake_up(sk);
out:
	kfree(priv);
	put_page(buf->page);
	sock_put(sk);
}

static const struct pipe_buf_operations smc_pipe_ops = {
	.release = smc_rx_pipe_buf_release,
	.get = generic_pipe_buf_get
};

static void smc_rx_spd_release(struct splice_pipe_desc *spd,
			       unsigned int i)
{
	put_page(spd->pages[i]);
}

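/* hand RMB pages to the pipe by reference instead of copying the data;
 * the consumer cursor is only advanced in smc_rx_pipe_buf_release(), i.e.
 * once the reader has drained the corresponding pipe buffer
 */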
static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
			 struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	int offset = offset_in_page(src);
	struct partial_page *partial;
	struct splice_pipe_desc spd;
	struct smc_spd_priv **priv;
	struct page **pages;
	int bytes, nr_pages;
	int i;

	nr_pages = !lgr->is_smcd && smc->conn.rmb_desc->is_vm ?
		   PAGE_ALIGN(len + offset) / PAGE_SIZE : 1;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		goto out;
	partial = kcalloc(nr_pages, sizeof(*partial), GFP_KERNEL);
	if (!partial)
		goto out_page;
	priv = kcalloc(nr_pages, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_part;
	for (i = 0; i < nr_pages; i++) {
		priv[i] = kzalloc(sizeof(**priv), GFP_KERNEL);
		if (!priv[i])
			goto out_priv;
	}

	if (lgr->is_smcd ||
	    (!lgr->is_smcd && !smc->conn.rmb_desc->is_vm)) {
		/* smcd or smcr that uses physically contiguous RMBs */
		priv[0]->len = len;
		priv[0]->smc = smc;
		partial[0].offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
		partial[0].len = len;
		partial[0].private = (unsigned long)priv[0];
		pages[0] = smc->conn.rmb_desc->pages;
	} else {
		int size, left = len;
		void *buf = src;
		/* smcr that uses virtually contiguous RMBs */
		for (i = 0; i < nr_pages; i++) {
			size = min_t(int, PAGE_SIZE - offset, left);
			priv[i]->len = size;
			priv[i]->smc = smc;
			pages[i] = vmalloc_to_page(buf);
			partial[i].offset = offset;
			partial[i].len = size;
			partial[i].private = (unsigned long)priv[i];
			buf += size / sizeof(*buf);
			left -= size;
			offset = 0;
		}
	}
	spd.nr_pages_max = nr_pages;
	spd.nr_pages = nr_pages;
	spd.pages = pages;
	spd.partial = partial;
	spd.ops = &smc_pipe_ops;
	spd.spd_release = smc_rx_spd_release;

	bytes = splice_to_pipe(pipe, &spd);
	if (bytes > 0) {
		sock_hold(&smc->sk);
		if (!lgr->is_smcd && smc->conn.rmb_desc->is_vm) {
			for (i = 0; i < PAGE_ALIGN(bytes + offset) / PAGE_SIZE; i++)
				get_page(pages[i]);
		} else {
			get_page(smc->conn.rmb_desc->pages);
		}
		atomic_add(bytes, &smc->conn.splice_pending);
	}
	kfree(priv);
	kfree(partial);
	kfree(pages);

	return bytes;

out_priv:
	for (i = (i - 1); i >= 0; i--)
		kfree(priv[i]);
	kfree(priv);
out_part:
	kfree(partial);
out_page:
	kfree(pages);
out:
	return -ENOMEM;
}

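/* wait criterion for recvmsg(): data is available in the rcvbuf and no
 * earlier splice() on this connection is still pending
 */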
static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
{
	return atomic_read(&conn->bytes_to_rcv) &&
	       !atomic_read(&conn->splice_pending);
}

/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
 * @smc    smc socket
 * @timeo  pointer to max seconds to wait, pointer to value 0 for no timeout
 * @fcrit  add'l criterion to evaluate as function pointer
 * Returns:
 * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
 * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
 */
int smc_rx_wait(struct smc_sock *smc, long *timeo,
		int (*fcrit)(struct smc_connection *conn))
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct smc_connection *conn = &smc->conn;
	struct smc_cdc_conn_state_flags *cflags =
		&conn->local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;
	int rc;

	if (fcrit(conn))
		return 1;
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	add_wait_queue(sk_sleep(sk), &wait);
	rc = sk_wait_event(sk, timeo,
			   READ_ONCE(sk->sk_err) ||
			   cflags->peer_conn_abort ||
			   READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
			   conn->killed ||
			   fcrit(conn),
			   &wait);
	remove_wait_queue(sk_sleep(sk), &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	return rc;
}

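/* MSG_OOB receive path: hand the single urgent byte to the caller out of
 * band, unless urgent data is delivered inline (SO_OOBINLINE) or the byte
 * was already consumed
 */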
static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
			   int flags)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons;
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (sock_flag(sk, SOCK_URGINLINE) ||
	    !(conn->urg_state == SMC_URG_VALID) ||
	    conn->urg_state == SMC_URG_READ)
		return -EINVAL;

	SMC_STAT_INC(smc, urg_data_cnt);
	if (conn->urg_state == SMC_URG_VALID) {
		if (!(flags & MSG_PEEK))
			smc->conn.urg_state = SMC_URG_READ;
		msg->msg_flags |= MSG_OOB;
		if (len > 0) {
			if (!(flags & MSG_TRUNC))
				rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);
			len = 1;
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			if (smc_curs_diff(conn->rmb_desc->len, &cons,
					  &conn->urg_curs) > 1)
				conn->urg_rx_skip_pend = true;
			/* Urgent Byte was already accounted for, but trigger
			 * skipping the urgent byte in non-inline case
			 */
			if (!(flags & MSG_PEEK))
				smc_rx_update_consumer(smc, cons, 0);
		} else {
			msg->msg_flags |= MSG_TRUNC;
		}

		return rc ? -EFAULT : len;
	}

	if (sk->sk_state == SMC_CLOSED || sk->sk_shutdown & RCV_SHUTDOWN)
		return 0;

	return -EAGAIN;
}

static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;

	if (smc_rx_data_available(conn))
		return true;
	else if (conn->urg_state == SMC_URG_VALID)
		/* we received a single urgent Byte - skip */
		smc_rx_update_cons(smc, 0);
	return false;
}

/* smc_rx_recvmsg - receive data from RMBE
 * @msg:	copy data to receive buffer
 * @pipe:	copy data to pipe if set - indicates splice() call
 *
 * rcvbuf consumer: main API called by socket layer.
 * Called under sk lock.
 */
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
		   struct pipe_inode_info *pipe, size_t len, int flags)
{
	size_t copylen, read_done = 0, read_remaining = len;
	size_t chunk_len, chunk_off, chunk_len_sum;
	struct smc_connection *conn = &smc->conn;
	int (*func)(struct smc_connection *conn);
	union smc_host_cursor cons;
	int readable, chunk;
	char *rcvbuf_base;
	struct sock *sk;
	int splbytes;
	long timeo;
	int target;		/* Read at least this many bytes */
	int rc;

	if (unlikely(flags & MSG_ERRQUEUE))
		return -EINVAL; /* future work for sk.sk_family == AF_SMC */

	sk = &smc->sk;
	if (sk->sk_state == SMC_LISTEN)
		return -ENOTCONN;
	if (flags & MSG_OOB)
		return smc_rx_recv_urg(smc, msg, len, flags);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	readable = atomic_read(&conn->bytes_to_rcv);
	if (readable >= conn->rmb_desc->len)
		SMC_STAT_RMB_RX_FULL(smc, !conn->lnk);

	if (len < readable)
		SMC_STAT_RMB_RX_SIZE_SMALL(smc, !conn->lnk);
	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;

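	/* receive loop: wait until data (or at least @target bytes) is
	 * available, then copy it out in at most two chunks because the
	 * RMBE is a ring buffer whose readable region may wrap around
	 */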
	do { /* while (read_remaining) */
		if (read_done >= target || (pipe && read_done))
			break;

		if (conn->killed)
			break;

		if (smc_rx_recvmsg_data_available(smc))
			goto copy;

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			/* smc_cdc_msg_recv_action() could have run after
			 * above smc_rx_recvmsg_data_available()
			 */
			if (smc_rx_recvmsg_data_available(smc))
				goto copy;
			break;
		}

		if (read_done) {
			if (sk->sk_err ||
			    sk->sk_state == SMC_CLOSED ||
			    !timeo ||
			    signal_pending(current))
				break;
		} else {
			if (sk->sk_err) {
				read_done = sock_error(sk);
				break;
			}
			if (sk->sk_state == SMC_CLOSED) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when the user tries to
					 * read from a never-connected socket.
					 */
					read_done = -ENOTCONN;
					break;
				}
				break;
			}
			if (!timeo)
				return -EAGAIN;
			if (signal_pending(current)) {
				read_done = sock_intr_errno(timeo);
				break;
			}
		}

		if (!smc_rx_data_available(conn)) {
			smc_rx_wait(smc, &timeo, smc_rx_data_available);
			continue;
		}

copy:
		/* initialize variables for 1st iteration of subsequent loop */
		/* could be just 1 byte, even after waiting on data above */
		readable = atomic_read(&conn->bytes_to_rcv);
		splbytes = atomic_read(&conn->splice_pending);
		if (!readable || (msg && splbytes)) {
			if (splbytes)
				func = smc_rx_data_available_and_no_splice_pend;
			else
				func = smc_rx_data_available;
			smc_rx_wait(smc, &timeo, func);
			continue;
		}

		smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
		/* subsequent splice() calls pick up where previous left */
		if (splbytes)
			smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
		if (conn->urg_state == SMC_URG_VALID &&
		    sock_flag(&smc->sk, SOCK_URGINLINE) &&
		    readable > 1)
			readable--;	/* always stop at urgent Byte */
		/* not more than what user space asked for */
		copylen = min_t(size_t, read_remaining, readable);
		/* determine chunks where to read from rcvbuf */
		/* either unwrapped case, or 1st chunk of wrapped case */
		chunk_len = min_t(size_t, copylen, conn->rmb_desc->len -
				  cons.count);
		chunk_len_sum = chunk_len;
		chunk_off = cons.count;
		smc_rmb_sync_sg_for_cpu(conn);
		for (chunk = 0; chunk < 2; chunk++) {
			if (!(flags & MSG_TRUNC)) {
				if (msg) {
					rc = memcpy_to_msg(msg, rcvbuf_base +
							   chunk_off,
							   chunk_len);
				} else {
					rc = smc_rx_splice(pipe, rcvbuf_base +
							   chunk_off, chunk_len,
							   smc);
				}
				if (rc < 0) {
					if (!read_done)
						read_done = -EFAULT;
					goto out;
				}
			}
			read_remaining -= chunk_len;
			read_done += chunk_len;

			if (chunk_len_sum == copylen)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			chunk_len = copylen - chunk_len; /* remainder */
			chunk_len_sum += chunk_len;
			chunk_off = 0; /* modulo offset in recv ring buffer */
		}

		/* update cursors */
		if (!(flags & MSG_PEEK)) {
			/* increased in recv tasklet smc_cdc_msg_rcv() */
			smp_mb__before_atomic();
			atomic_sub(copylen, &conn->bytes_to_rcv);
			/* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
			smp_mb__after_atomic();
			if (msg && smc_rx_update_consumer(smc, cons, copylen))
				goto out;
		}

		trace_smc_rx_recvmsg(smc, copylen);
	} while (read_remaining);
out:
	return read_done;
}

/* Initialize receive properties on connection establishment. NB: not __init! */
void smc_rx_init(struct smc_sock *smc)
{
	smc->sk.sk_data_ready = smc_rx_wake_up;
	atomic_set(&smc->conn.splice_pending, 0);
	smc->conn.urg_state = SMC_URG_READ;
}