Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/kernel.h> |
| 3 | #include <linux/errno.h> |
| 4 | #include <linux/file.h> |
| 5 | #include <linux/slab.h> |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 6 | #include <linux/nospec.h> |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 7 | #include <linux/io_uring.h> |
| 8 | |
| 9 | #include <uapi/linux/io_uring.h> |
| 10 | |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 11 | #include "io_uring.h" |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 12 | #include "rsrc.h" |
| 13 | #include "filetable.h" |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 14 | #include "msg_ring.h" |
| 15 | |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 16 | |
/*
 * Every msg_ring_flags bit that io_msg_ring_prep() accepts; an SQE carrying
 * any bit outside this mask is rejected with -EINVAL.
 */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_FLAGS_PASS | \
					 IORING_MSG_RING_CQE_SKIP)
| 20 | |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 21 | struct io_msg { |
| 22 | struct file *file; |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 23 | struct file *src_file; |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 24 | struct callback_head tw; |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 25 | u64 user_data; |
| 26 | u32 len; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 27 | u32 cmd; |
| 28 | u32 src_fd; |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 29 | union { |
| 30 | u32 dst_fd; |
| 31 | u32 cqe_flags; |
| 32 | }; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 33 | u32 flags; |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 34 | }; |
| 35 | |
Jens Axboe | 423d508 | 2023-01-19 09:01:27 -0700 | [diff] [blame] | 36 | static void io_double_unlock_ctx(struct io_ring_ctx *octx) |
| 37 | { |
| 38 | mutex_unlock(&octx->uring_lock); |
| 39 | } |
| 40 | |
| 41 | static int io_double_lock_ctx(struct io_ring_ctx *octx, |
| 42 | unsigned int issue_flags) |
| 43 | { |
| 44 | /* |
| 45 | * To ensure proper ordering between the two ctxs, we can only |
| 46 | * attempt a trylock on the target. If that fails and we already have |
| 47 | * the source ctx lock, punt to io-wq. |
| 48 | */ |
| 49 | if (!(issue_flags & IO_URING_F_UNLOCKED)) { |
| 50 | if (!mutex_trylock(&octx->uring_lock)) |
| 51 | return -EAGAIN; |
| 52 | return 0; |
| 53 | } |
| 54 | mutex_lock(&octx->uring_lock); |
| 55 | return 0; |
| 56 | } |
| 57 | |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 58 | void io_msg_ring_cleanup(struct io_kiocb *req) |
| 59 | { |
| 60 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
| 61 | |
| 62 | if (WARN_ON_ONCE(!msg->src_file)) |
| 63 | return; |
| 64 | |
| 65 | fput(msg->src_file); |
| 66 | msg->src_file = NULL; |
| 67 | } |
| 68 | |
Pavel Begunkov | 56d8e31 | 2023-01-20 16:38:05 +0000 | [diff] [blame] | 69 | static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) |
| 70 | { |
| 71 | if (!target_ctx->task_complete) |
| 72 | return false; |
| 73 | return current != target_ctx->submitter_task; |
| 74 | } |
| 75 | |
| 76 | static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func) |
| 77 | { |
| 78 | struct io_ring_ctx *ctx = req->file->private_data; |
| 79 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
Pavel Begunkov | 8579538 | 2023-01-20 16:38:06 +0000 | [diff] [blame] | 80 | struct task_struct *task = READ_ONCE(ctx->submitter_task); |
| 81 | |
| 82 | if (unlikely(!task)) |
| 83 | return -EOWNERDEAD; |
Pavel Begunkov | 56d8e31 | 2023-01-20 16:38:05 +0000 | [diff] [blame] | 84 | |
| 85 | init_task_work(&msg->tw, func); |
| 86 | if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL)) |
| 87 | return -EOWNERDEAD; |
| 88 | |
| 89 | return IOU_ISSUE_SKIP_COMPLETE; |
| 90 | } |
| 91 | |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 92 | static void io_msg_tw_complete(struct callback_head *head) |
| 93 | { |
| 94 | struct io_msg *msg = container_of(head, struct io_msg, tw); |
| 95 | struct io_kiocb *req = cmd_to_io_kiocb(msg); |
| 96 | struct io_ring_ctx *target_ctx = req->file->private_data; |
| 97 | int ret = 0; |
| 98 | |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 99 | if (current->flags & PF_EXITING) { |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 100 | ret = -EOWNERDEAD; |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 101 | } else { |
Jens Axboe | 8572df9 | 2023-01-21 19:53:41 -0700 | [diff] [blame] | 102 | u32 flags = 0; |
| 103 | |
| 104 | if (msg->flags & IORING_MSG_RING_FLAGS_PASS) |
| 105 | flags = msg->cqe_flags; |
| 106 | |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 107 | /* |
| 108 | * If the target ring is using IOPOLL mode, then we need to be |
| 109 | * holding the uring_lock for posting completions. Other ring |
| 110 | * types rely on the regular completion locking, which is |
| 111 | * handled while posting. |
| 112 | */ |
| 113 | if (target_ctx->flags & IORING_SETUP_IOPOLL) |
| 114 | mutex_lock(&target_ctx->uring_lock); |
Jens Axboe | 8572df9 | 2023-01-21 19:53:41 -0700 | [diff] [blame] | 115 | if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 116 | ret = -EOVERFLOW; |
| 117 | if (target_ctx->flags & IORING_SETUP_IOPOLL) |
| 118 | mutex_unlock(&target_ctx->uring_lock); |
| 119 | } |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 120 | |
| 121 | if (ret < 0) |
| 122 | req_set_fail(req); |
| 123 | io_req_queue_tw_complete(req, ret); |
| 124 | } |
| 125 | |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 126 | static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 127 | { |
| 128 | struct io_ring_ctx *target_ctx = req->file->private_data; |
Stefan Metzmacher | f2ccb5a | 2022-08-11 09:11:15 +0200 | [diff] [blame] | 129 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 130 | u32 flags = 0; |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 131 | int ret; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 132 | |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 133 | if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS) |
| 134 | return -EINVAL; |
| 135 | if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 136 | return -EINVAL; |
Pavel Begunkov | 8579538 | 2023-01-20 16:38:06 +0000 | [diff] [blame] | 137 | if (target_ctx->flags & IORING_SETUP_R_DISABLED) |
| 138 | return -EBADFD; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 139 | |
Pavel Begunkov | 56d8e31 | 2023-01-20 16:38:05 +0000 | [diff] [blame] | 140 | if (io_msg_need_remote(target_ctx)) |
| 141 | return io_msg_exec_remote(req, io_msg_tw_complete); |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 142 | |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 143 | if (msg->flags & IORING_MSG_RING_FLAGS_PASS) |
| 144 | flags = msg->cqe_flags; |
| 145 | |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 146 | ret = -EOVERFLOW; |
| 147 | if (target_ctx->flags & IORING_SETUP_IOPOLL) { |
| 148 | if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) |
| 149 | return -EAGAIN; |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 150 | if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 151 | ret = 0; |
| 152 | io_double_unlock_ctx(target_ctx); |
| 153 | } else { |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 154 | if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 155 | ret = 0; |
| 156 | } |
| 157 | return ret; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 158 | } |
| 159 | |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 160 | static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags) |
| 161 | { |
| 162 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
| 163 | struct io_ring_ctx *ctx = req->ctx; |
| 164 | struct file *file = NULL; |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 165 | int idx = msg->src_fd; |
| 166 | |
| 167 | io_ring_submit_lock(ctx, issue_flags); |
| 168 | if (likely(idx < ctx->nr_user_files)) { |
| 169 | idx = array_index_nospec(idx, ctx->nr_user_files); |
Christoph Hellwig | f432c8c | 2023-06-20 13:32:34 +0200 | [diff] [blame] | 170 | file = io_file_from_index(&ctx->file_table, idx); |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 171 | if (file) |
| 172 | get_file(file); |
| 173 | } |
| 174 | io_ring_submit_unlock(ctx, issue_flags); |
| 175 | return file; |
| 176 | } |
| 177 | |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 178 | static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 179 | { |
| 180 | struct io_ring_ctx *target_ctx = req->file->private_data; |
Stefan Metzmacher | f2ccb5a | 2022-08-11 09:11:15 +0200 | [diff] [blame] | 181 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 182 | struct file *src_file = msg->src_file; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 183 | int ret; |
| 184 | |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 185 | if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) |
| 186 | return -EAGAIN; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 187 | |
| 188 | ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd); |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 189 | if (ret < 0) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 190 | goto out_unlock; |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 191 | |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 192 | msg->src_file = NULL; |
| 193 | req->flags &= ~REQ_F_NEED_CLEANUP; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 194 | |
| 195 | if (msg->flags & IORING_MSG_RING_CQE_SKIP) |
| 196 | goto out_unlock; |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 197 | /* |
| 198 | * If this fails, the target still received the file descriptor but |
| 199 | * wasn't notified of the fact. This means that if this request |
| 200 | * completes with -EOVERFLOW, then the sender must ensure that a |
| 201 | * later IORING_OP_MSG_RING delivers the message. |
| 202 | */ |
Pavel Begunkov | 5da28ed | 2023-03-16 12:11:42 +0000 | [diff] [blame] | 203 | if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0)) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 204 | ret = -EOVERFLOW; |
| 205 | out_unlock: |
Jens Axboe | 423d508 | 2023-01-19 09:01:27 -0700 | [diff] [blame] | 206 | io_double_unlock_ctx(target_ctx); |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 207 | return ret; |
| 208 | } |
| 209 | |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 210 | static void io_msg_tw_fd_complete(struct callback_head *head) |
| 211 | { |
| 212 | struct io_msg *msg = container_of(head, struct io_msg, tw); |
| 213 | struct io_kiocb *req = cmd_to_io_kiocb(msg); |
| 214 | int ret = -EOWNERDEAD; |
| 215 | |
| 216 | if (!(current->flags & PF_EXITING)) |
| 217 | ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED); |
| 218 | if (ret < 0) |
| 219 | req_set_fail(req); |
| 220 | io_req_queue_tw_complete(req, ret); |
| 221 | } |
| 222 | |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 223 | static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) |
| 224 | { |
| 225 | struct io_ring_ctx *target_ctx = req->file->private_data; |
| 226 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
| 227 | struct io_ring_ctx *ctx = req->ctx; |
| 228 | struct file *src_file = msg->src_file; |
| 229 | |
Pavel Begunkov | 5da28ed | 2023-03-16 12:11:42 +0000 | [diff] [blame] | 230 | if (msg->len) |
| 231 | return -EINVAL; |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 232 | if (target_ctx == ctx) |
| 233 | return -EINVAL; |
Pavel Begunkov | 8579538 | 2023-01-20 16:38:06 +0000 | [diff] [blame] | 234 | if (target_ctx->flags & IORING_SETUP_R_DISABLED) |
| 235 | return -EBADFD; |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 236 | if (!src_file) { |
| 237 | src_file = io_msg_grab_file(req, issue_flags); |
| 238 | if (!src_file) |
| 239 | return -EBADF; |
| 240 | msg->src_file = src_file; |
| 241 | req->flags |= REQ_F_NEED_CLEANUP; |
| 242 | } |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 243 | |
Pavel Begunkov | 56d8e31 | 2023-01-20 16:38:05 +0000 | [diff] [blame] | 244 | if (io_msg_need_remote(target_ctx)) |
| 245 | return io_msg_exec_remote(req, io_msg_tw_fd_complete); |
Pavel Begunkov | 1721131 | 2022-12-07 03:53:35 +0000 | [diff] [blame] | 246 | return io_msg_install_complete(req, issue_flags); |
| 247 | } |
| 248 | |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 249 | int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) |
| 250 | { |
Stefan Metzmacher | f2ccb5a | 2022-08-11 09:11:15 +0200 | [diff] [blame] | 251 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 252 | |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 253 | if (unlikely(sqe->buf_index || sqe->personality)) |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 254 | return -EINVAL; |
| 255 | |
Pavel Begunkov | 1137302 | 2022-12-07 03:53:34 +0000 | [diff] [blame] | 256 | msg->src_file = NULL; |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 257 | msg->user_data = READ_ONCE(sqe->off); |
| 258 | msg->len = READ_ONCE(sqe->len); |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 259 | msg->cmd = READ_ONCE(sqe->addr); |
| 260 | msg->src_fd = READ_ONCE(sqe->addr3); |
| 261 | msg->dst_fd = READ_ONCE(sqe->file_index); |
| 262 | msg->flags = READ_ONCE(sqe->msg_ring_flags); |
Breno Leitao | cbeb47a | 2023-01-03 08:05:07 -0800 | [diff] [blame] | 263 | if (msg->flags & ~IORING_MSG_RING_MASK) |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 264 | return -EINVAL; |
| 265 | |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 266 | return 0; |
| 267 | } |
| 268 | |
| 269 | int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) |
| 270 | { |
Stefan Metzmacher | f2ccb5a | 2022-08-11 09:11:15 +0200 | [diff] [blame] | 271 | struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 272 | int ret; |
| 273 | |
| 274 | ret = -EBADFD; |
| 275 | if (!io_is_uring_fops(req->file)) |
| 276 | goto done; |
| 277 | |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 278 | switch (msg->cmd) { |
| 279 | case IORING_MSG_DATA: |
Jens Axboe | e12d7a4 | 2023-01-19 09:04:40 -0700 | [diff] [blame] | 280 | ret = io_msg_ring_data(req, issue_flags); |
Jens Axboe | e6130eb | 2022-06-13 04:47:02 -0600 | [diff] [blame] | 281 | break; |
| 282 | case IORING_MSG_SEND_FD: |
| 283 | ret = io_msg_send_fd(req, issue_flags); |
| 284 | break; |
| 285 | default: |
| 286 | ret = -EINVAL; |
| 287 | break; |
| 288 | } |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 289 | |
| 290 | done: |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 291 | if (ret < 0) { |
| 292 | if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE) |
| 293 | return ret; |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 294 | req_set_fail(req); |
Pavel Begunkov | 6d043ee | 2022-12-07 03:53:36 +0000 | [diff] [blame] | 295 | } |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 296 | io_req_set_res(req, ret, 0); |
Jens Axboe | 36404b0 | 2022-05-25 06:42:08 -0600 | [diff] [blame] | 297 | return IOU_OK; |
| 298 | } |