io_uring/msg_ring: Pass custom flags to the cqe

This patch adds a new flag (IORING_MSG_RING_FLAGS_PASS) in the message
ring operations (IORING_OP_MSG_RING). This new flag enables the sender
to specify custom flags, which will be copied over to cqe->flags in the
receiving ring.  These custom flags should be specified using the
sqe->file_index field.

This mechanism provides additional flexibility when sending messages
between rings.

Signed-off-by: Breno Leitao <leitao@debian.org>
Link: https://lore.kernel.org/r/20230103160507.617416-1-leitao@debian.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2780bce..636a4c2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -347,6 +347,8 @@ enum {
  *				applicable for IORING_MSG_DATA, obviously.
  */
 #define IORING_MSG_RING_CQE_SKIP	(1U << 0)
+/* Pass through the flags from sqe->file_index to cqe->flags */
+#define IORING_MSG_RING_FLAGS_PASS	(1U << 1)
 
 /*
  * IO completion data structure (Completion Queue Entry)
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 15602a1..27992bd 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -13,6 +13,11 @@
 #include "filetable.h"
 #include "msg_ring.h"
 
+
+/* All valid masks for MSG_RING */
+#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
+					IORING_MSG_RING_FLAGS_PASS)
+
 struct io_msg {
 	struct file			*file;
 	struct file			*src_file;
@@ -21,7 +26,10 @@ struct io_msg {
 	u32 len;
 	u32 cmd;
 	u32 src_fd;
-	u32 dst_fd;
+	union {
+		u32 dst_fd;
+		u32 cqe_flags;
+	};
 	u32 flags;
 };
 
@@ -114,9 +122,12 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
+	u32 flags = 0;
 	int ret;
 
-	if (msg->src_fd || msg->dst_fd || msg->flags)
+	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
+		return -EINVAL;
+	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
 		return -EINVAL;
 	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
 		return -EBADFD;
@@ -124,15 +135,18 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 	if (io_msg_need_remote(target_ctx))
 		return io_msg_exec_remote(req, io_msg_tw_complete);
 
+	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
+		flags = msg->cqe_flags;
+
 	ret = -EOVERFLOW;
 	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
 		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
 			return -EAGAIN;
-		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
 			ret = 0;
 		io_double_unlock_ctx(target_ctx);
 	} else {
-		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
 			ret = 0;
 	}
 	return ret;
@@ -241,7 +255,7 @@ int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	msg->src_fd = READ_ONCE(sqe->addr3);
 	msg->dst_fd = READ_ONCE(sqe->file_index);
 	msg->flags = READ_ONCE(sqe->msg_ring_flags);
-	if (msg->flags & ~IORING_MSG_RING_CQE_SKIP)
+	if (msg->flags & ~IORING_MSG_RING_MASK)
 		return -EINVAL;
 
 	return 0;