io_uring/rsrc: use wq for quiescing

Replace completions with waitqueues for rsrc data quiesce, the main
wakeup condition is when data refs hit zero. Note that data refs are
only changes under ->uring_lock, so we prepare before mutex_unlock()
reacquire it after taking the lock back. This change will be needed
in the next patch.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1d0dbc74b3b4fd67c8f01819e680c5e0da252956.1681395792.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 40cab42..5c96453 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -333,6 +333,7 @@ struct io_ring_ctx {
 	/* protected by ->uring_lock */
 	struct list_head		rsrc_ref_list;
 	struct io_alloc_cache		rsrc_node_cache;
+	struct wait_queue_head		rsrc_quiesce_wq;
 
 	struct list_head		io_buffers_pages;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9083a84..3c1c8c7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -321,6 +321,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->cq_wait);
 	init_waitqueue_head(&ctx->poll_wq);
+	init_waitqueue_head(&ctx->rsrc_quiesce_wq);
 	spin_lock_init(&ctx->completion_lock);
 	spin_lock_init(&ctx->timeout_lock);
 	INIT_WQ_LIST(&ctx->iopoll_list);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d7e7528f..f9ce407 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -158,6 +158,7 @@ static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
 static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 {
 	struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
+	struct io_ring_ctx *ctx = rsrc_data->ctx;
 	struct io_rsrc_put *prsrc, *tmp;
 
 	if (ref_node->inline_items)
@@ -171,13 +172,13 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 
 	io_rsrc_node_destroy(rsrc_data->ctx, ref_node);
 	if (io_put_rsrc_data_ref(rsrc_data))
-		complete(&rsrc_data->done);
+		wake_up_all(&ctx->rsrc_quiesce_wq);
 }
 
 void io_wait_rsrc_data(struct io_rsrc_data *data)
 {
-	if (data && !io_put_rsrc_data_ref(data))
-		wait_for_completion(&data->done);
+	if (data)
+		WARN_ON_ONCE(!io_put_rsrc_data_ref(data));
 }
 
 void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
@@ -257,6 +258,7 @@ int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
 __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
 				      struct io_ring_ctx *ctx)
 {
+	DEFINE_WAIT(we);
 	int ret;
 
 	/* As we may drop ->uring_lock, other task may have started quiesce */
@@ -273,7 +275,9 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
 
 	data->quiesce = true;
 	do {
+		prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE);
 		mutex_unlock(&ctx->uring_lock);
+
 		ret = io_run_task_work_sig(ctx);
 		if (ret < 0) {
 			mutex_lock(&ctx->uring_lock);
@@ -285,12 +289,15 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
 			}
 			break;
 		}
-		wait_for_completion_interruptible(&data->done);
+
+		schedule();
+		__set_current_state(TASK_RUNNING);
 		mutex_lock(&ctx->uring_lock);
 		ret = 0;
 	} while (data->refs);
-	data->quiesce = false;
 
+	finish_wait(&ctx->rsrc_quiesce_wq, &we);
+	data->quiesce = false;
 	return ret;
 }
 
@@ -366,7 +373,6 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 				goto fail;
 		}
 	}
-	init_completion(&data->done);
 	*pdata = data;
 	return 0;
 fail:
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 88adcb0..d93ba4e9 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -35,7 +35,6 @@ struct io_rsrc_data {
 	u64				**tags;
 	unsigned int			nr;
 	rsrc_put_fn			*do_put;
-	struct completion		done;
 	int				refs;
 	bool				quiesce;
 };