io_uring/rsrc: use wq for quiescing
Replace completions with waitqueues for rsrc data quiesce; the main
wakeup condition is the data refs hitting zero. Note that data refs are
only changed under ->uring_lock, so we prepare to wait before
mutex_unlock() and recheck the refs after taking the lock back. This
change will be needed in the next patch.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1d0dbc74b3b4fd67c8f01819e680c5e0da252956.1681395792.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 40cab42..5c96453 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -333,6 +333,7 @@ struct io_ring_ctx {
/* protected by ->uring_lock */
struct list_head rsrc_ref_list;
struct io_alloc_cache rsrc_node_cache;
+ struct wait_queue_head rsrc_quiesce_wq;
struct list_head io_buffers_pages;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9083a84..3c1c8c7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -321,6 +321,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->cq_wait);
init_waitqueue_head(&ctx->poll_wq);
+ init_waitqueue_head(&ctx->rsrc_quiesce_wq);
spin_lock_init(&ctx->completion_lock);
spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d7e7528f..f9ce407 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -158,6 +158,7 @@ static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
{
struct io_rsrc_data *rsrc_data = ref_node->rsrc_data;
+ struct io_ring_ctx *ctx = rsrc_data->ctx;
struct io_rsrc_put *prsrc, *tmp;
if (ref_node->inline_items)
@@ -171,13 +172,13 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
io_rsrc_node_destroy(rsrc_data->ctx, ref_node);
if (io_put_rsrc_data_ref(rsrc_data))
- complete(&rsrc_data->done);
+ wake_up_all(&ctx->rsrc_quiesce_wq);
}
void io_wait_rsrc_data(struct io_rsrc_data *data)
{
- if (data && !io_put_rsrc_data_ref(data))
- wait_for_completion(&data->done);
+ if (data)
+ WARN_ON_ONCE(!io_put_rsrc_data_ref(data));
}
void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
@@ -257,6 +258,7 @@ int io_rsrc_node_switch_start(struct io_ring_ctx *ctx)
__cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
struct io_ring_ctx *ctx)
{
+ DEFINE_WAIT(we);
int ret;
/* As we may drop ->uring_lock, other task may have started quiesce */
@@ -273,7 +275,9 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
data->quiesce = true;
do {
+ prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE);
mutex_unlock(&ctx->uring_lock);
+
ret = io_run_task_work_sig(ctx);
if (ret < 0) {
mutex_lock(&ctx->uring_lock);
@@ -285,12 +289,15 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
}
break;
}
- wait_for_completion_interruptible(&data->done);
+
+ schedule();
+ __set_current_state(TASK_RUNNING);
mutex_lock(&ctx->uring_lock);
ret = 0;
} while (data->refs);
- data->quiesce = false;
+ finish_wait(&ctx->rsrc_quiesce_wq, &we);
+ data->quiesce = false;
return ret;
}
@@ -366,7 +373,6 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
goto fail;
}
}
- init_completion(&data->done);
*pdata = data;
return 0;
fail:
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 88adcb0..d93ba4e9 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -35,7 +35,6 @@ struct io_rsrc_data {
u64 **tags;
unsigned int nr;
rsrc_put_fn *do_put;
- struct completion done;
int refs;
bool quiesce;
};