io_uring: improve ctx hang handling
If io_ring_exit_work() can't get it done in 5 minutes, something is
going very wrong, don't keep spinning at HZ / 20 rate, it doesn't help
and it may take much of CPU time if there is a lot of workers stuck as
such.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9e2d1ca81d569f6bc628af1a42ff6663bff7ce9c.1628471125.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 26a3ab9..af9472e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8795,6 +8795,7 @@ static void io_ring_exit_work(struct work_struct *work)
{
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
unsigned long timeout = jiffies + HZ * 60 * 5;
+ unsigned long interval = HZ / 20;
struct io_tctx_exit exit;
struct io_tctx_node *node;
int ret;
@@ -8819,8 +8820,11 @@ static void io_ring_exit_work(struct work_struct *work)
io_sq_thread_unpark(sqd);
}
- WARN_ON_ONCE(time_after(jiffies, timeout));
- } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+ if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
+ /* there is little hope left, don't run it too often */
+ interval = HZ * 60;
+ }
+ } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
init_completion(&exit.completion);
init_task_work(&exit.task_work, io_tctx_exit_cb);