| // SPDX-License-Identifier: GPL-2.0 |
| |
| #include "io_uring.h" |
| #include "napi.h" |
| |
| #ifdef CONFIG_NET_RX_BUSY_POLL |
| |
| /* Timeout for cleanout of stale entries. */ |
| #define NAPI_TIMEOUT (60 * SEC_CONVERSION) |
| |
| struct io_napi_entry { |
| unsigned int napi_id; |
| struct list_head list; |
| |
| unsigned long timeout; |
| struct hlist_node node; |
| |
| struct rcu_head rcu; |
| }; |
| |
| static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, |
| unsigned int napi_id) |
| { |
| struct io_napi_entry *e; |
| |
| hlist_for_each_entry_rcu(e, hash_list, node) { |
| if (e->napi_id != napi_id) |
| continue; |
| e->timeout = jiffies + NAPI_TIMEOUT; |
| return e; |
| } |
| |
| return NULL; |
| } |
| |
| static inline ktime_t net_to_ktime(unsigned long t) |
| { |
| /* napi approximating usecs, reverse busy_loop_current_time */ |
| return ns_to_ktime(t << 10); |
| } |
| |
| void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) |
| { |
| struct hlist_head *hash_list; |
| unsigned int napi_id; |
| struct sock *sk; |
| struct io_napi_entry *e; |
| |
| sk = sock->sk; |
| if (!sk) |
| return; |
| |
| napi_id = READ_ONCE(sk->sk_napi_id); |
| |
| /* Non-NAPI IDs can be rejected. */ |
| if (napi_id < MIN_NAPI_ID) |
| return; |
| |
| hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; |
| |
| rcu_read_lock(); |
| e = io_napi_hash_find(hash_list, napi_id); |
| if (e) { |
| e->timeout = jiffies + NAPI_TIMEOUT; |
| rcu_read_unlock(); |
| return; |
| } |
| rcu_read_unlock(); |
| |
| e = kmalloc(sizeof(*e), GFP_NOWAIT); |
| if (!e) |
| return; |
| |
| e->napi_id = napi_id; |
| e->timeout = jiffies + NAPI_TIMEOUT; |
| |
| spin_lock(&ctx->napi_lock); |
| if (unlikely(io_napi_hash_find(hash_list, napi_id))) { |
| spin_unlock(&ctx->napi_lock); |
| kfree(e); |
| return; |
| } |
| |
| hlist_add_tail_rcu(&e->node, hash_list); |
| list_add_tail(&e->list, &ctx->napi_list); |
| spin_unlock(&ctx->napi_lock); |
| } |
| |
| static void __io_napi_remove_stale(struct io_ring_ctx *ctx) |
| { |
| struct io_napi_entry *e; |
| unsigned int i; |
| |
| spin_lock(&ctx->napi_lock); |
| hash_for_each(ctx->napi_ht, i, e, node) { |
| if (time_after(jiffies, e->timeout)) { |
| list_del(&e->list); |
| hash_del_rcu(&e->node); |
| kfree_rcu(e, rcu); |
| } |
| } |
| spin_unlock(&ctx->napi_lock); |
| } |
| |
| static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) |
| { |
| if (is_stale) |
| __io_napi_remove_stale(ctx); |
| } |
| |
| static inline bool io_napi_busy_loop_timeout(ktime_t start_time, |
| ktime_t bp) |
| { |
| if (bp) { |
| ktime_t end_time = ktime_add(start_time, bp); |
| ktime_t now = net_to_ktime(busy_loop_current_time()); |
| |
| return ktime_after(now, end_time); |
| } |
| |
| return true; |
| } |
| |
| static bool io_napi_busy_loop_should_end(void *data, |
| unsigned long start_time) |
| { |
| struct io_wait_queue *iowq = data; |
| |
| if (signal_pending(current)) |
| return true; |
| if (io_should_wake(iowq) || io_has_work(iowq->ctx)) |
| return true; |
| if (io_napi_busy_loop_timeout(net_to_ktime(start_time), |
| iowq->napi_busy_poll_dt)) |
| return true; |
| |
| return false; |
| } |
| |
| static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, |
| void *loop_end_arg) |
| { |
| struct io_napi_entry *e; |
| bool (*loop_end)(void *, unsigned long) = NULL; |
| bool is_stale = false; |
| |
| if (loop_end_arg) |
| loop_end = io_napi_busy_loop_should_end; |
| |
| list_for_each_entry_rcu(e, &ctx->napi_list, list) { |
| napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, |
| ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); |
| |
| if (time_after(jiffies, e->timeout)) |
| is_stale = true; |
| } |
| |
| return is_stale; |
| } |
| |
| static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, |
| struct io_wait_queue *iowq) |
| { |
| unsigned long start_time = busy_loop_current_time(); |
| void *loop_end_arg = NULL; |
| bool is_stale = false; |
| |
| /* Singular lists use a different napi loop end check function and are |
| * only executed once. |
| */ |
| if (list_is_singular(&ctx->napi_list)) |
| loop_end_arg = iowq; |
| |
| rcu_read_lock(); |
| do { |
| is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); |
| } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); |
| rcu_read_unlock(); |
| |
| io_napi_remove_stale(ctx, is_stale); |
| } |
| |
| /* |
| * io_napi_init() - Init napi settings |
| * @ctx: pointer to io-uring context structure |
| * |
| * Init napi settings in the io-uring context. |
| */ |
| void io_napi_init(struct io_ring_ctx *ctx) |
| { |
| u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; |
| |
| INIT_LIST_HEAD(&ctx->napi_list); |
| spin_lock_init(&ctx->napi_lock); |
| ctx->napi_prefer_busy_poll = false; |
| ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); |
| } |
| |
| /* |
| * io_napi_free() - Deallocate napi |
| * @ctx: pointer to io-uring context structure |
| * |
| * Free the napi list and the hash table in the io-uring context. |
| */ |
| void io_napi_free(struct io_ring_ctx *ctx) |
| { |
| struct io_napi_entry *e; |
| LIST_HEAD(napi_list); |
| unsigned int i; |
| |
| spin_lock(&ctx->napi_lock); |
| hash_for_each(ctx->napi_ht, i, e, node) { |
| hash_del_rcu(&e->node); |
| kfree_rcu(e, rcu); |
| } |
| spin_unlock(&ctx->napi_lock); |
| } |
| |
| /* |
| * io_napi_register() - Register napi with io-uring |
| * @ctx: pointer to io-uring context structure |
| * @arg: pointer to io_uring_napi structure |
| * |
| * Register napi in the io-uring context. |
| */ |
| int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) |
| { |
| const struct io_uring_napi curr = { |
| .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), |
| .prefer_busy_poll = ctx->napi_prefer_busy_poll |
| }; |
| struct io_uring_napi napi; |
| |
| if (ctx->flags & IORING_SETUP_IOPOLL) |
| return -EINVAL; |
| if (copy_from_user(&napi, arg, sizeof(napi))) |
| return -EFAULT; |
| if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) |
| return -EINVAL; |
| |
| if (copy_to_user(arg, &curr, sizeof(curr))) |
| return -EFAULT; |
| |
| WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); |
| WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); |
| WRITE_ONCE(ctx->napi_enabled, true); |
| return 0; |
| } |
| |
| /* |
| * io_napi_unregister() - Unregister napi with io-uring |
| * @ctx: pointer to io-uring context structure |
| * @arg: pointer to io_uring_napi structure |
| * |
| * Unregister napi. If arg has been specified copy the busy poll timeout and |
| * prefer busy poll setting to the passed in structure. |
| */ |
| int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) |
| { |
| const struct io_uring_napi curr = { |
| .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), |
| .prefer_busy_poll = ctx->napi_prefer_busy_poll |
| }; |
| |
| if (arg && copy_to_user(arg, &curr, sizeof(curr))) |
| return -EFAULT; |
| |
| WRITE_ONCE(ctx->napi_busy_poll_dt, 0); |
| WRITE_ONCE(ctx->napi_prefer_busy_poll, false); |
| WRITE_ONCE(ctx->napi_enabled, false); |
| return 0; |
| } |
| |
| /* |
| * __io_napi_adjust_timeout() - adjust busy loop timeout |
| * @ctx: pointer to io-uring context structure |
| * @iowq: pointer to io wait queue |
| * @ts: pointer to timespec or NULL |
| * |
| * Adjust the busy loop timeout according to timespec and busy poll timeout. |
| * If the specified NAPI timeout is bigger than the wait timeout, then adjust |
| * the NAPI timeout accordingly. |
| */ |
| void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, |
| ktime_t to_wait) |
| { |
| ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); |
| |
| if (to_wait) |
| poll_dt = min(poll_dt, to_wait); |
| |
| iowq->napi_busy_poll_dt = poll_dt; |
| } |
| |
| /* |
| * __io_napi_busy_loop() - execute busy poll loop |
| * @ctx: pointer to io-uring context structure |
| * @iowq: pointer to io wait queue |
| * |
| * Execute the busy poll loop and merge the spliced off list. |
| */ |
| void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) |
| { |
| iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); |
| |
| if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) |
| io_napi_blocking_busy_loop(ctx, iowq); |
| } |
| |
| /* |
| * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll |
| * @ctx: pointer to io-uring context structure |
| * |
| * Splice of the napi list and execute the napi busy poll loop. |
| */ |
| int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) |
| { |
| LIST_HEAD(napi_list); |
| bool is_stale = false; |
| |
| if (!READ_ONCE(ctx->napi_busy_poll_dt)) |
| return 0; |
| if (list_empty_careful(&ctx->napi_list)) |
| return 0; |
| |
| rcu_read_lock(); |
| is_stale = __io_napi_do_busy_loop(ctx, NULL); |
| rcu_read_unlock(); |
| |
| io_napi_remove_stale(ctx, is_stale); |
| return 1; |
| } |
| |
| #endif |