// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx *cq_ev_fd;
	unsigned int eventfd_async: 1;
	struct rcu_head rcu;
	refcount_t refs;
	atomic_t ops;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

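/*
 * Final teardown of an io_ev_fd: drop the eventfd context reference and
 * free the structure. Called via call_rcu() or from io_eventfd_do_signal()
 * once the last reference has been put.
 */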
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

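/*
 * RCU callback for the deferred-signal path: signal the eventfd with
 * EPOLL_URING_WAKE and drop the reference that io_eventfd_signal() took
 * before queueing this callback.
 */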
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		io_eventfd_free(rcu);
}

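/*
 * Signal the eventfd registered on this ring, if any. Respects the
 * IORING_CQ_EVENTFD_DISABLED flag, and if the eventfd was registered with
 * eventfd_async set, only signals from io-wq worker context.
 */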
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd = NULL;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for checking
	 * and for eventfd_signal.
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check again that ev_fd exists, in case an io_eventfd_unregister
	 * call completed between the NULL check of ctx->io_ev_fd at the
	 * start of the function and rcu_read_lock.
	 */
	if (unlikely(!ev_fd))
		return;
	if (!refcount_inc_not_zero(&ev_fd->refs))
		return;
	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
		goto out;

	if (likely(eventfd_signal_allowed())) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	} else {
		/*
		 * Signaling directly isn't allowed from this context, so
		 * defer it to an RCU callback. The SIGNAL_BIT in ->ops makes
		 * sure only one callback is queued at a time; the reference
		 * taken above is dropped by io_eventfd_do_signal().
		 */
		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
			return;
		}
	}
out:
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

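/*
 * Trigger the registered eventfd only if new CQEs have been posted since
 * the last time it was signaled, so the eventfd count only moves when the
 * CQ ring has actually changed.
 */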
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	bool skip;

	spin_lock(&ctx->completion_lock);

	/*
	 * Eventfd should only get triggered when at least one event has been
	 * posted. Some applications rely on the eventfd notification count
	 * only changing IFF a new CQE has been added to the CQ ring. There's
	 * no dependency on a 1:1 relationship between how many times this
	 * function is called (and hence the eventfd count) and the number of
	 * CQEs posted to the CQ ring.
	 */
	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);
	if (skip)
		return;

	io_eventfd_signal(ctx);
}

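/*
 * Register an eventfd with the ring: read the file descriptor from
 * userspace, take a reference to its eventfd context, and publish it via
 * RCU. Only one eventfd may be registered at a time (-EBUSY otherwise).
 * Must be called with ctx->uring_lock held.
 */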
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

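/*
 * Unregister the ring's eventfd, if one is registered: clear the RCU
 * pointer and drop the registration reference, freeing the io_ev_fd once
 * the last user is done. Returns -ENXIO if no eventfd was registered.
 * Must be called with ctx->uring_lock held.
 */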
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		if (refcount_dec_and_test(&ev_fd->refs))
			call_rcu(&ev_fd->rcu, io_eventfd_free);
		return 0;
	}

	return -ENXIO;
}