/* SPDX-License-Identifier: MIT */

#include <errno.h>
#include <linux/io_uring.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

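/* Raw view of the shared SQ ring; each member points into the mmap()ed SQ ring. */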
struct io_sq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
};

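/* Raw view of the shared CQ ring; cqes points at the array of completion entries. */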
struct io_cq_ring {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	struct io_uring_cqe *cqes;
};

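/*
 * SQ state: the k* pointers reference fields in the shared SQ ring mapping,
 * sqe_head/sqe_tail track SQEs handed out locally but not yet submitted,
 * and ring_sz is the size of the SQ ring mapping.
 */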
struct io_uring_sq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *kflags;
	unsigned int *kdropped;
	unsigned int *array;
	struct io_uring_sqe *sqes;

	unsigned int sqe_head;
	unsigned int sqe_tail;

	size_t ring_sz;
};

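/* CQ state: the k* pointers reference fields in the shared CQ ring mapping. */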
struct io_uring_cq {
	unsigned int *khead;
	unsigned int *ktail;
	unsigned int *kring_mask;
	unsigned int *kring_entries;
	unsigned int *koverflow;
	struct io_uring_cqe *cqes;

	size_t ring_sz;
};

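/* Per-ring state: submission and completion queues plus the ring file descriptor. */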
struct io_uring {
	struct io_uring_sq sq;
	struct io_uring_cq cq;
	int ring_fd;
};

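/*
 * x86 is strongly ordered, so a compiler barrier is enough to order the
 * ring head/tail updates; other architectures fall back to a full barrier.
 */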
#if defined(__x86_64) || defined(__i386__)
#define read_barrier() __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
#else
#define read_barrier() __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif

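/*
 * Map the SQ ring, the SQE array and the CQ ring into user space using the
 * offsets returned by io_uring_setup(), and cache pointers to the ring
 * fields. Earlier mappings are unwound on failure.
 */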
static inline int io_uring_mmap(int fd, struct io_uring_params *p,
				struct io_uring_sq *sq, struct io_uring_cq *cq)
{
	size_t size;
	void *ptr;
	int ret;

	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int);
	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
	if (ptr == MAP_FAILED)
		return -errno;
	sq->khead = ptr + p->sq_off.head;
	sq->ktail = ptr + p->sq_off.tail;
	sq->kring_mask = ptr + p->sq_off.ring_mask;
	sq->kring_entries = ptr + p->sq_off.ring_entries;
	sq->kflags = ptr + p->sq_off.flags;
	sq->kdropped = ptr + p->sq_off.dropped;
	sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
	if (sq->sqes == MAP_FAILED) {
		ret = -errno;
err:
		munmap(sq->khead, sq->ring_sz);
		return ret;
	}

	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
	if (ptr == MAP_FAILED) {
		ret = -errno;
		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
		goto err;
	}
	cq->khead = ptr + p->cq_off.head;
	cq->ktail = ptr + p->cq_off.tail;
	cq->kring_mask = ptr + p->cq_off.ring_mask;
	cq->kring_entries = ptr + p->cq_off.ring_entries;
	cq->koverflow = ptr + p->cq_off.overflow;
	cq->cqes = ptr + p->cq_off.cqes;
	return 0;
}

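/* Thin wrapper around the io_uring_setup(2) syscall; returns the ring fd. */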
static inline int io_uring_setup(unsigned int entries,
				 struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

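/* Thin wrapper around io_uring_enter(2); the last argument is the sigset size in bytes. */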
static inline int io_uring_enter(int fd, unsigned int to_submit,
				 unsigned int min_complete,
				 unsigned int flags, sigset_t *sig)
{
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
		       flags, sig, _NSIG / 8);
}

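/* Create a ring of the requested size and map it; the fd is closed on mmap failure. */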
static inline int io_uring_queue_init(unsigned int entries,
				      struct io_uring *ring,
				      unsigned int flags)
{
	struct io_uring_params p;
	int fd, ret;

	memset(ring, 0, sizeof(*ring));
	memset(&p, 0, sizeof(p));
	p.flags = flags;

	fd = io_uring_setup(entries, &p);
	if (fd < 0)
		return fd;
	ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
	if (!ret)
		ring->ring_fd = fd;
	else
		close(fd);
	return ret;
}

/* Get the next free SQE, or NULL if the submission ring is full. */
static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
		return NULL;
	return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
}

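/*
 * Return the CQE at the current head, entering the kernel to wait for a
 * completion if the CQ ring is empty. The head is not advanced here; call
 * io_uring_cqe_seen() once the CQE has been consumed.
 */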
static inline int io_uring_wait_cqe(struct io_uring *ring,
				    struct io_uring_cqe **cqe_ptr)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned int mask = *cq->kring_mask;
	unsigned int head = *cq->khead;
	int ret;

	*cqe_ptr = NULL;
	do {
		read_barrier();
		if (head != *cq->ktail) {
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		ret = io_uring_enter(ring->ring_fd, 0, 1,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}

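/*
 * Copy the locally queued SQE indices into the shared SQ array, publish the
 * new tail and tell the kernel to consume them via io_uring_enter().
 */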
static inline int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned int mask = *sq->kring_mask;
	unsigned int ktail, submitted, to_submit;
	int ret;

	read_barrier();
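	/* Entries are still pending in the shared ring; just ask the kernel to consume them. */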
	if (*sq->khead != *sq->ktail) {
		submitted = *sq->kring_entries;
		goto submit;
	}
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

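	/* Fill the shared index array with the SQEs queued since the last submit. */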
	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	for (submitted = 0; submitted < to_submit; submitted++) {
		read_barrier();
		sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
	}
	if (!submitted)
		return 0;

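	/* Publish the new tail; the barriers order the array stores before the tail store. */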
	if (*sq->ktail != ktail) {
		write_barrier();
		*sq->ktail = ktail;
		write_barrier();
	}
submit:
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
			     IORING_ENTER_GETEVENTS, NULL);
	return ret < 0 ? -errno : ret;
}

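/* Unmap the SQE array and the SQ ring and close the ring file descriptor. */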
static inline void io_uring_queue_exit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;

	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
	munmap(sq->khead, sq->ring_sz);
	close(ring->ring_fd);
}

/* Prepare an IORING_OP_URING_CMD SQE for a socket command such as get/setsockopt. */
static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
				     int sockfd,
				     int level, int optname,
				     const void *optval,
				     int optlen)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_URING_CMD;
	sqe->fd = sockfd;
	sqe->cmd_op = op;

	sqe->level = level;
	sqe->optname = optname;
	sqe->optval = (unsigned long long)optval;
	sqe->optlen = optlen;
}

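/* Register fixed buffers with the kernel via io_uring_register(2). */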
static inline int io_uring_register_buffers(struct io_uring *ring,
					    const struct iovec *iovecs,
					    unsigned int nr_iovecs)
{
	int ret;

	ret = syscall(__NR_io_uring_register, ring->ring_fd,
		      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
	return (ret < 0) ? -errno : ret;
}

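/* Prepare an IORING_OP_SEND SQE for a plain copying send. */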
static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
				      const void *buf, size_t len, int flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = (__u8)IORING_OP_SEND;
	sqe->fd = sockfd;
	sqe->addr = (unsigned long)buf;
	sqe->len = len;
	sqe->msg_flags = (__u32)flags;
}

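/* Prepare an IORING_OP_SEND_ZC SQE; zero-copy flags are carried in sqe->ioprio. */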
static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
					const void *buf, size_t len, int flags,
					unsigned int zc_flags)
{
	io_uring_prep_send(sqe, sockfd, buf, len, flags);
	sqe->opcode = (__u8)IORING_OP_SEND_ZC;
	sqe->ioprio = zc_flags;
}

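/* Mark the current CQE as consumed by advancing the shared CQ head. */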
static inline void io_uring_cqe_seen(struct io_uring *ring)
{
	*ring->cq.khead += 1;
	write_barrier();
}
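
/*
 * Example usage (a minimal sketch, not taken from this file): assumes a
 * connected socket "sockfd" and a kernel with io_uring support. Error
 * handling is elided.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *	static const char msg[] = "hello";
 *
 *	io_uring_queue_init(4, &ring, 0);
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_send(sqe, sockfd, msg, sizeof(msg) - 1, 0);
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	// cqe->res holds the send(2) result: bytes sent or a negative errno
 *	io_uring_cqe_seen(&ring);
 *	io_uring_queue_exit(&ring);
 */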