io_uring: add a few test tools

This adds two test programs in tools/io_uring/ that demonstrate both
the raw io_uring API (and all features) through a small benchmark
app, io_uring-bench, and the liburing exposed API in a simplified
cp(1) implementation through io_uring-cp.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c
new file mode 100644
index 0000000..4da19a7
--- /dev/null
+++ b/tools/io_uring/setup.c
@@ -0,0 +1,103 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include "liburing.h"
+
+static int io_uring_mmap(int fd, struct io_uring_params *p,
+			 struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+	size_t size;
+	void *ptr;
+	int ret;
+
+	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
+	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
+	if (ptr == MAP_FAILED)
+		return -errno;
+	sq->khead = ptr + p->sq_off.head;
+	sq->ktail = ptr + p->sq_off.tail;
+	sq->kring_mask = ptr + p->sq_off.ring_mask;
+	sq->kring_entries = ptr + p->sq_off.ring_entries;
+	sq->kflags = ptr + p->sq_off.flags;
+	sq->kdropped = ptr + p->sq_off.dropped;
+	sq->array = ptr + p->sq_off.array;
+
+	size = p->sq_entries * sizeof(struct io_uring_sqe),
+	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_POPULATE, fd,
+				IORING_OFF_SQES);
+	if (sq->sqes == MAP_FAILED) {
+		ret = -errno;
+err:
+		munmap(sq->khead, sq->ring_sz);
+		return ret;
+	}
+
+	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
+	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
+	if (ptr == MAP_FAILED) {
+		ret = -errno;
+		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
+		goto err;
+	}
+	cq->khead = ptr + p->cq_off.head;
+	cq->ktail = ptr + p->cq_off.tail;
+	cq->kring_mask = ptr + p->cq_off.ring_mask;
+	cq->kring_entries = ptr + p->cq_off.ring_entries;
+	cq->koverflow = ptr + p->cq_off.overflow;
+	cq->cqes = ptr + p->cq_off.cqes;
+	return 0;
+}
+
+/*
+ * For users that want to specify sq_thread_cpu or sq_thread_idle, this
+ * interface is a convenient helper for mmap()ing the rings.
+ * Returns -1 on error, or zero on success.  On success, 'ring'
+ * contains the necessary information to read/write to the rings.
+ */
+int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring)
+{
+	int ret;
+
+	memset(ring, 0, sizeof(*ring));
+	ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
+	if (!ret)
+		ring->ring_fd = fd;
+	return ret;
+}
+
+/*
+ * Returns -1 on error, or zero on success. On success, 'ring'
+ * contains the necessary information to read/write to the rings.
+ */
+int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
+{
+	struct io_uring_params p;
+	int fd;
+
+	memset(&p, 0, sizeof(p));
+	p.flags = flags;
+
+	fd = io_uring_setup(entries, &p);
+	if (fd < 0)
+		return fd;
+
+	return io_uring_queue_mmap(fd, &p, ring);
+}
+
+void io_uring_queue_exit(struct io_uring *ring)
+{
+	struct io_uring_sq *sq = &ring->sq;
+	struct io_uring_cq *cq = &ring->cq;
+
+	munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
+	munmap(sq->khead, sq->ring_sz);
+	munmap(cq->khead, cq->ring_sz);
+	close(ring->ring_fd);
+}