bpf: Add support for a KVM_DEV_TYPE_BPF device

Add support for instantiating a KVM_DEV_TYPE_BPF device and attaching
BPF programs in the form of an ELF binary accepted by the new '--bpf'
command-line option.

Signed-off-by: Will Deacon <will@kernel.org>
diff --git a/Makefile b/Makefile
index ed2414b..f2706c3 100644
--- a/Makefile
+++ b/Makefile
@@ -314,6 +314,32 @@
 	endif
 endif
 
+ifeq ($(call try-build,$(SOURCE_LIBBPF),$(CFLAGS),$(LDFLAGS) -lbpf),y)
+	CFLAGS_DYNOPT	+= -DCONFIG_HAS_LIBBPF
+	LIBS_DYNOPT	+= -lbpf
+	OBJS_DYNOPT	+= bpf.o
+else
+	ifeq ($(call try-build,$(SOURCE_LIBBPF),$(CFLAGS),$(LDFLAGS) -lbpf -static),y)
+		CFLAGS_STATOPT	+= -DCONFIG_HAS_LIBBPF
+		LIBS_STATOPT	+= -lbpf
+		OBJS_STATOPT	+= bpf.o
+	else
+		NOTFOUND	+= libbpf
+	endif
+endif
+
+ifeq ($(call try-build,$(SOURCE_ELFUTILS_LIBELF),$(CFLAGS),$(LDFLAGS) -lelf),y)
+	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ELFUTILS_LIBELF
+	LIBS_DYNOPT	+= -lelf
+else
+	ifeq ($(call try-build,$(SOURCE_ELFUTILS_LIBELF),$(CFLAGS),$(LDFLAGS) -lelf -static),y)
+		CFLAGS_STATOPT	+= -DCONFIG_HAS_ELFUTILS_LIBELF
+		LIBS_STATOPT	+= -lelf
+	else
+		NOTFOUND	+= elfutils-libelf
+	endif
+endif
+
 ifeq ($(call try-build,$(SOURCE_AIO),$(CFLAGS),$(LDFLAGS) -laio),y)
 	CFLAGS_DYNOPT	+= -DCONFIG_HAS_AIO
 	LIBS_DYNOPT	+= -laio
diff --git a/arm/include/arm-common/fdt-arch.h b/arm/include/arm-common/fdt-arch.h
index 60c2d40..a53266f 100644
--- a/arm/include/arm-common/fdt-arch.h
+++ b/arm/include/arm-common/fdt-arch.h
@@ -1,6 +1,6 @@
 #ifndef ARM__FDT_H
 #define ARM__FDT_H
 
-enum phandles {PHANDLE_RESERVED = 0, PHANDLE_GIC, PHANDLE_MSI, PHANDLES_MAX};
+enum phandles {PHANDLE_RESERVED = 0, PHANDLE_GIC, PHANDLE_MSI, PHANDLE_CLK, PHANDLES_MAX};
 
 #endif /* ARM__FDT_H */
diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
index 60eec02..a04d531 100644
--- a/arm/include/arm-common/kvm-arch.h
+++ b/arm/include/arm-common/kvm-arch.h
@@ -46,7 +46,10 @@
 #define KVM_FLASH_MMIO_BASE	(ARM_MMIO_AREA + 0x1000000)
 #define KVM_FLASH_MAX_SIZE	0x1000000
 
-#define KVM_VIRTIO_MMIO_AREA	(KVM_FLASH_MMIO_BASE + KVM_FLASH_MAX_SIZE)
+#define KVM_BPF_MMIO_BASE	(KVM_FLASH_MMIO_BASE + KVM_FLASH_MAX_SIZE)
+#define KVM_BPF_MMIO_SIZE	0x10000
+
+#define KVM_VIRTIO_MMIO_AREA	(KVM_BPF_MMIO_BASE + KVM_BPF_MMIO_SIZE)
 #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - \
 				(KVM_VIRTIO_MMIO_AREA + ARM_GIC_SIZE))
 
diff --git a/bpf.c b/bpf.c
new file mode 100644
index 0000000..cf3fa52
--- /dev/null
+++ b/bpf.c
@@ -0,0 +1,512 @@
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+/* libelf is optional: only include its headers when the build found it. */
+#ifdef CONFIG_HAS_ELFUTILS_LIBELF
+#include <gelf.h>
+#include <libelf.h>
+#endif
+
+#include <errno.h>
+
+#include "kvm/kvm.h"
+#include <linux/stringify.h>
+#include "kvm-bpf-elf.h"
+
+/* XXX: Replace with BPF_PROG_TYPE_STRUCT_OPS */
+#define BPF_PROG_TYPE_KVM_IO_READ	(BPF_PROG_TYPE_SK_LOOKUP + 2)
+#define BPF_PROG_TYPE_KVM_IO_WRITE	(BPF_PROG_TYPE_KVM_IO_READ + 1)
+
+/* State of the BPF-backed MMIO device. */
+struct kvm_bpf_device {
+	/* Header passed to device__register() */
+	struct device_header		dev_hdr;
+	/* strdup()'d DT compatible string from the ELF note, NULL if absent */
+	const char			*compatible;
+	/* Guest address, size and BPF program fds handed to the KVM device */
+	struct kvm_bpf_user_region	reg;
+};
+
+/*
+ * Descriptor of an NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO ELF note: a subtype
+ * followed by the subtype-specific payload.
+ */
+struct kvm_bpf_note_mmio_desc {
+	u32	subtype;
+
+	union {
+		/* NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE */
+		struct {
+			/* Embedded, NUL-terminated string */
+			const char	str[1];
+		} compatible;
+
+		/* NT_KVM_BPF_DEVICE_MMIO_SIZE */
+		struct {
+			u32		val;
+		} size;
+	};
+} __attribute__((packed));
+
+/*
+ * Ask KVM to create a KVM_DEV_TYPE_BPF device for the VM behind @kvm.
+ *
+ * Returns the file descriptor of the new device, or a negative errno value.
+ */
+static int bpf__create_device(struct kvm *kvm)
+{
+	struct kvm_create_device bpf_device = {
+		.type	= KVM_DEV_TYPE_BPF,
+	};
+	int err;
+
+	err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &bpf_device);
+	if (err) {
+		/* ioctl() only says -1; errno holds the reason worth logging. */
+		return err < 0 ? -errno : err;
+	}
+
+	return bpf_device.fd;
+}
+
+#ifdef CONFIG_HAS_ELFUTILS_LIBELF
+/*
+ * Record a single MMIO property note in @dev.
+ *
+ * @raw points at the note descriptor and @len is the descriptor size from the
+ * note header. Both originate from the object file, so no more than @len
+ * bytes are read. Truncated, unterminated, duplicate and unknown properties
+ * are skipped with a warning.
+ *
+ * Returns 0, or -ENOMEM if the compatible string could not be duplicated.
+ */
+static int bpf__parse_mmio_note(struct kvm_bpf_device *dev, const char *raw,
+				size_t len)
+{
+	const struct kvm_bpf_note_mmio_desc *desc = (const void *)raw;
+	const char *payload;
+
+	if (len < sizeof(desc->subtype)) {
+		pr_warning("Ignoring truncated MMIO ELF note\n");
+		return 0;
+	}
+
+	/* The struct is packed, so the payload directly follows the subtype. */
+	payload = raw + sizeof(desc->subtype);
+	len -= sizeof(desc->subtype);
+
+	switch (desc->subtype) {
+	case NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE:
+		if (dev->compatible) {
+			pr_warning("Ignoring duplicate MMIO_DT_COMPATIBLE in ELF note!\n");
+			break;
+		}
+
+		/* strdup() must find the terminator inside the descriptor. */
+		if (!memchr(payload, '\0', len)) {
+			pr_warning("Ignoring unterminated MMIO_DT_COMPATIBLE in ELF note!\n");
+			break;
+		}
+
+		dev->compatible = strdup(payload);
+		if (!dev->compatible)
+			return -ENOMEM;
+		break;
+	case NT_KVM_BPF_DEVICE_MMIO_SIZE:
+		if (len < sizeof(desc->size)) {
+			pr_warning("Ignoring truncated MMIO_SIZE in ELF note!\n");
+			break;
+		}
+
+		if (!dev->reg.size)
+			dev->reg.size = desc->size.val;
+		else
+			pr_warning("Ignoring duplicate MMIO_SIZE in ELF note!\n");
+		break;
+	default:
+		pr_warning("Ignoring unknown MMIO note subtype %u\n",
+			   desc->subtype);
+	}
+
+	return 0;
+}
+
+/*
+ * Find the NT_KVM_BPF_SECTION_NAME note section in @objfile and copy the MMIO
+ * properties it describes (DT compatible string, region size) into @dev.
+ *
+ * An object without that section is reported but not treated as a failure.
+ *
+ * Returns 0 on success and a negative error code otherwise.
+ */
+static int bpf__parse_kvm_elf_note(const char *objfile,
+				   struct kvm_bpf_device *dev)
+{
+	size_t nameoff, descoff, off, shstrndx;
+	int fd, ret = -EINVAL;
+	GElf_Nhdr nhdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *scn;
+	Elf *elf;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_err("Failed to initialise libelf\n");
+		goto out;
+	}
+
+	fd = open(objfile, O_RDONLY);
+	if (fd < 0) {
+		perror("bpf: open");
+		goto out;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_err("elf_begin() failed for %s\n", objfile);
+		goto out_close;
+	}
+
+	if (elf_kind(elf) != ELF_K_ELF) {
+		pr_err("%s is not an ELF object\n", objfile);
+		goto out_elf_end;
+	}
+
+	if (gelf_getclass(elf) != ELFCLASS64) {
+		pr_err("%s is not a 64-bit ELF object\n", objfile);
+		goto out_elf_end;
+	}
+
+	if (elf_getshdrstrndx(elf, &shstrndx)) {
+		pr_err("Failed to find string table in %s\n", objfile);
+		goto out_elf_end;
+	}
+
+	scn = NULL;
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *secname;
+
+		if (gelf_getshdr(scn, &shdr) != &shdr) {
+			pr_err("Failed to get section headers at index %zu\n",
+				elf_ndxscn(scn));
+			goto out_elf_end;
+		}
+
+		secname = elf_strptr(elf, shstrndx, shdr.sh_name);
+		if (!secname) {
+			pr_err("Failed to get section name at index %zu\n",
+				elf_ndxscn(scn));
+			goto out_elf_end;
+		}
+
+		if (!strcmp(secname, NT_KVM_BPF_SECTION_NAME))
+			break;
+	}
+
+	if (!scn) {
+		pr_err("No %s section found in %s\n",
+			NT_KVM_BPF_SECTION_NAME, objfile);
+		ret = 0;
+		goto out_elf_end;
+	}
+
+	data = elf_getdata(scn, NULL);
+	if (!data) {
+		pr_err("Failed to pull data from %s section\n",
+			NT_KVM_BPF_SECTION_NAME);
+		goto out_elf_end;
+	}
+
+	off = 0;
+	while ((off = gelf_getnote(data, off, &nhdr, &nameoff, &descoff))) {
+		const char *name = (const char *)data->d_buf + nameoff;
+		const char *raw = (const char *)data->d_buf + descoff;
+
+		/* The owner name is file-controlled: insist on a terminator. */
+		if (!memchr(name, '\0', nhdr.n_namesz) ||
+		    strcmp(name, NT_KVM_BPF_DEVICE_NAME)) {
+			pr_warning("Ignoring unknown ELF note (found %.*s expected %s)\n",
+				   (int)nhdr.n_namesz, name,
+				   NT_KVM_BPF_DEVICE_NAME);
+			continue;
+		}
+
+		if (nhdr.n_type != NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO) {
+			pr_warning("Ignoring unknown ELF note type (found %u expected %u)\n",
+				   nhdr.n_type, NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO);
+			continue;
+		}
+
+		ret = bpf__parse_mmio_note(dev, raw, nhdr.n_descsz);
+		if (ret)
+			goto out_elf_end;
+	}
+
+	ret = 0;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+out:
+	return ret;
+}
+#else
+/* Stub used when libelf is unavailable: @dev is left untouched. */
+static int bpf__parse_kvm_elf_note(const char *objfile,
+				   struct kvm_bpf_device *dev)
+{
+	pr_info("libelf missing: skipping ELF note parsing\n");
+	return 0;
+}
+#endif
+
+/*
+ * Parse the KVM ELF note of @objfile, then open the object with libbpf, give
+ * the read and write handlers their KVM I/O program types, load them and
+ * store the resulting program fds in @dev->reg.
+ *
+ * The bpf_object is closed again on every failure path. On success it stays
+ * open; NOTE(review): presumably the program fds depend on it - confirm.
+ *
+ * Returns 0 on success and a negative error code otherwise.
+ */
+static int bpf__load_progs(const char *objfile, struct kvm_bpf_device *dev)
+{
+	struct kvm_bpf_user_region *reg = &dev->reg;
+	struct bpf_program *rprog, *wprog;
+	struct bpf_object *obj;
+	int err;
+
+	err = bpf__parse_kvm_elf_note(objfile, dev);
+	if (err)
+		return err;
+
+	obj = bpf_object__open(objfile);
+	if (!obj) {
+		perror("bpf: bpf_object__open");
+		return -EINVAL;
+	}
+
+	rprog = bpf_object__find_program_by_name(obj, __stringify(KVM_BPF_READ_PROG_NAME));
+	if (!rprog) {
+		perror("bpf: bpf_object__find_program_by_name (read)");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = bpf_program__set_type(rprog, BPF_PROG_TYPE_KVM_IO_READ);
+	if (err)
+		goto out_close;
+
+	wprog = bpf_object__find_program_by_name(obj, __stringify(KVM_BPF_WRITE_PROG_NAME));
+	if (!wprog) {
+		perror("bpf: bpf_object__find_program_by_name (write)");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = bpf_program__set_type(wprog, BPF_PROG_TYPE_KVM_IO_WRITE);
+	if (err)
+		goto out_close;
+
+	err = bpf_object__load(obj);
+	if (err)
+		goto out_close;
+
+	reg->bpf_readfd = bpf_program__fd(rprog);
+	reg->bpf_writefd = bpf_program__fd(wprog);
+	return 0;
+
+out_close:
+	bpf_object__close(obj);
+	return err;
+}
+
+/*
+ * Place the device's MMIO region at the fixed KVM_BPF_MMIO_BASE window.
+ *
+ * Only KVM_BPF_MMIO_SIZE bytes are set aside for that window, and the region
+ * size comes from the ELF note, so anything larger is refused with -ENOSPC
+ * instead of being allowed to spill past the end of the window.
+ */
+static int bpf__allocate_mmio_region(struct kvm *kvm,
+				     struct kvm_bpf_user_region *reg)
+{
+	if (reg->size > KVM_BPF_MMIO_SIZE)
+		return -ENOSPC;
+
+	reg->addr = KVM_BPF_MMIO_BASE;
+	return 0;
+}
+
+/*
+ * Pass @reg to the BPF device behind @devfd as its region 0.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int bpf__attach_progs(int devfd, struct kvm_bpf_user_region *reg)
+{
+	struct kvm_device_attr bpf_dev_attr = {
+		.group	= KVM_DEV_BPF_ATTR_GROUP_REGION,
+		.attr	= 0, /* Region index */
+		.addr	= (u64)(unsigned long)reg,
+	};
+	int err;
+
+	err = ioctl(devfd, KVM_SET_DEVICE_ATTR, &bpf_dev_attr);
+	return err < 0 ? -errno : err;
+}
+
+#ifdef CONFIG_HAS_LIBFDT
+/*
+ * Return the phandle of the fixed "apb_pclk" clock, writing its node to @fdt
+ * the first time this is called.
+ */
+static u32 get_amba_clock(void *fdt)
+{
+	static bool created;
+
+	if (!created) {
+		_FDT(fdt_begin_node(fdt, "clock"));
+		_FDT(fdt_property_string(fdt, "compatible", "fixed-clock"));
+		_FDT(fdt_property_cell(fdt, "#clock-cells", 0));
+		_FDT(fdt_property_cell(fdt, "clock-frequency", 100000000));
+		_FDT(fdt_property_string(fdt, "clock-output-names", "apb_pclk"));
+		_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_CLK));
+		_FDT(fdt_end_node(fdt));
+		created = true;
+	}
+
+	return PHANDLE_CLK;
+}
+
+/*
+ * FDT generator installed as the device header's .data hook.
+ *
+ * Writes a "bpf" node with the compatible string and MMIO region of the
+ * device; "arm,primecell" devices also get a reference to the apb_pclk clock.
+ * Nothing is written when the ELF note supplied no compatible string.
+ * @generate_irq_prop is not used.
+ */
+static void generate_bpf_fdt_node(void *fdt,
+				  struct device_header *dev_hdr,
+				  void (*generate_irq_prop)(void *fdt,
+							    u8 irq,
+							    enum irq_type))
+{
+	struct kvm_bpf_device *dev = container_of(dev_hdr,
+						  struct kvm_bpf_device,
+						  dev_hdr);
+	u64 reg_prop[2] = { cpu_to_fdt64(dev->reg.addr),
+			    cpu_to_fdt64(dev->reg.size) };
+	bool is_amba_device;
+	u32 clk_phandle = 0;
+
+	/* strcmp() and fdt_property_string() below both need a real string. */
+	if (!dev->compatible) {
+		pr_warning("BPF device has no DT compatible string: skipping FDT node\n");
+		return;
+	}
+
+	is_amba_device = !strcmp(dev->compatible, "arm,primecell");
+
+	/* The clock node is a sibling, so it must be written before ours. */
+	if (is_amba_device)
+		clk_phandle = get_amba_clock(fdt);
+
+	_FDT(fdt_begin_node(fdt, "bpf"));
+	_FDT(fdt_property_string(fdt, "compatible", dev->compatible));
+	_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
+
+	if (is_amba_device) {
+		_FDT(fdt_property_cell(fdt, "clocks", clk_phandle));
+		_FDT(fdt_property_string(fdt, "clock-names", "apb_pclk"));
+	}
+
+	_FDT(fdt_end_node(fdt));
+}
+
+#else
+#define generate_bpf_fdt_node	NULL
+#endif
+
+/* Register @dev on the MMIO bus, with generate_bpf_fdt_node as its .data. */
+static int bpf__device_register(struct kvm *kvm, struct kvm_bpf_device *dev)
+{
+	int err;
+
+	dev->dev_hdr = (struct device_header) {
+		.bus_type	= DEVICE_BUS_MMIO,
+		.data		= generate_bpf_fdt_node,
+	};
+
+	err = device__register(&dev->dev_hdr);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Set up the BPF device when a '--bpf' object file was given: load the BPF
+ * programs, create the KVM device, attach the programs to its MMIO region
+ * and register the device.
+ *
+ * Returns 0 on success, or when no object file was configured, and a
+ * negative error code otherwise.
+ */
+static int bpf__init(struct kvm *kvm)
+{
+	static struct kvm_bpf_device dev = { };
+
+	const char *bpf_objfile = kvm->cfg.bpf_filename;
+	int err, devfd;
+
+	if (!bpf_objfile)
+		return 0;
+
+	err = libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS |
+				     LIBBPF_STRICT_CLEAN_PTRS);
+	if (err) {
+		pr_err("Failed to initialise libbpf (%d).\n", err);
+		return err;
+	}
+
+	err = bpf__load_progs(bpf_objfile, &dev);
+	if (err) {
+		pr_err("Failed to load BPF programs (%d).\n", err);
+		return err;
+	}
+
+	devfd = bpf__create_device(kvm);
+	if (devfd < 0) {
+		pr_err("Failed to create BPF device (%d).\n", devfd);
+		return devfd;
+	}
+
+	err = bpf__allocate_mmio_region(kvm, &dev.reg);
+	if (err) {
+		pr_err("Failed to allocate space in MMIO region (%d).\n", err);
+		goto out_close;
+	}
+
+	err = bpf__attach_progs(devfd, &dev.reg);
+	if (err) {
+		pr_err("Failed to attach BPF programs to device (%d).\n", err);
+		goto out_close;
+	}
+
+	err = bpf__device_register(kvm, &dev);
+	if (err) {
+		pr_err("Failed to register BPF device (%d).\n", err);
+		goto out_close;
+	}
+
+	/* devfd is neither stored nor closed on success. */
+	return 0;
+
+out_close:
+	close(devfd);
+	return err;
+}
+dev_base_init(bpf__init);
diff --git a/builtin-run.c b/builtin-run.c
index bd0d0b9..8653817 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -168,6 +168,14 @@
 #define VIRTIO_TRANS_OPT_HELP_SHORT    "[pci|pci-legacy]"
 #endif
 
+#ifdef CONFIG_HAS_LIBBPF
+#define BPF_OPT(cfg)	OPT_STRING('\0', "bpf", &(cfg)->bpf_filename,	\
+			   "bpf object",				\
+			   "BPF ELF file to load into an MMIO device"),
+#else
+#define BPF_OPT(...)
+#endif
+
 #define BUILD_OPTIONS(name, cfg, kvm)					\
 	struct option name[] = {					\
 	OPT_GROUP("Basic options:"),					\
@@ -215,6 +223,7 @@
 		     VIRTIO_TRANS_OPT_HELP_SHORT,		        \
 		     "Type of virtio transport",			\
 		     virtio_transport_parser, NULL),			\
+	BPF_OPT(cfg)							\
 									\
 	OPT_GROUP("Kernel options:"),					\
 	OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel",	\
diff --git a/config/feature-tests.mak b/config/feature-tests.mak
index 03cdb42..8d82122 100644
--- a/config/feature-tests.mak
+++ b/config/feature-tests.mak
@@ -23,11 +23,13 @@
 endef
 endif
 
-define SOURCE_LIBELF
+define SOURCE_ELFUTILS_LIBELF
+#include <gelf.h>
 #include <libelf.h>
 
 int main(void)
 {
+	GElf_Nhdr nhdr;
 	Elf *elf = elf_begin(0, ELF_C_READ, 0);
 	return (long)elf;
 }
@@ -206,3 +208,13 @@
 	return 0;
 }
 endef
+
+define SOURCE_LIBBPF
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	return 0;
+}
+endef
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 592b035..24cd8a1 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -52,6 +52,7 @@
 	const char *hugetlbfs_path;
 	const char *custom_rootfs_name;
 	const char *real_cmdline;
+	const char *bpf_filename;
 	struct virtio_net_params *net_params;
 	bool single_step;
 	bool vnc;
diff --git a/kvm-bpf-elf.h b/kvm-bpf-elf.h
new file mode 100644
index 0000000..39d47cf
--- /dev/null
+++ b/kvm-bpf-elf.h
@@ -0,0 +1,23 @@
+#ifndef __KVM_BPF_ELF_H
+#define __KVM_BPF_ELF_H
+
+/* BPF program names */
+#define KVM_BPF_READ_PROG_NAME				kvm_io_read
+#define KVM_BPF_WRITE_PROG_NAME				kvm_io_write
+
+/* Name of the ELF note section (SHT_NOTE) */
+#define NT_KVM_BPF_SECTION_NAME				".note.kvm-bpf.mmio-device"
+
+/* Owner name, as per ELF specification of Note sections */
+#define NT_KVM_BPF_DEVICE_NAME				"kvm-bpf"
+
+/* Descriptor type for MMIO devices */
+#define NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO		0
+
+/* MMIO: DT compatible string */
+#define NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE		0
+
+/* MMIO: region size */
+#define NT_KVM_BPF_DEVICE_MMIO_SIZE			1
+
+#endif /* __KVM_BPF_ELF_H */