bpf: Add support for a KVM_DEV_TYPE_BPF device
Add support for instantiating a KVM_DEV_TYPE_BPF device and attaching
BPF programs in the form of an ELF binary accepted by the new '--bpf'
command-line option.
Signed-off-by: Will Deacon <will@kernel.org>
diff --git a/Makefile b/Makefile
index ed2414b..f2706c3 100644
--- a/Makefile
+++ b/Makefile
@@ -314,6 +314,32 @@
endif
endif
+# Optional libbpf support: try a dynamic link first, then a static one, and
+# record the library in NOTFOUND if neither test program builds. bpf.o is
+# only added to the build when one of the two probes succeeds.
+ifeq ($(call try-build,$(SOURCE_LIBBPF),$(CFLAGS),$(LDFLAGS) -lbpf),y)
+ CFLAGS_DYNOPT += -DCONFIG_HAS_LIBBPF
+ LIBS_DYNOPT += -lbpf
+ OBJS_DYNOPT += bpf.o
+else
+ ifeq ($(call try-build,$(SOURCE_LIBBPF),$(CFLAGS),$(LDFLAGS) -lbpf -static),y)
+ CFLAGS_STATOPT += -DCONFIG_HAS_LIBBPF
+ LIBS_STATOPT += -lbpf
+ OBJS_STATOPT += bpf.o
+ else
+ NOTFOUND += libbpf
+ endif
+endif
+
+# Optional elfutils libelf support, probed the same way (dynamic link first,
+# then static). This only defines CONFIG_HAS_ELFUTILS_LIBELF and adds the
+# link flag; no extra object files depend on it.
+ifeq ($(call try-build,$(SOURCE_ELFUTILS_LIBELF),$(CFLAGS),$(LDFLAGS) -lelf),y)
+ CFLAGS_DYNOPT += -DCONFIG_HAS_ELFUTILS_LIBELF
+ LIBS_DYNOPT += -lelf
+else
+ ifeq ($(call try-build,$(SOURCE_ELFUTILS_LIBELF),$(CFLAGS),$(LDFLAGS) -lelf -static),y)
+ CFLAGS_STATOPT += -DCONFIG_HAS_ELFUTILS_LIBELF
+ LIBS_STATOPT += -lelf
+ else
+ NOTFOUND += elfutils-libelf
+ endif
+endif
+
ifeq ($(call try-build,$(SOURCE_AIO),$(CFLAGS),$(LDFLAGS) -laio),y)
CFLAGS_DYNOPT += -DCONFIG_HAS_AIO
LIBS_DYNOPT += -laio
diff --git a/arm/include/arm-common/fdt-arch.h b/arm/include/arm-common/fdt-arch.h
index 60c2d40..a53266f 100644
--- a/arm/include/arm-common/fdt-arch.h
+++ b/arm/include/arm-common/fdt-arch.h
@@ -1,6 +1,6 @@
#ifndef ARM__FDT_H
#define ARM__FDT_H
-enum phandles {PHANDLE_RESERVED = 0, PHANDLE_GIC, PHANDLE_MSI, PHANDLES_MAX};
+/* PHANDLE_CLK is the phandle of the fixed "apb_pclk" clock node emitted by bpf.c. */
+enum phandles {PHANDLE_RESERVED = 0, PHANDLE_GIC, PHANDLE_MSI, PHANDLE_CLK, PHANDLES_MAX};
#endif /* ARM__FDT_H */
diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
index 60eec02..a04d531 100644
--- a/arm/include/arm-common/kvm-arch.h
+++ b/arm/include/arm-common/kvm-arch.h
@@ -46,7 +46,10 @@
#define KVM_FLASH_MMIO_BASE (ARM_MMIO_AREA + 0x1000000)
#define KVM_FLASH_MAX_SIZE 0x1000000
-#define KVM_VIRTIO_MMIO_AREA (KVM_FLASH_MMIO_BASE + KVM_FLASH_MAX_SIZE)
+/* MMIO window reserved for the BPF device, placed directly after the flash region. */
+#define KVM_BPF_MMIO_BASE (KVM_FLASH_MMIO_BASE + KVM_FLASH_MAX_SIZE)
+#define KVM_BPF_MMIO_SIZE 0x10000
+
+/* The virtio MMIO area starts immediately after the BPF window. */
+#define KVM_VIRTIO_MMIO_AREA (KVM_BPF_MMIO_BASE + KVM_BPF_MMIO_SIZE)
#define ARM_VIRTIO_MMIO_SIZE (ARM_AXI_AREA - \
(KVM_VIRTIO_MMIO_AREA + ARM_GIC_SIZE))
diff --git a/bpf.c b/bpf.c
new file mode 100644
index 0000000..cf3fa52
--- /dev/null
+++ b/bpf.c
@@ -0,0 +1,369 @@
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <gelf.h>
+#include <libelf.h>
+
+#include "kvm/kvm.h"
+#include <linux/stringify.h>
+#include "kvm-bpf-elf.h"
+
+/* XXX: Replace with BPF_PROG_TYPE_STRUCT_OPS */
+/*
+ * The KVM I/O program type IDs are computed as fixed offsets from
+ * BPF_PROG_TYPE_SK_LOOKUP instead of being taken from a header.
+ * NOTE(review): presumably these must match the enum values of the kernel
+ * that provides KVM_DEV_TYPE_BPF - confirm against that kernel's uapi.
+ */
+#define BPF_PROG_TYPE_KVM_IO_READ (BPF_PROG_TYPE_SK_LOOKUP + 2)
+#define BPF_PROG_TYPE_KVM_IO_WRITE (BPF_PROG_TYPE_KVM_IO_READ + 1)
+
+/* State of the BPF-backed MMIO device. */
+struct kvm_bpf_device {
+ /* Header registered with device__register(); its .data is the FDT node generator */
+ struct device_header dev_hdr;
+ /* DT "compatible" string strdup()'d from the ELF note; NULL if none was found */
+ const char *compatible;
+ /* Region descriptor handed to KVM: address, size and the read/write program fds */
+ struct kvm_bpf_user_region reg;
+};
+
+/*
+ * Layout of the descriptor of a kvm-bpf MMIO ELF note: a 32-bit subtype
+ * followed by subtype-specific data. The struct is packed and is overlaid
+ * directly on the note data returned by libelf.
+ */
+struct kvm_bpf_note_mmio_desc {
+ u32 subtype;
+
+ union {
+ /* NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE */
+ struct {
+ /* Embedded, NUL-terminated string */
+ /* Only the first byte is declared; the string continues in the note data */
+ const char str[1];
+ } compatible;
+
+ /* NT_KVM_BPF_DEVICE_MMIO_SIZE */
+ struct {
+ u32 val;
+ } size;
+ };
+} __attribute__((packed));
+
+/*
+ * Ask KVM to instantiate a KVM_DEV_TYPE_BPF device on this VM.
+ *
+ * Returns the new device fd on success, or a negative errno value if the
+ * KVM_CREATE_DEVICE ioctl fails.
+ */
+static int bpf__create_device(struct kvm *kvm)
+{
+	struct kvm_create_device bpf_device = {
+		.type = KVM_DEV_TYPE_BPF,
+	};
+
+	/* ioctl() reports failure as -1 with errno set; hand the real cause back. */
+	if (ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &bpf_device) < 0)
+		return -errno;
+
+	return bpf_device.fd;
+}
+
+#ifdef CONFIG_HAS_ELFUTILS_LIBELF
+/*
+ * Extract the MMIO device properties from the kvm-bpf ELF notes of @objfile.
+ *
+ * Finds the NT_KVM_BPF_SECTION_NAME section and walks its notes. For notes
+ * owned by NT_KVM_BPF_DEVICE_NAME with type NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO,
+ * the first DT compatible string is duplicated into dev->compatible and the
+ * first region size is stored in dev->reg.size. Unknown, duplicate, truncated
+ * or unterminated notes are skipped with a warning.
+ *
+ * Returns 0 on success (including when the section is absent), -ENOMEM if the
+ * compatible string cannot be duplicated and -EINVAL for any other failure.
+ */
+static int bpf__parse_kvm_elf_note(const char *objfile,
+				   struct kvm_bpf_device *dev)
+{
+	size_t nameoff, descoff, off, shstrndx;
+	int fd, ret = -EINVAL;
+	GElf_Nhdr nhdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *scn;
+	Elf *elf;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_err("Failed to initialise libelf\n");
+		goto out;
+	}
+
+	fd = open(objfile, O_RDONLY);
+	if (fd < 0) {
+		perror("bpf: open");
+		goto out;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_err("elf_begin() failed for %s\n", objfile);
+		goto out_close;
+	}
+
+	if (elf_kind(elf) != ELF_K_ELF) {
+		pr_err("%s is not an ELF object\n", objfile);
+		goto out_elf_end;
+	}
+
+	if (gelf_getclass(elf) != ELFCLASS64) {
+		pr_err("%s is not a 64-bit ELF object\n", objfile);
+		goto out_elf_end;
+	}
+
+	if (elf_getshdrstrndx(elf, &shstrndx)) {
+		pr_err("Failed to find string table in %s\n", objfile);
+		goto out_elf_end;
+	}
+
+	/* Locate the note section by name. */
+	scn = NULL;
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *secname;
+
+		if (gelf_getshdr(scn, &shdr) != &shdr) {
+			pr_err("Failed to get section headers at index %zu\n",
+			       elf_ndxscn(scn));
+			goto out_elf_end;
+		}
+
+		secname = elf_strptr(elf, shstrndx, shdr.sh_name);
+		if (!secname) {
+			pr_err("Failed to get section name at index %zu\n",
+			       elf_ndxscn(scn));
+			goto out_elf_end;
+		}
+
+		if (!strcmp(secname, NT_KVM_BPF_SECTION_NAME))
+			break;
+	}
+
+	if (!scn) {
+		/* A missing note section is reported but is not treated as fatal. */
+		pr_err("No %s section found in %s\n",
+		       NT_KVM_BPF_SECTION_NAME, objfile);
+		ret = 0;
+		goto out_elf_end;
+	}
+
+	data = elf_getdata(scn, NULL);
+	if (!data) {
+		pr_err("Failed to pull data from %s section\n",
+		       NT_KVM_BPF_SECTION_NAME);
+		goto out_elf_end;
+	}
+
+	off = 0;
+	while ((off = gelf_getnote(data, off, &nhdr, &nameoff, &descoff))) {
+		const char *name = (const char *)data->d_buf + nameoff;
+		const struct kvm_bpf_note_mmio_desc *desc =
+			(const void *)((const char *)data->d_buf + descoff);
+		size_t payload;
+
+		/* Only trust the owner name if it is NUL-terminated within n_namesz. */
+		if (!nhdr.n_namesz || name[nhdr.n_namesz - 1] != '\0' ||
+		    strcmp(name, NT_KVM_BPF_DEVICE_NAME)) {
+			pr_warning("Ignoring unknown ELF note (found %.*s expected %s)\n",
+				   (int)nhdr.n_namesz, name,
+				   NT_KVM_BPF_DEVICE_NAME);
+			continue;
+		}
+
+		if (nhdr.n_type != NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO) {
+			pr_warning("Ignoring unknown ELF note type (found %u expected %u)\n",
+				   nhdr.n_type, NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO);
+			continue;
+		}
+
+		/* The descriptor must at least hold the subtype word. */
+		if (nhdr.n_descsz < sizeof(desc->subtype)) {
+			pr_warning("Ignoring truncated MMIO ELF note\n");
+			continue;
+		}
+		payload = nhdr.n_descsz - sizeof(desc->subtype);
+
+		switch (desc->subtype) {
+		case NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE:
+			if (dev->compatible) {
+				pr_warning("Ignoring duplicate MMIO_DT_COMPATIBLE in ELF note!\n");
+				break;
+			}
+
+			/* Never let strdup() run off the end of the descriptor. */
+			if (!memchr(desc->compatible.str, '\0', payload)) {
+				pr_warning("Ignoring unterminated MMIO_DT_COMPATIBLE in ELF note!\n");
+				break;
+			}
+
+			dev->compatible = strdup(desc->compatible.str);
+			if (!dev->compatible) {
+				ret = -ENOMEM;
+				goto out_elf_end;
+			}
+			break;
+		case NT_KVM_BPF_DEVICE_MMIO_SIZE:
+			if (dev->reg.size) {
+				pr_warning("Ignoring duplicate MMIO_SIZE in ELF note!\n");
+				break;
+			}
+
+			if (payload < sizeof(desc->size.val)) {
+				pr_warning("Ignoring truncated MMIO_SIZE in ELF note!\n");
+				break;
+			}
+
+			dev->reg.size = desc->size.val;
+			break;
+		default:
+			pr_warning("Ignoring unknown MMIO note subtype %u\n",
+				   desc->subtype);
+		}
+	}
+
+	ret = 0;
+out_elf_end:
+	elf_end(elf);
+out_close:
+	close(fd);
+out:
+	return ret;
+}
+#else
+/*
+ * Fallback used when built without libelf: the ELF note is not parsed, @dev
+ * is left untouched and success is reported.
+ */
+static int bpf__parse_kvm_elf_note(const char *objfile,
+				   struct kvm_bpf_device *dev)
+{
+	int ret = 0;
+
+	pr_info("libelf missing: skipping ELF note parsing\n");
+
+	return ret;
+}
+#endif
+
+/*
+ * Load the read/write handler programs from @objfile.
+ *
+ * The ELF note is parsed first to fill in the device properties in @dev.
+ * The object is then opened with libbpf, the two handler programs are looked
+ * up by name and given the KVM I/O program types, the object is loaded and
+ * the resulting program fds are stored in dev->reg.
+ *
+ * Returns 0 on success or a negative error code. On failure the bpf_object is
+ * closed again. On success it is deliberately left open, because the program
+ * fds stored in dev->reg belong to it (see the libbpf documentation for
+ * bpf_program__fd() and bpf_object__close()).
+ */
+static int bpf__load_progs(const char *objfile, struct kvm_bpf_device *dev)
+{
+	struct kvm_bpf_user_region *reg = &dev->reg;
+	struct bpf_program *rprog, *wprog;
+	struct bpf_object *obj;
+	int err;
+
+	err = bpf__parse_kvm_elf_note(objfile, dev);
+	if (err)
+		return err;
+
+	obj = bpf_object__open(objfile);
+	if (!obj) {
+		perror("bpf: bpf_object__open");
+		return -EINVAL;
+	}
+
+	rprog = bpf_object__find_program_by_name(obj, __stringify(KVM_BPF_READ_PROG_NAME));
+	if (!rprog) {
+		perror("bpf: bpf_object__find_program_by_name (read)");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = bpf_program__set_type(rprog, BPF_PROG_TYPE_KVM_IO_READ);
+	if (err)
+		goto out_close;
+
+	wprog = bpf_object__find_program_by_name(obj, __stringify(KVM_BPF_WRITE_PROG_NAME));
+	if (!wprog) {
+		perror("bpf: bpf_object__find_program_by_name (write)");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = bpf_program__set_type(wprog, BPF_PROG_TYPE_KVM_IO_WRITE);
+	if (err)
+		goto out_close;
+
+	err = bpf_object__load(obj);
+	if (err)
+		goto out_close;
+
+	reg->bpf_readfd = bpf_program__fd(rprog);
+	reg->bpf_writefd = bpf_program__fd(wprog);
+	return 0;
+
+out_close:
+	/* Do not leak the object (and anything it loaded) on failure. */
+	bpf_object__close(obj);
+	return err;
+}
+
+/*
+ * Place the device's MMIO region at the fixed KVM_BPF_MMIO_BASE window.
+ *
+ * Returns 0 on success, or -ENOSPC if the size already recorded in @reg
+ * exceeds the KVM_BPF_MMIO_SIZE bytes reserved for the device. A larger
+ * region would spill into whatever follows the window (the virtio MMIO area
+ * in the arm memory map).
+ */
+static int bpf__allocate_mmio_region(struct kvm *kvm,
+				     struct kvm_bpf_user_region *reg)
+{
+	if (reg->size > KVM_BPF_MMIO_SIZE)
+		return -ENOSPC;
+
+	reg->addr = KVM_BPF_MMIO_BASE;
+	return 0;
+}
+
+/*
+ * Hand the region descriptor @reg (address, size and program fds) to the BPF
+ * device @devfd as region index 0.
+ *
+ * Returns 0 on success, or a negative errno value if the KVM_SET_DEVICE_ATTR
+ * ioctl fails.
+ */
+static int bpf__attach_progs(int devfd, struct kvm_bpf_user_region *reg)
+{
+	struct kvm_device_attr bpf_dev_attr = {
+		.group	= KVM_DEV_BPF_ATTR_GROUP_REGION,
+		.attr	= 0, /* Region index */
+		.addr	= (u64)(unsigned long)reg,
+	};
+
+	/* ioctl() reports failure as -1 with errno set; hand the real cause back. */
+	if (ioctl(devfd, KVM_SET_DEVICE_ATTR, &bpf_dev_attr) < 0)
+		return -errno;
+
+	return 0;
+}
+
+#ifdef CONFIG_HAS_LIBFDT
+/*
+ * Return the phandle of the fixed 100MHz "apb_pclk" clock, emitting its
+ * "clock" node into @fdt the first time it is requested.
+ */
+static u32 get_amba_clock(void *fdt)
+{
+	static bool emitted;
+
+	if (emitted)
+		return PHANDLE_CLK;
+
+	_FDT(fdt_begin_node(fdt, "clock"));
+	_FDT(fdt_property_string(fdt, "compatible", "fixed-clock"));
+	_FDT(fdt_property_cell(fdt, "#clock-cells", 0));
+	_FDT(fdt_property_cell(fdt, "clock-frequency", 100000000));
+	_FDT(fdt_property_string(fdt, "clock-output-names", "apb_pclk"));
+	_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_CLK));
+	_FDT(fdt_end_node(fdt));
+	emitted = true;
+
+	return PHANDLE_CLK;
+}
+
+/*
+ * Emit the "bpf" device tree node for the device embedding @dev_hdr.
+ *
+ * The node carries the compatible string taken from the ELF note and a "reg"
+ * property built from the region address and size. Devices whose compatible
+ * string is "arm,primecell" additionally reference the fixed "apb_pclk"
+ * clock, whose node is emitted before the device node is opened.
+ *
+ * If no compatible string was found, the device cannot be described and no
+ * node is generated. @generate_irq_prop is unused.
+ */
+static void generate_bpf_fdt_node(void *fdt,
+				  struct device_header *dev_hdr,
+				  void (*generate_irq_prop)(void *fdt,
+							    u8 irq,
+							    enum irq_type))
+{
+	struct kvm_bpf_device *dev = container_of(dev_hdr,
+						  struct kvm_bpf_device,
+						  dev_hdr);
+	u64 reg_prop[2] = { cpu_to_fdt64(dev->reg.addr),
+			    cpu_to_fdt64(dev->reg.size) };
+	u32 clk_phandle = 0;
+	bool is_amba_device;
+
+	/* compatible stays NULL when the ELF note is missing or was not parsed. */
+	if (!dev->compatible) {
+		pr_warning("BPF device has no DT compatible string: not adding it to the FDT\n");
+		return;
+	}
+
+	is_amba_device = !strcmp(dev->compatible, "arm,primecell");
+
+	/* The clock node must be emitted before the device node is opened. */
+	if (is_amba_device)
+		clk_phandle = get_amba_clock(fdt);
+
+	_FDT(fdt_begin_node(fdt, "bpf"));
+	_FDT(fdt_property_string(fdt, "compatible", dev->compatible));
+	_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
+
+	if (is_amba_device) {
+		_FDT(fdt_property_cell(fdt, "clocks", clk_phandle));
+		_FDT(fdt_property_string(fdt, "clock-names", "apb_pclk"));
+	}
+
+	_FDT(fdt_end_node(fdt));
+}
+
+#else
+#define generate_bpf_fdt_node NULL
+#endif
+
+/*
+ * Reset the device header of @dev to an MMIO-bus header whose data pointer is
+ * the FDT node generator, then register it.
+ *
+ * Returns 0 on success or the negative value returned by device__register().
+ */
+static int bpf__device_register(struct kvm *kvm, struct kvm_bpf_device *dev)
+{
+	const struct device_header hdr = {
+		.bus_type	= DEVICE_BUS_MMIO,
+		.data		= generate_bpf_fdt_node,
+	};
+	int ret;
+
+	dev->dev_hdr = hdr;
+
+	ret = device__register(&dev->dev_hdr);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set up the BPF MMIO device if a BPF object was given with '--bpf'.
+ *
+ * Steps: put libbpf into strict mode, load the programs, create the KVM BPF
+ * device, choose the MMIO region, attach the programs to the device and
+ * register the device.
+ *
+ * Returns 0 on success or when no BPF object was requested, and a non-zero
+ * error code otherwise. If a step after device creation fails, the device fd
+ * is closed again. On success it is left open.
+ * NOTE(review): presumably the open fd is what keeps the device alive for the
+ * lifetime of the VM - confirm.
+ */
+static int bpf__init(struct kvm *kvm)
+{
+	static struct kvm_bpf_device dev = { };
+
+	const char *bpf_objfile = kvm->cfg.bpf_filename;
+	int err, devfd;
+
+	if (!bpf_objfile)
+		return 0;
+
+	err = libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS |
+				     LIBBPF_STRICT_CLEAN_PTRS);
+	if (err) {
+		pr_err("Failed to initialise libbpf (%d).\n", err);
+		goto out;
+	}
+
+	err = bpf__load_progs(bpf_objfile, &dev);
+	if (err) {
+		pr_err("Failed to load BPF programs (%d).\n", err);
+		goto out;
+	}
+
+	devfd = bpf__create_device(kvm);
+	if (devfd < 0) {
+		err = devfd;
+		pr_err("Failed to create BPF device (%d).\n", err);
+		goto out;
+	}
+
+	err = bpf__allocate_mmio_region(kvm, &dev.reg);
+	if (err) {
+		pr_err("Failed to allocate space in MMIO region (%d).\n", err);
+		goto out_close_dev;
+	}
+
+	err = bpf__attach_progs(devfd, &dev.reg);
+	if (err) {
+		pr_err("Failed to attach BPF programs to device (%d).\n", err);
+		goto out_close_dev;
+	}
+
+	err = bpf__device_register(kvm, &dev);
+	if (err) {
+		pr_err("Failed to register BPF device (%d).\n", err);
+		goto out_close_dev;
+	}
+
+	return 0;
+
+out_close_dev:
+	/* Do not leak the device fd when setup fails part-way through. */
+	close(devfd);
+out:
+	return err;
+}
+dev_base_init(bpf__init);
diff --git a/builtin-run.c b/builtin-run.c
index bd0d0b9..8653817 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -168,6 +168,14 @@
#define VIRTIO_TRANS_OPT_HELP_SHORT "[pci|pci-legacy]"
#endif
+/*
+ * '--bpf <file>' option entry, only available when built with libbpf. The
+ * expansion carries its own trailing comma so that it can be placed in
+ * BUILD_OPTIONS without one and expand to nothing when libbpf is missing.
+ */
+#ifdef CONFIG_HAS_LIBBPF
+#define BPF_OPT(cfg) OPT_STRING('\0', "bpf", &(cfg)->bpf_filename, \
+ "bpf object", \
+ "BPF ELF file to load into an MMIO device"),
+#else
+#define BPF_OPT(...)
+#endif
+
#define BUILD_OPTIONS(name, cfg, kvm) \
struct option name[] = { \
OPT_GROUP("Basic options:"), \
@@ -215,6 +223,7 @@
VIRTIO_TRANS_OPT_HELP_SHORT, \
"Type of virtio transport", \
virtio_transport_parser, NULL), \
+ BPF_OPT(cfg) \
\
OPT_GROUP("Kernel options:"), \
OPT_STRING('k', "kernel", &(cfg)->kernel_filename, "kernel", \
diff --git a/config/feature-tests.mak b/config/feature-tests.mak
index 03cdb42..8d82122 100644
--- a/config/feature-tests.mak
+++ b/config/feature-tests.mak
@@ -23,11 +23,13 @@
endef
endif
-define SOURCE_LIBELF
+# Test program for elfutils libelf: besides elf_begin() it requires <gelf.h>
+# and the GElf_Nhdr type, which the ELF note parser in bpf.c uses.
+define SOURCE_ELFUTILS_LIBELF
+#include <gelf.h>
#include <libelf.h>
int main(void)
{
+ GElf_Nhdr nhdr;
Elf *elf = elf_begin(0, ELF_C_READ, 0);
return (long)elf;
}
@@ -206,3 +208,13 @@
return 0;
}
endef
+
+# Test program for libbpf: requires <bpf/libbpf.h> and libbpf_set_strict_mode().
+define SOURCE_LIBBPF
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ return 0;
+}
+endef
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index 592b035..24cd8a1 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -52,6 +52,7 @@
const char *hugetlbfs_path;
const char *custom_rootfs_name;
const char *real_cmdline;
+ const char *bpf_filename;
struct virtio_net_params *net_params;
bool single_step;
bool vnc;
diff --git a/kvm-bpf-elf.h b/kvm-bpf-elf.h
new file mode 100644
index 0000000..39d47cf
--- /dev/null
+++ b/kvm-bpf-elf.h
@@ -0,0 +1,23 @@
+/*
+ * Names and constants for the BPF ELF objects accepted by '--bpf': the
+ * handler program names and the layout of the ELF note describing the device.
+ */
+#ifndef __KVM_BPF_ELF_H
+#define __KVM_BPF_ELF_H
+
+/* BPF program names (bare identifiers; bpf.c stringifies them for lookup) */
+#define KVM_BPF_READ_PROG_NAME kvm_io_read
+#define KVM_BPF_WRITE_PROG_NAME kvm_io_write
+
+/* Name of the ELF section holding the kvm-bpf notes */
+#define NT_KVM_BPF_SECTION_NAME ".note.kvm-bpf.mmio-device"
+
+/* Owner name, as per ELF specification of Note sections */
+#define NT_KVM_BPF_DEVICE_NAME "kvm-bpf"
+
+/* Descriptor type for MMIO devices (compared against the note's n_type) */
+#define NT_KVM_BPF_DEVICE_PROP_TYPE_MMIO 0
+
+/* MMIO: DT compatible string (subtype value in the first word of the descriptor) */
+#define NT_KVM_BPF_DEVICE_MMIO_DT_COMPATIBLE 0
+
+/* MMIO: region size (subtype value in the first word of the descriptor) */
+#define NT_KVM_BPF_DEVICE_MMIO_SIZE 1
+
+#endif /* __KVM_BPF_ELF_H */