kvm-unit-tests: Gtests Framework and Example Tests
Test framework for KVM and the Linux kernel that enables standalone
tests to exercise host, guest, and nested guest functionality without
requiring a separate .flat file. The framework and tests are written
in C and assembly.
Signed-off-by: Ken Hofsass <hofsass@google.com>
Message-Id: <20180125184803.228086-1-hofsass@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/configure b/configure
index dd9d361..3739238 100755
--- a/configure
+++ b/configure
@@ -142,6 +142,9 @@
fi
rm -f lib-test.c
fi
+if [ "$arch" = "x86_64" ]; then
+ gtests=yes
+fi
# Are we in a separate build tree? If so, link the Makefile
# and shared stuff so that 'make' and run_tests.sh work.
@@ -187,6 +190,7 @@
AR=$cross_prefix$ar
ADDR2LINE=$cross_prefix$addr2line
API=$api
+GTESTS=$gtests
TEST_DIR=$testdir
FIRMWARE=$firmware
ENDIAN=$endian
diff --git a/gtests/.gitignore b/gtests/.gitignore
new file mode 100644
index 0000000..7785445
--- /dev/null
+++ b/gtests/.gitignore
@@ -0,0 +1,2 @@
+tests/set_sregs_test
+tests/vmx_tsc_adjust_test
diff --git a/gtests/include/kvm_util.h b/gtests/include/kvm_util.h
new file mode 100644
index 0000000..9674fc5
--- /dev/null
+++ b/gtests/include/kvm_util.h
@@ -0,0 +1,346 @@
+/*
+ * gtests/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#include <stdio.h>
+#include <inttypes.h>
+
+#ifdef __LINUX_KVM_H
+#error "Do not #include <linux/kvm.h> in the kvm tests"
+#endif
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+#include "test_sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_util_vm;
+typedef struct kvm_util_vm kvm_util_vm_t;
+
+typedef void *host_vaddr_t; /* Host virtual address */
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_UTIL_MIN_PADDR 0x5000
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_UTIL_VIRT_MIN_PADDR 0x180000
+
+#define KVM_UTIL_MAGIC001 0x3a60
+#define KVM_UTIL_MAGIC002 0x5c70
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum guest_mode {
+ VM_MODE_FLAT48PG = KVM_UTIL_MAGIC001,
+};
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_CALLER_MAINTAINED = KVM_UTIL_MAGIC002,
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ VM_MEM_SRC_DIR,
+ VM_MEM_SRC_FD_PRIVATE,
+ VM_MEM_SRC_PMEM_HUGE,
+ VM_MEM_SRC_PMEM_SMALL,
+};
+struct vm_mem_backing_src {
+ enum vm_mem_backing_src_type type;
+ union {
+ struct { /* VM_MEM_SRC_CALLER_MAINTAINED */
+ void *mem_start;
+ } caller_maintained;
+
+ struct { /* VM_MEM_SRC_DIR */
+ const char *path;
+ } dir;
+
+ struct { /* VM_MEM_SRC_FD_PRIVATE */
+ int fd;
+ off_t offset;
+ } fd_private;
+ struct { /* VM_MEM_SRC_PMEM_{HUGE, SMALL} */
+ int pmem_fd;
+ } pmem;
+ };
+};
+struct vm_mem_backing_src_alias {
+ const char *name;
+ enum vm_mem_backing_src_type type;
+};
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias_first(void);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias_next(
+ const struct vm_mem_backing_src_alias *current);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias_find(
+ const char *name);
+void vm_mem_backing_src_alias_setup(const char *alias_name,
+ struct vm_mem_backing_src *backing_src);
+void vm_mem_backing_src_alias_cleanup(
+ const struct vm_mem_backing_src *backing_src);
+void vm_mem_backing_src_caller_maintained(struct vm_mem_backing_src *out,
+ void *mem_start);
+void vm_mem_backing_src_anonymous(struct vm_mem_backing_src *out);
+void vm_mem_backing_src_anonymous_thp(struct vm_mem_backing_src *out);
+void vm_mem_backing_src_anonymous_hugetlb(struct vm_mem_backing_src *out);
+void vm_mem_backing_src_dir(struct vm_mem_backing_src *out,
+ const char *path);
+void vm_mem_backing_src_fd(struct vm_mem_backing_src *out,
+ int fd, off_t offset);
+bool vm_mem_backing_src_uses_lpages(enum vm_mem_backing_src_type type);
+
+int kvm_util_cap(long cap);
+
+kvm_util_vm_t *vm_create(enum guest_mode mode, uint64_t phy_pages, int perm);
+int vm_fd(const kvm_util_vm_t *vm);
+void kvm_util_vm_free(kvm_util_vm_t **vmpp);
+
+int kvm_util_memcmp_hvirt_gvirt(const host_vaddr_t hvirt,
+ const kvm_util_vm_t *vm, const vm_vaddr_t vmvirt, size_t len);
+
+void kvm_util_vm_elf_load(kvm_util_vm_t *vm, const char *filename,
+ uint32_t data_memslot, uint32_t vttbl_memslot);
+
+void vm_dump(FILE *stream, const kvm_util_vm_t *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, const kvm_util_vm_t *vm,
+ uint32_t vcpuid, uint8_t indent);
+void regs_dump(FILE *stream, const struct kvm_regs *regs,
+ uint8_t indent);
+void segment_dump(FILE *stream, const struct kvm_segment *segment,
+ uint8_t indent);
+void dtable_dump(FILE *stream, const struct kvm_dtable *dtable,
+ uint8_t indent);
+void sregs_dump(FILE *stream, const struct kvm_sregs *sregs,
+ uint8_t indent);
+
+int vm_clock_get(const kvm_util_vm_t *vm, struct kvm_clock_data *clockp);
+int vm_clock_set(kvm_util_vm_t *vm, const struct kvm_clock_data *clockp);
+
+void vm_create_irqchip(kvm_util_vm_t *vm);
+
+void vm_userspace_mem_region_add(kvm_util_vm_t *vm,
+ struct vm_mem_backing_src *backing_src,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(kvm_util_vm_t *vm,
+ uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(kvm_util_vm_t *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(kvm_util_vm_t *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(kvm_util_vm_t *vm, uint32_t vcpuid);
+void vm_vcpu_rm(kvm_util_vm_t *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(kvm_util_vm_t *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t vttbl_memslot);
+vm_vaddr_t vm_vaddr_unused_gap(const kvm_util_vm_t *vm, size_t sz,
+ vm_vaddr_t vaddr_min);
+host_vaddr_t addr_vmphy2hvirt(const kvm_util_vm_t *vm, vm_paddr_t vmphy);
+host_vaddr_t addr_vmvirt2hvirt(const kvm_util_vm_t *vm, vm_vaddr_t vmvirt);
+vm_paddr_t addr_hvirt2vmphy(const kvm_util_vm_t *vm, host_vaddr_t hvirt);
+vm_paddr_t addr_vmvirt2vmphy(const kvm_util_vm_t *vm, vm_vaddr_t vmvirt);
+
+struct kvm_run *vcpu_state(const kvm_util_vm_t *vm, uint32_t vcpuid);
+void vcpu_run(kvm_util_vm_t *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(kvm_util_vm_t *vm, uint32_t vcpuid,
+ const struct kvm_mp_state *mp_state);
+void vcpu_regs_get(const kvm_util_vm_t *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(kvm_util_vm_t *vm,
+ uint32_t vcpuid, const struct kvm_regs *regs);
+void vcpu_args_set(kvm_util_vm_t *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(const kvm_util_vm_t *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(kvm_util_vm_t *vm,
+ uint32_t vcpuid, const struct kvm_sregs *sregs);
+void vcpu_xcrs_get(kvm_util_vm_t *vm,
+ uint32_t vcpuid, struct kvm_xcrs *xcrs);
+void vcpu_xcrs_set(kvm_util_vm_t *vm,
+ uint32_t vcpuid, const struct kvm_xcrs *xcrs);
+void vcpu_events_get(const kvm_util_vm_t *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(kvm_util_vm_t *vm, uint32_t vcpuid,
+ const struct kvm_vcpu_events *events);
+
+const char *exit_reason_str(unsigned int exit_reason);
+int exit_reason_val(const char *name);
+void exit_reasons_list(FILE *stream, unsigned int indent);
+
+void virt_pg_map(kvm_util_vm_t *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t vttbl_memslot);
+void virt_dump(FILE *stream, const kvm_util_vm_t *vm, uint8_t indent);
+void setUnusableSegment(struct kvm_segment *segp);
+void setLongModeFlatKernelCodeSegment(uint16_t selector,
+ struct kvm_segment *segp);
+void setLongModeFlatKernelDataSegment(uint16_t selector,
+ struct kvm_segment *segp);
+
+uint64_t vm_read_proc_field(const char *name);
+uint64_t vcpu_read_proc_field(const char *name, int index);
+void vm_read_proc_array(const char *name, uint64_t *out, int len);
+void vcpu_read_proc_array(const char *name, int index, uint64_t *out, int len);
+
+int get_num_metrics(unsigned long kind);
+uint64_t vm_get_metric(const kvm_util_vm_t *vm, uint32_t id);
+uint64_t vcpu_get_metric(const kvm_util_vm_t *vm, uint32_t vcpu_index,
+ uint32_t id);
+void vcpu_get_metric_array(const kvm_util_vm_t *vm, uint32_t vcpu_index,
+ uint32_t start_id, uint64_t *out, int len);
+void vm_get_metric_array(const kvm_util_vm_t *vm, uint32_t start_id,
+ uint64_t *out, int len);
+
+#define __ASSERT_STAT_ARRAY(desc, op) do { \
+ int found = 0; \
+ int i; \
+ int start = _start >= 0 ? _start : _len + _start; \
+ /* _len, _data, _val defined by caller */ \
+ \
+ for (i = start; i < _len; i++) { \
+ if (_data[i] op _val) \
+ found += 1; \
+ else if (_all) \
+ fprintf(stderr, "%s data[%d]=%" PRIu64 "\n", desc, i, \
+ _data[i]); \
+ } \
+ if (_all) { \
+ int required = _len - start; \
+ \
+ TEST_ASSERT(found == required, \
+ "Only %d of %d in %s data were %s %" PRIu64, \
+ found, required, desc, #op, _val); \
+ } \
+ else \
+ TEST_ASSERT(found, "Nothing in %s data was %s %" PRIu64, \
+ desc, #op, _val); \
+} while (0)
+
+#define __ASSERT_VCPU_STAT(all, op, val, vm, vcpu, proc_name, metric_id, len, \
+ start) \
+do { \
+ uint64_t _val = (val); \
+ kvm_util_vm_t *_vm = (vm); \
+ uint32_t _vcpu = (vcpu); \
+ int _len = (len); \
+ uint64_t _data[_len]; \
+ bool _all = (all); \
+ int _start = (start); \
+ \
+ vcpu_get_metric_array(_vm, _vcpu, (metric_id), _data, _len); \
+ __ASSERT_STAT_ARRAY("metric", op); \
+ \
+ vcpu_read_proc_array((proc_name), _vcpu, _data, _len); \
+ __ASSERT_STAT_ARRAY("proc", op); \
+} while (0)
+
+#define __ASSERT_VM_STAT(all, op, val, vm, proc_name, metric_id, len, start) \
+do { \
+ uint64_t _val = (val); \
+ kvm_util_vm_t *_vm = (vm); \
+ int _len = (len); \
+ uint64_t _data[_len]; \
+ bool _all = (all); \
+ int _start = (start); \
+ \
+ vm_get_metric_array(_vm, (metric_id), _data, _len); \
+ __ASSERT_STAT_ARRAY("metric", op); \
+ \
+ vm_read_proc_array((proc_name), _data, _len); \
+ __ASSERT_STAT_ARRAY("proc", op); \
+} while (0)
+
+/* vm == */
+#define ASSERT_VM_STAT_ALL_EQ(val, vm, name, metric, len) \
+ __ASSERT_VM_STAT(true, ==, val, vm, name, metric, len, 0)
+#define ASSERT_VM_STAT_SOME_EQ(val, vm, name, metric, len) \
+ __ASSERT_VM_STAT(false, ==, val, vm, name, metric, len, 0)
+#define ASSERT_VM_STAT_EQ(val, vm, name, metric) \
+ __ASSERT_VM_STAT(true, ==, val, vm, name, metric, 1, 0)
+#define ASSERT_VM_STAT_ELEM_EQ(val, vm, name, metric, i) \
+ __ASSERT_VM_STAT(true, ==, val, vm, name, metric, (i + 1), -1)
+
+/* vm > */
+#define ASSERT_VM_STAT_ALL_GT(val, vm, name, metric, len) \
+ __ASSERT_VM_STAT(true, >, val, vm, name, metric, len, 0)
+#define ASSERT_VM_STAT_SOME_GT(val, vm, name, metric, len) \
+ __ASSERT_VM_STAT(false, >, val, vm, name, metric, len, 0)
+#define ASSERT_VM_STAT_GT(val, vm, name, metric) \
+ __ASSERT_VM_STAT(true, >, val, vm, name, metric, 1, 0)
+#define ASSERT_VM_STAT_ELEM_GT(val, vm, name, metric, i) \
+ __ASSERT_VM_STAT(true, >, val, vm, name, metric, (i + 1), -1)
+
+/* vcpu == */
+#define ASSERT_VCPU_STAT_ALL_EQ(val, vm, vcpu, name, metric, len) \
+ __ASSERT_VCPU_STAT(true, ==, val, vm, vcpu, name, metric, len, 0)
+#define ASSERT_VCPU_STAT_SOME_EQ(val, vm, vcpu, name, metric, len) \
+ __ASSERT_VCPU_STAT(false, ==, val, vm, vcpu, name, metric, len, 0)
+#define ASSERT_VCPU_STAT_EQ(val, vm, vcpu, name, metric) \
+ __ASSERT_VCPU_STAT(true, ==, val, vm, vcpu, name, metric, 1, 0)
+#define ASSERT_VCPU_STAT_ELEM_EQ(val, vm, vcpu, name, metric, i) \
+ __ASSERT_VCPU_STAT(true, ==, val, vm, vcpu, name, metric, (i + 1), -1)
+
+/* vcpu > */
+#define ASSERT_VCPU_STAT_ALL_GT(val, vm, vcpu, name, metric, len) \
+ __ASSERT_VCPU_STAT(true, >, val, vm, vcpu, name, metric, len, 0)
+#define ASSERT_VCPU_STAT_SOME_GT(val, vm, vcpu, name, metric, len) \
+ __ASSERT_VCPU_STAT(false, >, val, vm, vcpu, name, metric, len, 0)
+#define ASSERT_VCPU_STAT_GT(val, vm, vcpu, name, metric) \
+ __ASSERT_VCPU_STAT(true, >, val, vm, vcpu, name, metric, 1, 0)
+#define ASSERT_VCPU_STAT_ELEM_GT(val, vm, vcpu, name, metric, i) \
+ __ASSERT_VCPU_STAT(true, >, val, vm, vcpu, name, metric, (i + 1), -1)
+
+void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
+void vcpu_set_cpuid(
+ kvm_util_vm_t *vm, uint32_t vcpuid, const struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
+struct kvm_cpuid_entry2 *
+find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
+{
+ return find_cpuid_index_entry(cpuid, function, 0);
+}
+
+int vcpu_fd(const kvm_util_vm_t *vm, uint32_t vcpuid);
+
+kvm_util_vm_t *vm_create_default(uint32_t vcpuid, void *guest_code);
+void vm_vcpu_add_default(kvm_util_vm_t *vm, uint32_t vcpuid, void *guest_code);
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+ vm_paddr_t vmxon_paddr,
+ vm_vaddr_t vmcs_vaddr,
+ vm_paddr_t vmcs_paddr);
+
+kvm_util_vm_t *
+vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code);
+
+const struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(const kvm_util_vm_t *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(const struct kvm_userspace_memory_region *region);
+
+int vm_create_device(const kvm_util_vm_t *vm, struct kvm_create_device *cd);
diff --git a/gtests/include/test_sparsebit.h b/gtests/include/test_sparsebit.h
new file mode 100644
index 0000000..d2c190d
--- /dev/null
+++ b/gtests/include/test_sparsebit.h
@@ -0,0 +1,82 @@
+/*
+ * gtests/include/test_sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the test_sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t. Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other. This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct test_sparsebit;
+typedef struct test_sparsebit test_sparsebit_t;
+typedef uint64_t test_sparsebit_idx_t;
+typedef uint64_t test_sparsebit_num_t;
+
+test_sparsebit_t *test_sparsebit_alloc(void);
+void test_sparsebit_free(test_sparsebit_t **sbitp);
+void test_sparsebit_copy(test_sparsebit_t *dstp, const test_sparsebit_t *src);
+
+bool test_sparsebit_is_set(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t idx);
+bool test_sparsebit_is_set_num(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t idx, test_sparsebit_num_t num);
+bool test_sparsebit_is_clear(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t idx);
+bool test_sparsebit_is_clear_num(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t idx, test_sparsebit_num_t num);
+test_sparsebit_num_t test_sparsebit_num_set(const test_sparsebit_t *sbit);
+bool test_sparsebit_any_set(const test_sparsebit_t *sbit);
+bool test_sparsebit_any_clear(const test_sparsebit_t *sbit);
+bool test_sparsebit_all_set(const test_sparsebit_t *sbit);
+bool test_sparsebit_all_clear(const test_sparsebit_t *sbit);
+test_sparsebit_idx_t test_sparsebit_first_set(const test_sparsebit_t *sbit);
+test_sparsebit_idx_t test_sparsebit_first_clear(const test_sparsebit_t *sbit);
+test_sparsebit_idx_t test_sparsebit_next_set(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t prev);
+test_sparsebit_idx_t test_sparsebit_next_clear(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t prev);
+test_sparsebit_idx_t test_sparsebit_next_set_num(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t start, test_sparsebit_num_t num);
+test_sparsebit_idx_t test_sparsebit_next_clear_num(const test_sparsebit_t *sbit,
+ test_sparsebit_idx_t start, test_sparsebit_num_t num);
+
+void test_sparsebit_set(test_sparsebit_t *sbitp, test_sparsebit_idx_t idx);
+void test_sparsebit_set_num(test_sparsebit_t *sbitp, test_sparsebit_idx_t start,
+ test_sparsebit_num_t num);
+void test_sparsebit_set_all(test_sparsebit_t *sbitp);
+
+void test_sparsebit_clear(test_sparsebit_t *sbitp, test_sparsebit_idx_t idx);
+void test_sparsebit_clear_num(test_sparsebit_t *sbitp,
+ test_sparsebit_idx_t start, test_sparsebit_num_t num);
+void test_sparsebit_clear_all(test_sparsebit_t *sbitp);
+
+void test_sparsebit_dump(FILE *stream, const test_sparsebit_t *sbit,
+ unsigned int indent);
+void test_sparsebit_dump_internal(FILE *stream, const test_sparsebit_t *sbit,
+ unsigned int indent);
+void test_sparsebit_validate_internal(const test_sparsebit_t *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/gtests/include/test_util.h b/gtests/include/test_util.h
new file mode 100644
index 0000000..6002fa1
--- /dev/null
+++ b/gtests/include/test_util.h
@@ -0,0 +1,602 @@
+/*
+ * gtests/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef _GTESTS_TEST_UTIL_H
+#define _GTESTS_TEST_UTIL_H
+
+#include <errno.h>
+#include <regex.h>
+#include <signal.h> /* For siginfo_t */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h> /* For struct timespec */
+#include <unistd.h>
+
+#include <sys/wait.h>
+#include <sys/types.h>
+
+#include <linux/capability.h>
+#include <linux/elf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For portability with use of __func__. */
+#if __STDC_VERSION__ < 199901L
+# if __GNUC__ >= 2
+# define __func__ __FUNCTION__
+# else
+# define __func__ "<unknown>"
+# endif
+#endif
+
+#define TEST_UTIL_SUCCESS 0
+#define TEST_UTIL_SYNTAX_ERR 1
+#define TEST_UTIL_VALUE_ERR 2
+
+#define TEST_MALLOC_PROT_BEFORE (1 << 0)
+#define TEST_MALLOC_PROT_AFTER (1 << 1)
+#define TEST_MALLOC_ALIGN (1 << 2)
+#define TEST_MALLOC_ALLOW_PROT_CHG (1 << 3)
+#define TEST_MALLOC_MMAP_FD (1 << 4)
+#define TEST_MALLOC_MMAP_FD_OFFSET (TEST_MALLOC_MMAP_FD | (1 << 5))
+
+enum test_malloc_ctl_cmd {
+ CHG_PROT, /* Change the protections of test_malloc'd memory.
+ * Memory must have been allocated with the
+		 * TEST_MALLOC_ALLOW_PROT_CHG flag in order to use this command. */
+ GET_FLAGS, /* Retrieve flags with which memory was test_malloc'd. */
+};
+
+struct test_rng {
+ uint64_t low;
+ uint64_t high; /* Inclusive */
+};
+
+struct test_symb {
+ const char *name;
+ uint64_t val;
+};
+struct test_symbi {
+ const char *name;
+ int64_t val;
+};
+
+/* Wraps information retrieved from the proc/<pid>/maps
+ * file concerning a specific mapped memory address.
+ */
+struct test_pg_info {
+ uint64_t start; /* The starting address of the mapping, inclusive. */
+ uint64_t end; /* The ending address of the mapping, inclusive. */
+ size_t size; /* The size of the mapping. */
+ int prot; /* The protections of the mapping,
+ * one or more of PROT_READ, PROT_WRITE, PROT_EXEC,
+ * or PROT_NONE as defined in sys/mman.h. */
+ bool shared; /* Whether the mapping is shared or private. */
+};
+
+typedef struct __user_cap_data_struct *test_cap_t;
+typedef enum {
+ TEST_CAP_EFFECTIVE = 0x2bc0,
+ TEST_CAP_PERMITTED,
+ TEST_CAP_INHERITABLE,
+} test_cap_group_t;
+
+extern const struct test_symb test_symb_infinity[];
+extern const struct test_symb test_known_errno[];
+extern const struct test_symb test_known_sig[];
+
+char *test_get_opt_str(const char *arg1, char *args[]);
+int test_parse_i64(const char *str, int64_t *val, int64_t min,
+ int64_t max, const struct test_symbi symb[]);
+int test_parse_u32(const char *str, uint32_t *val, uint32_t max,
+ const struct test_symb symb[]);
+int test_parse_u64(const char *str, uint64_t *val, uint64_t max,
+ const struct test_symb symb[]);
+int test_parse_float(const char *str, float *val);
+int test_parse_rngs(const char *str, struct test_rng **rngs, unsigned int *num,
+ uint64_t max, const struct test_symb symb[]);
+char *test_rngs2str(const struct test_rng *rngs, unsigned int num,
+ unsigned int radix);
+bool test_rngs_idx_isset(unsigned long long idx, const struct test_rng *rngs,
+ unsigned int num);
+void test_rngs_idx_set(unsigned long long idx, struct test_rng **rngs,
+ unsigned int *num);
+
+char *test_dyn_sprintf(const char *fmt, ...);
+/* Don't inline so we can omit from stack dumps. See test_dump_stack. */
+void __attribute__((noinline)) __attribute__ ((format (printf, 5, 6)))
+ test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...);
+uint32_t test_rand32(void);
+bool test_rand_bool(void);
+uint32_t test_rand32_mod(uint32_t mod);
+unsigned int test_rand_choice(unsigned int num, const float weights[]);
+
+void test_delay(double amt);
+void test_delay_ts(const struct timespec *amt);
+int test_delay_until(const struct timespec *end, pid_t pid);
+double test_ts2double(const struct timespec *val);
+struct timespec test_double2ts(double amt);
+
+/* Current implementation of test_ts_delta requires *second >= *first */
+struct timespec test_ts_delta(const struct timespec *first,
+ const struct timespec *second);
+void test_ts_sum(struct timespec *sum, const struct timespec *t1,
+ const struct timespec *t2);
+/* Current implementation of test_ts_minus requires *t1 >= *t2 */
+void test_ts_minus(struct timespec *minus, const struct timespec *t1,
+ const struct timespec *t2);
+int test_ts_cmp(const struct timespec *t1, const struct timespec *t2);
+
+char *test_debugfs_mnt_point(void);
+void test_dump_siginfo(FILE *file, siginfo_t *sig);
+
+uint64_t test_tsc_freq(int cpu);
+void test_xdump(FILE *stream, const void *buf, size_t size,
+ intptr_t addr_start, uint8_t indent);
+
+char *test_config_str(const char *name);
+
+int test_cap_get(pid_t pid, test_cap_t *cap);
+int test_cap_set(pid_t pid, const test_cap_t *cap);
+bool test_cap_flag_fetch(const test_cap_t *cap, test_cap_group_t group,
+ unsigned int trait);
+void test_cap_flag_assign(test_cap_t *cap, test_cap_group_t group,
+ unsigned int trait, bool rval);
+
+float test_sgniff(long double expected, float actual);
+float test_sgnif(long double expected, double actual);
+float test_sgnifl(long double expected, long double actual);
+
+int test_pg_info(pid_t pid, uint64_t addr,
+ struct test_pg_info *info);
+int test_pg_info_map(const char *map, uint64_t addr,
+ struct test_pg_info *info);
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void *test_malloc(size_t size, uint32_t flags, ...);
+void test_malloc_free(void *addr);
+void test_malloc_chg_prot(const void *addr, int prot);
+uint32_t test_malloc_get_flags(const void *addr);
+
+#define TEST_ASSERT(e, fmt, ...) \
+ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+/* Expands to TEST_ASSERT(e, "ASSERT(%s) failed.", #e) */
+#define ASSERT(e) \
+ TEST_ASSERT(e, "ASSERT(%s) failed.", #e)
+
+#define ASSERT_EQ(a, b) do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ TEST_ASSERT(__a == __b, \
+ "ASSERT_EQ(%s, %s) failed.\n" \
+ "\t%s is %#lx\n" \
+ "\t%s is %#lx", \
+ #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+int __attribute__ ((format (printf, 1, 2))) test_printk(const char *fmt, ...);
+
+struct test_elfsymb {
+ uintmax_t value;
+ size_t size;
+};
+void test_elfhdr_get(const char *filename, Elf64_Ehdr *hdrp);
+int test_elfsymb_get(const char *filename, const char *name,
+ struct test_elfsymb *symbp);
+
+void extract_pageflags(void *addr, unsigned int count, uint64_t *buffer);
+
+/* Architecture dependent inline functions.
+ *
+ * For each architecture the following inline functions are provided:
+ *
+ * void test_barrier_read(void)
+ * Delay until the current processor has completed all outstanding
+ * cache coherence read operations.
+ *
+ * void test_barrier_write(void)
+ * Delay until the current processor has completed all outstanding
+ * cache coherence write operations.
+ *
+ * void test_barrier_read_write(void)
+ * Delay until the current processor has completed all outstanding
+ * cache coherence read and write operations.
+ *
+ * void test_serialize(void)
+ * Delay until the current processor has completed all outstanding
+ * operations. At a minimum, this includes the following:
+ *
+ * + Background cache coherence read operations.
+ * + Background cache coherence write operations.
+ * + Flush instruction pipeline.
+ *
+ * uint64_t test_rdtsc(void)
+ * Reads the processors time-stamp counter and returns its value.
+ * Each implementation assures that instructions before reading
+ * the time-stamp counter have completed (e.g. instruction pipe-line
+ * flush), although there may be pending background operations
+ * (e.g. background cache-coherence operations). Note, an instruction
+ * pipe-line flush is not performed after reading the time-stamp
+ * counter, because it is assumed that there is a time-stamp counter
+ * per processor and thus it is not a shared resource, which could
+ * delay the ability to read the counter value.
+ */
+#if defined(__x86_64__)
+static inline void test_barrier_read(void)
+{
+ __asm__ __volatile__("lfence" : : : "memory");
+}
+
+static inline void test_barrier_write(void)
+{
+ __asm__ __volatile__("sfence" : : : "memory");
+}
+
+static inline void test_barrier_read_write(void)
+{
+ __asm__ __volatile__("mfence" : : : "memory");
+}
+
+static inline void test_serialize(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ __asm__ __volatile__ (
+ "cpuid\n"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (0), "c" (0)
+ : "memory"
+ );
+}
+
+static inline uint64_t test_rdtsc(void)
+{
+ uint32_t low, high;
+
+ asm volatile("mfence; lfence");
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
+ asm volatile("lfence");
+
+ return ((uint64_t) high << 32) | low;
+}
+
+
+#elif defined(__PPC64__)
+
+static inline void test_barrier_read(void)
+{
+ __asm__ volatile ("sync" : : : "memory");
+}
+
+static inline void test_barrier_write(void)
+{
+ __asm__ volatile ("sync" : : : "memory");
+}
+
+static inline void test_serialize(void)
+{
+ __asm__ volatile ("sync");
+}
+
+static inline uint64_t test_rdtsc(void)
+{
+ uint64_t upper, lower, upper_again;
+
+ /* Can only read the upper or lower half of the time-stamp counter
+ * with a single instruction. To handle this, will read the
+ * upper half, then the lower half, then the upper half again.
+ * Will combine and return the upper and lower half only when
+ * the first and second read of the upper half return the same
+ * value. In cases where the upper half is different between
+ * the two times it is read, the whole process is repeated, until
+ * a case occurs when the two reads of the upper half return the
+ * same value.
+ */
+ __asm__ volatile(
+ "0:\n"
+ "\tmftbu %0\n"
+ "\tmftb %1\n"
+ "\tmftbu %2\n"
+ "\tcmpw %2, %0\n"
+ "\tbne 0b\n"
+ : "=r"(upper), "=r"(lower),"=r"(upper_again)
+ );
+
+ return (upper << 32) | lower;
+}
+
+#elif defined(__aarch64__)
+
+static inline void test_barrier_read(void)
+{
+ /* "memory" to prevent compiler reordering.
+ * "dsb ld" == data synchronization barrier, full system, reads.
+ * This instruction completes after all prior memory reads issued by
+ * this CPU to any observer in the system, are complete.
+ * See ARMv8 Architecture Reference Manual sections B2.7.3 and C6.6.62.
+ */
+ __asm__ volatile ("dsb ld" : : : "memory");
+}
+
+static inline void test_barrier_write(void)
+{
+ /* "memory" to prevent compiler reordering.
+ * "dsb st" == data synchronization barrier, full system, writes.
+ * This instruction completes after all prior memory writes issued by
+ * this CPU are visible to all observers in the system.
+ * See ARMv8 Architecture Reference Manual sections B2.7.3 and C6.6.62.
+ */
+ __asm__ volatile ("dsb st" : : : "memory");
+}
+
+static inline void test_serialize(void)
+{
+ /* "memory" to prevent compiler reordering.
+ * "dsb sy" == data synchronization barrier, full system, all types.
+ * "isb sy" == instruction synchronization barrier, full system.
+ * See ARMv8 Architecture Reference Manual sections B2.7.3, C6.6.62 and
+ * C6.6.72.
+ */
+ __asm__ volatile (
+ "dsb sy\n\t"
+ "isb sy"
+ : : : "memory");
+}
+
+static inline uint64_t test_rdtsc(void)
+{
+ uint64_t value;
+
+ /*
+ * Read the virtual timer, to ensure this will work correctly within
+ * VMs as well. Note that the timer typically runs at a lower frequency
+ * than the CPU.
+ */
+ test_serialize();
+ __asm__ volatile("mrs %0, cntvct_el0" : "=r"(value));
+
+ return value;
+}
+
+#else
+#error "Unknown architecture"
+#endif
+
+/* Metrics - C Language Interface
+ *
+ * Note: C++-language portion of interface described below, within
+ * "__cplusplus" portion of this header. See below,
+ * class GtestsUtilMetrics.
+ */
+void metrics_post_uint64(const char *name, uint64_t value, const char *units);
+void metrics_post_int64(const char *name, int64_t value, const char *units);
+void metrics_post_float(const char *name, float value, const char *units);
+void metrics_post_double(const char *name, double value, const char *units);
+void metrics_post_uint64_array(const char *name,
+ const uint64_t *values, unsigned int num, const char *units);
+void metrics_post_int64_array(const char *name,
+ const int64_t *values, unsigned int num, const char *units);
+void metrics_post_float_array(const char *name,
+ const float *values, unsigned int num, const char *units);
+void metrics_post_double_array(const char *name,
+ const double *values, unsigned int num, const char *units);
+
+#ifdef __cplusplus
+} /* Closing brace for extern "C" */
+
+/* Declarations for C++ only. */
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+const unsigned int GtestsUtil_Magic001 = 0x3a40;
+
+std::string StringPrintf(const char* format, ...);
+
+class GtestsUtil_Log {
+ public:
+ static const unsigned int magic001 = 0x3a40;
+
+ enum LogSeverity {
+ INFO = magic001,
+ WARNING,
+ ERROR,
+ FATAL,
+ };
+
+ GtestsUtil_Log(const char* file, unsigned int line,
+ enum LogSeverity severity);
+ ~GtestsUtil_Log();
+
+ template <typename T> GtestsUtil_Log& operator<<(const T& v) {
+ /* Right-hand-side operand is inserted to the string_
+ * variable, instead of directly to the stream, so
+ * that the current state of std::cout and std::cerr
+ * don't effect how the log entry is formatted. On
+ * destruction the contents of string_, plus a newline,
+ * is inserted to the underlying stream.
+ */
+ string_ << v;
+
+ return *this;
+ }
+
+ /* Handle stream manipulators. */
+ typedef ::std::ostream& (StdManip1)(::std::ostream &os);
+ typedef ::std::ios_base& (StdManip2)(::std::ios_base &os);
+ GtestsUtil_Log& operator<<(StdManip1 m);
+ GtestsUtil_Log& operator<<(StdManip2 m);
+
+ private:
+ GtestsUtil_Log();
+ std::ostringstream string_;
+ std::ostream& stream_;
+ const enum LogSeverity severity_;
+};
+
+/* Log Macro
+ *
+ * Caller is expected to use one of:
+ *
+ * LOG(INFO)
+ * LOG(WARNING)
+ * LOG(ERROR)
+ * LOG(FATAL)
+ *
+ * All four of the above forms of the LOG macro cause a prefix of
+ * the following format:
+ *
+ * SMMDD hh:mm:ss.uuuuuu ttttttt ffffff:llll]
+ *
+ * where:
+ *
+ * S - severity - one of I (info), W (warning), E (error), or F (fatal)
+ * MM - month - 01 (January) to 12 (December)
+ * DD - day of month
+ * hh - hour
+ * mm - minute
+ * ss - seconds
+ * uuuuuu - micro-seconds
+ * ttttttt - thread ID
+ * ffffff - filename
+ * llll - line number
+ *
+ * the MM, DD, hh, mm, ss, and uuuuuu fields above are prefixed with
+ * zeros as needed to be of the length shown above. While the thread
+ * ID is prefixed with space characters as needed to be a total of 7
+ * characters long. The filename and line numbers are displayed in
+ * whatever minimal number of characters needed to display their entire
+ * value. The INFO and WARNING forms of the LOG macros send their
+ * output to std::cout, while the ERROR and FATAL forms send it to
+ * std::cerr.
+ *
+ * Although not derived from any of the standard stream classes, the
+ * underlying GtestsUtil_Log class overloads the << operator so that
+ * ostream objects and stream manipulators can be used to the temporary
+ * object created by the various forms of the LOG macro. For example,
+ * the following will display to std::cout the hex value of a variable
+ * named foo:
+ *
+ * LOG(INFO) << "foo: 0x" << std::hex << foo;
+ *
+ * After displaying the log message prefix and anything pushed via the
+ * << operator, a newline is displayed and the stream is flushed.
+ * Additionally, the FATAL form of the LOG macro causes a TEST_ASSERT failure.
+ */
+#define LOG(severity) \
+ GtestsUtil_Log(__FILE__, __LINE__, GtestsUtil_Log::severity)
+
+/* CHECK Macros
+ *
+ * A minimal implementation of the CHECK macros, from google3. As with
+ * the LOG macro, in the future these macros and underlying classes may
+ * be enhanced, but in general tests that need more than this minimal
+ * implementation should be implemented and maintained within google3.
+ *
+ * The basic form of the CHECK macro takes a single boolean expression.
+ * Under normal conditions this expression should evaluate to true. When
+ * true, the CHECK macro effectively becomes a null-operation. When
+ * true, expressions on the right-hand-side of a << operator are not
+ * even evaluated. In contrast, when the expression is false, a LOG(FATAL)
+ * is used to display a log prefix and the values pushed to the CHECK
+ * macro, via the << operator. Further, the use of LOG(FATAL) causes
+ * a TEST_ASSERT failure, after the values are displayed.
+ *
+ * There are additional forms of the CHECK macro that instead of taking
+ * a Boolean expression, take two values. Those two values are compared
+ * via an operation specified by a suffix to the CHECK macro name. The
+ * supported forms of these macros and the operation performed are:
+ *
+ * CHECK_EQ == (Equal)
+ * CHECK_NE != (Not Equal)
+ * CHECK_LE <= (Less Than or Equal)
+ * CHECK_LT < (Less Than)
+ * CHECK_GE >= (Greater Than or Equal)
+ * CHECK_GT > (Greater Than)
+ *
+ * Note: The CHECK macro intentionally uses a while() with
+ * no braces. This effectively forms what appears
+ * to be a short-circuit evaluation of the << operator.
+ * For example, caller might use this macro as:
+ *
+ * CHECK(cond) << "n: " << n++;
+ *
+ * Because no braces were used, the caller can
+ * add << operators to the right of the macro
+ * expansion. While the use of while causes the
+ * << operands to only be evaluated when the condition
+ * is false. In the above example, n is only incremented
+ * when cond is false.
+ */
+#define CHECK(condition) \
+ while (!(condition)) \
+ LOG(FATAL) << "FAILED: " #condition << std::endl
+
+#define CHECK_EQ(val1, val2) GTESTSUTIL_CHECK_OP(==, val1, val2)
+#define CHECK_NE(val1, val2) GTESTSUTIL_CHECK_OP(!=, val1, val2)
+#define CHECK_LE(val1, val2) GTESTSUTIL_CHECK_OP(<=, val1, val2)
+#define CHECK_LT(val1, val2) GTESTSUTIL_CHECK_OP(< , val1, val2)
+#define CHECK_GE(val1, val2) GTESTSUTIL_CHECK_OP(>=, val1, val2)
+#define CHECK_GT(val1, val2) GTESTSUTIL_CHECK_OP(> , val1, val2)
+#define GTESTSUTIL_CHECK_OP(condition, val1, val2) \
+ /* Note: Intentional use of while() with no braces. \
+ * See description of CHECK macro above for \
+ * explanation. \
+ */ \
+ while (!(val1 condition val2)) \
+ LOG(FATAL) << "FAILED: " << #val1 << ' ' << #condition << ' ' \
+ << #val2 << std::endl \
+ << "val1: " << val1 << std::endl \
+ << "val2: " << val2 << std::endl \
+
+/* Metrics - C++ Language Interface
+ *
+ * Note: C-language portion of interface described above, within "extern C"
+ * portion of this header. See above, metric_post_.
+ */
+class GtestsUtilMetrics {
+ public:
+ /* Post a set of metrics.
+ *
+ * name: ^[a-zA-Z_]+[0-9a-zA-Z_]*$
+ * values: vector of type uint64_t, int64_t, float, or double.
+ * units: ^[a-zA-Z_%]+[0-9.a-zA-Z_/%^]*$
+ * Characters of /, ^, and % provided to support units
+ * like "meters/second^2" and "%_of_baseline".
+ */
+ static const std::string kMetricsNameRe;
+ static const std::string kMetricsUnitsRe;
+
+ template <typename T>
+ static void Post(const std::string& name, const std::vector<T>& values,
+ const std::string& units) {
+ Post(name, values.cbegin(), values.cend(), units);
+ }
+ template <typename T>
+ static void Post(const std::string& name,
+ const T begin, const T end, const std::string& units);
+
+ private:
+ static void Initialize();
+};
+
+#endif /* __cplusplus */
+#endif /* _GTESTS_TEST_UTIL_H */
diff --git a/gtests/include/vmx.h b/gtests/include/vmx.h
new file mode 100644
index 0000000..9ac00ce
--- /dev/null
+++ b/gtests/include/vmx.h
@@ -0,0 +1,674 @@
+/*
+ * gtests/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef GTESTS_KVM_VMX_H
+#define GTESTS_KVM_VMX_H
+
+#include <asm/msr-index.h>
+#include <stdint.h>
+#include "x86.h"
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+#define LAST_EXIT_REASON 64
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+static inline void prepare_for_vmx_operation(void)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ required |= FEATURE_CONTROL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(void)
+{
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt_base());
+ vmwrite(HOST_IDTR_BASE, get_idt_base());
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+static inline void prepare_vmcs(void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields();
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
+
+#endif /* !GTESTS_KVM_VMX_H */
diff --git a/gtests/include/x86.h b/gtests/include/x86.h
new file mode 100644
index 0000000..2276d22
--- /dev/null
+++ b/gtests/include/x86.h
@@ -0,0 +1,306 @@
+/*
+ * gtests/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef GTESTS_KVM_X86_H
+#define GTESTS_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+/* The enum values match the instruction encoding of each register */
+enum x86_register {
+ RAX = 0,
+ RCX,
+ RDX,
+ RBX,
+ RSP,
+ RBP,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:5, dpl:2, p:1;
+ unsigned limit1:4, zero0:3, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed.
+ */
+ __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+ switch (n) {
+ case 0:
+ SET_XMM(val, xmm0);
+ break;
+ case 1:
+ SET_XMM(val, xmm1);
+ break;
+ case 2:
+ SET_XMM(val, xmm2);
+ break;
+ case 3:
+ SET_XMM(val, xmm3);
+ break;
+ case 4:
+ SET_XMM(val, xmm4);
+ break;
+ case 5:
+ SET_XMM(val, xmm5);
+ break;
+ case 6:
+ SET_XMM(val, xmm6);
+ break;
+ case 7:
+ SET_XMM(val, xmm7);
+ break;
+ }
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+ assert(n >= 0 && n <= 7);
+
+ register v1di xmm0 __asm__("%xmm0");
+ register v1di xmm1 __asm__("%xmm1");
+ register v1di xmm2 __asm__("%xmm2");
+ register v1di xmm3 __asm__("%xmm3");
+ register v1di xmm4 __asm__("%xmm4");
+ register v1di xmm5 __asm__("%xmm5");
+ register v1di xmm6 __asm__("%xmm6");
+ register v1di xmm7 __asm__("%xmm7");
+ switch (n) {
+ case 0:
+ return (unsigned long)xmm0;
+ case 1:
+ return (unsigned long)xmm1;
+ case 2:
+ return (unsigned long)xmm2;
+ case 3:
+ return (unsigned long)xmm3;
+ case 4:
+ return (unsigned long)xmm4;
+ case 5:
+ return (unsigned long)xmm5;
+ case 6:
+ return (unsigned long)xmm6;
+ case 7:
+ return (unsigned long)xmm7;
+ }
+ return 0;
+}
+
+#endif /* !GTESTS_KVM_X86_H */
diff --git a/gtests/lib/kvm_util.c b/gtests/lib/kvm_util.c
new file mode 100644
index 0000000..3e6cbba
--- /dev/null
+++ b/gtests/lib/kvm_util.c
@@ -0,0 +1,3003 @@
+/*
+ * gtests/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#define __STDC_FORMAT_MACROS
+#include <ctype.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/fs.h>
+#include <linux/elf.h>
+
+#include "asm/processor-flags.h"
+#include "asm/msr.h"
+#include "asm/msr-index.h"
+
+#include "test_sparsebit.h"
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+#define PMEM_BASE 0x40000000
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT 12
+#endif
+
+/* For bitmap operations */
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE 8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
/* Aligns x up to the next multiple of size. Size must be a power of 2.
 *
 * Input Args:
 *   x    - pointer to align
 *   size - alignment, must be a non-zero power of two
 *
 * Return:
 *   x rounded up to the next multiple of size.
 */
static void *align(void *x, size_t size)
{
	size_t mask = size - 1;

	/* %zu is the portable conversion for size_t; %lu is only correct
	 * where size_t happens to be unsigned long.
	 */
	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		"size not a power of 2: %zu", size);
	/* uintptr_t is the integer type guaranteed to round-trip a
	 * pointer value (size_t is not).
	 */
	return (void *) (((uintptr_t) x + mask) & ~mask);
}
+
/* Virtual translation table structure declarations
 *
 * Bitfield layouts for the four levels of the x86-64 4-level paging
 * hierarchy. In each entry the 40-bit address field holds the physical
 * page-frame number of the next-level table (or final page).
 */

/* Level 4: page-map level-4 (PML4) table entry. */
struct pageMapL4Entry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

/* Level 3: page-directory-pointer table (PDPT) entry. */
struct pageDirectoryPointerEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

/* Level 2: page-directory entry. */
struct pageDirectoryEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t ignored_06:1;
	uint64_t page_size:1;
	uint64_t ignored_11_08:4;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};

/* Level 1: page-table entry mapping a single 4K page. */
struct pageTableEntry {
	uint64_t present:1;
	uint64_t writable:1;
	uint64_t user:1;
	uint64_t write_through:1;
	uint64_t cache_disable:1;
	uint64_t accessed:1;
	uint64_t dirty:1;
	uint64_t reserved_07:1;
	uint64_t global:1;
	uint64_t ignored_11_09:3;
	uint64_t address:40;
	uint64_t ignored_62_52:11;
	uint64_t execute_disable:1;
};
+
/* Concrete definition of kvm_util_vm_t. */

/* One KVM userspace memory region (memslot) and the host mapping that
 * backs it.
 */
struct userspace_mem_region {
	struct userspace_mem_region *next, *prev; /* list links */
	struct kvm_userspace_memory_region region; /* as passed to KVM_SET_USER_MEMORY_REGION */
	enum vm_mem_backing_src_type backing_src_type;
	test_sparsebit_t *unused_phy_pages; /* guest physical pages not yet handed out */
	int fd;			/* backing fd (VM_MEM_SRC_FD_PRIVATE), else -1 */
	off_t offset;		/* offset into fd */
	bool caller_memory;	/* host memory owned by the caller, not mmap'ed here */
	void *host_mem;		/* start of usable (aligned) host memory */
	void *mmap_start;	/* raw mmap() return value */
	size_t mmap_size;
};

/* Per-vCPU bookkeeping. */
struct vcpu {
	struct vcpu *next, *prev; /* list links */
	uint32_t id;		/* vcpuid given by the test */
	int fd;			/* vcpu fd used for vcpu-scoped ioctls */
	struct kvm_run *state;	/* shared run state; presumably the mmap'ed
				 * kvm_run area -- confirm in vm_vcpu_add */
};

/* Complete state of a test VM; opaque to callers of kvm_util. */
struct kvm_util_vm {
	int mode;		/* enum guest_mode */
	int fd;			/* vm fd from KVM_CREATE_VM */
	unsigned int page_size;
	uint64_t ppgidx_max; /* Maximum physical page index */
	struct vcpu *vcpu_head;
	struct userspace_mem_region *userspace_mem_region_head;
	test_sparsebit_t *vpages_valid;  /* guest virtual pages that may be allocated */
	test_sparsebit_t *vpages_mapped; /* guest virtual pages currently mapped */
	bool virt_l4_created;
	vm_paddr_t virt_l4;	/* top-level page table; presumably the PML4
				 * physical address -- confirm at use sites */
};
+
+/* File Scope Function Prototypes */
+static int vcpu_mmap_sz(void);
+static bool hugetlb_supported(const kvm_util_vm_t *vm, uint64_t npages);
+static const struct userspace_mem_region *userspace_mem_region_find(
+ const kvm_util_vm_t *vm, uint64_t start, uint64_t end);
+static const struct vcpu *vcpu_find(const kvm_util_vm_t *vm,
+ uint32_t vcpuid);
+static vm_paddr_t phy_page_alloc(kvm_util_vm_t *vm,
+ vm_paddr_t paddr_min, uint32_t memslot);
+static struct userspace_mem_region *memslot2region(kvm_util_vm_t *vm,
+ uint32_t memslot);
+static int get_hugepfnmap_size(void *start_addr);
+
+/* Capability
+ *
+ * Input Args:
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_util_cap(long cap)
+{
+ int rv;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rv: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ rv = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(rv != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ " rv: %i errno: %i", rv, errno);
+
+ close(kvm_fd);
+
+ return rv;
+}
+
+/* VM Create
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * phy_pages - Physical memory pages
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0. The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+kvm_util_vm_t *vm_create(enum guest_mode mode, uint64_t phy_pages, int perm)
+{
+ kvm_util_vm_t *vm;
+ int kvm_fd;
+
+ /* Allocate memory. */
+ vm = calloc(1, sizeof(*vm));
+ TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+ vm->mode = mode;
+ kvm_fd = open(KVM_DEV_PATH, perm);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rv: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ /* Create VM. */
+ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+ "rv: %i errno: %i", vm->fd, errno);
+
+ close(kvm_fd);
+
+ /* Setup mode specific traits. */
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ vm->page_size = 0x1000; /* 4K */
+
+ /* Limit to 48-bit canonical virtual addresses. */
+ vm->vpages_valid = test_sparsebit_alloc();
+ test_sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (48 - 1)) / vm->page_size);
+ test_sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (48 - 1)) - 1)) / vm->page_size,
+ (1ULL << (48 - 1)) / vm->page_size);
+
+ /* Limit physical addresses to 52-bits. */
+ vm->ppgidx_max = ((1ULL << 52) / vm->page_size) - 1;
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ }
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = test_sparsebit_alloc();
+ if (phy_pages != 0)
+ vm_userspace_mem_region_add(vm, NULL,
+ 0, 0, phy_pages, 0);
+
+ return vm;
+}
+
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
kvm_util_vm_t *vm_create_default(uint32_t vcpuid, void *guest_code)
{
	kvm_util_vm_t *vm;

	/* Create VM */
	vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);

	/* Setup guest code: load this very test binary into the guest, so
	 * guest_code (a function in this binary) is mapped at the same
	 * virtual address inside the guest.
	 */
	kvm_util_vm_elf_load(vm, program_invocation_name, 0, 0);

	/* Setup IRQ Chip */
	vm_create_irqchip(vm);

	/* Add the first vCPU. */
	vm_vcpu_add_default(vm, vcpuid, guest_code);

	return vm;
}
+
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
void vm_vcpu_add_default(kvm_util_vm_t *vm, uint32_t vcpuid, void *guest_code)
{
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	vm_vaddr_t stack_vaddr;
	/* Allocate a guest stack for the new vCPU. */
	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
		DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

	/* Create VCPU */
	vm_vcpu_add(vm, vcpuid);

	/* Setup guest general purpose registers */
	vcpu_regs_get(vm, vcpuid, &regs);
	/* RFLAGS bit 1 is architecturally reserved and must be 1. */
	regs.rflags = regs.rflags | 0x2;
	/* Stack grows down: start rsp at the top of the allocation. */
	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
	regs.rip = (unsigned long) guest_code;
	vcpu_regs_set(vm, vcpuid, &regs);

	/* Setup the MP state */
	/* 0 is presumably KVM_MP_STATE_RUNNABLE -- TODO confirm; the
	 * named constant would be clearer.
	 */
	mp_state.mp_state = 0;
	vcpu_set_mp_state(vm, vcpuid, &mp_state);
}
+
+/* Create a default VM for VMX tests.
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
kvm_util_vm_t *
vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
{
	struct kvm_cpuid2 *cpuid;
	kvm_util_vm_t *vm;
	vm_vaddr_t vmxon_vaddr;
	vm_paddr_t vmxon_paddr;
	vm_vaddr_t vmcs_vaddr;
	vm_paddr_t vmcs_paddr;

	vm = vm_create_default(vcpuid, (void *) guest_code);

	/* Enable nesting in CPUID */
	/* CPUID.01H:ECX bit 5 advertises VMX support to the guest. */
	cpuid = allocate_kvm_cpuid2();
	kvm_get_supported_cpuid(cpuid);
	find_cpuid_entry(cpuid, 0x1)->ecx |= (1 << 5) /* VMX */;
	vcpu_set_cpuid(vm, vcpuid, cpuid);
	/* Safe to free here: KVM_SET_CPUID2 copies the table into the
	 * kernel (per the KVM API docs).
	 */
	free(cpuid);

	/* Setup of a region of guest memory for the vmxon region. */
	vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
	vmxon_paddr = addr_vmvirt2vmphy(vm, vmxon_vaddr);

	/* Setup of a region of guest memory for a vmcs. */
	vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
	vmcs_paddr = addr_vmvirt2vmphy(vm, vmcs_vaddr);

	/* Pass both virtual and physical addresses of the two regions to
	 * the guest entry point.
	 */
	vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
		vmcs_paddr);

	return vm;
}
+
+/*
+ * Getter for the VM's fd.
+ */
int vm_fd(const kvm_util_vm_t *vm)
{
	/* The fd returned by KVM_CREATE_VM; used for vm-scoped ioctls. */
	return vm->fd;
}
+
+/*
+ * Getter for a VCPU's fd.
+ */
+int vcpu_fd(const kvm_util_vm_t *vm, uint32_t vcpuid)
+{
+ return vcpu_find(vm, vcpuid)->fd;
+}
+
+/* VM Free
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vmpp - Pointer to pointer to opaque type that describes the VM.
+ *
+ * Return: None
+ *
+ * Destroys and frees the VM pointed to by *vmpp. On success, the
+ * contents of *vmpp is poisoned, such that any further use causes
+ * a SEGV.
+ */
void kvm_util_vm_free(kvm_util_vm_t **vmpp)
{
	int rv;
	kvm_util_vm_t *vmp = *vmpp;

	if (vmp == NULL)
		return;

	/* Free userspace_mem_regions. */
	while (vmp->userspace_mem_region_head) {
		struct userspace_mem_region *region
			= vmp->userspace_mem_region_head;

		/* memory_size == 0 tells KVM to delete the memslot. */
		region->region.memory_size = 0;
		rv = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
			&region->region);
		TEST_ASSERT(rv == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
			"rv: %i errno: %i", rv, errno);

		/* Unlink before tearing down the host-side backing. */
		vmp->userspace_mem_region_head = region->next;
		test_sparsebit_free(&region->unused_phy_pages);
		switch (region->backing_src_type) {
		case VM_MEM_SRC_PMEM_HUGE:
			rv = get_hugepfnmap_size(region->mmap_start);
			/*
			 * Users of /dev/pmem may not fault in region->mmap_size
			 * entirely. e.g. in the demand paging tests.
			 * Hence, the ASSERT below can only check if HugePFNMap
			 * is within a range.
			 */
			TEST_ASSERT(rv > 0 && rv <= (region->mmap_size / 1024),
				"HugePFNMap: %d out of range", rv);

			rv = munmap(region->mmap_start, region->mmap_size);
			TEST_ASSERT(rv == 0, "munmap failed, rv: %i errno: %i",
				rv, errno);
			break;
		case VM_MEM_SRC_PMEM_SMALL:
			/* Small-page pmem mappings must have no huge PFN maps. */
			rv = get_hugepfnmap_size(region->mmap_start);
			TEST_ASSERT(rv == 0, "unexpected HugePFNMap size: %d",
				rv);

			rv = munmap(region->mmap_start, region->mmap_size);
			TEST_ASSERT(rv == 0, "munmap failed, rv: %i errno: %i",
				rv, errno);
			break;
		case VM_MEM_SRC_ANONYMOUS:
		case VM_MEM_SRC_ANONYMOUS_THP:
		case VM_MEM_SRC_ANONYMOUS_HUGETLB:
		case VM_MEM_SRC_FD_PRIVATE:
			/* NOTE(review): for VM_MEM_SRC_FD_PRIVATE the
			 * caller-supplied region->fd is not closed here --
			 * confirm whether the fd is owned by the caller.
			 */
			rv = munmap(region->mmap_start, region->mmap_size);
			TEST_ASSERT(rv == 0, "munmap failed, rv: %i errno: %i",
				rv, errno);
			break;

		default:
			/* Caller-maintained and dir-backed memory is not
			 * owned by this library; nothing to unmap.
			 */
			TEST_ASSERT((region->backing_src_type
				== VM_MEM_SRC_CALLER_MAINTAINED)
				|| (region->backing_src_type
				== VM_MEM_SRC_DIR),
				"Unexpected backing_source: 0x%i",
				region->backing_src_type);
			/* Intentional, nothing to do */
			break;
		}

		free(region);
	}

	/* Free VCPUs. */
	while (vmp->vcpu_head)
		vm_vcpu_rm(vmp, vmp->vcpu_head->id);

	/* Free sparsebit arrays. */
	test_sparsebit_free(&vmp->vpages_valid);
	test_sparsebit_free(&vmp->vpages_mapped);

	/* Close file descriptor for the VM. */
	rv = close(vmp->fd);
	TEST_ASSERT(rv == 0, "Close of vm fd failed,\n"
		" vmp->fd: %i rv: %i errno: %i", vmp->fd, rv, errno);

	/* Free the structure describing the VM. */
	free(vmp);
	/* Poison the caller's pointer so stale use SEGVs. */
	*vmpp = NULL;
}
+
#if 0
/* NOTE(review): dead code -- compiled out via #if 0. Either remove it
 * or enable it; commented-out code tends to rot.
 */
/* Allocate kvm_dirty_log
 *
 * Input Args:
 *   region - The memslot to track.
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to the allocated kvm_dirty_log struct. Never returns NULL.
 *
 * Allocates a kvm_dirty_log struct for a corresponding memslot.
 */
struct kvm_dirty_log *
allocate_kvm_dirty_log(const struct kvm_userspace_memory_region *region)
{
	struct kvm_dirty_log *dirty_log;
	/* One bit per 4K page. */
	size_t bitmap_size = region->memory_size / 4096 / 8;

	dirty_log = calloc(1, sizeof(*dirty_log));
	TEST_ASSERT(dirty_log, "Failed to allocate struct kvm_dirty_log.");

	dirty_log->slot = region->slot;
	dirty_log->dirty_bitmap = calloc(1, bitmap_size);
	TEST_ASSERT(dirty_log->dirty_bitmap,
		"Failed to allocate dirty_bitmap (%lu bytes).",
		bitmap_size);

	return dirty_log;
}
#endif
+
+/* VM Get Dirty Log
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * logp - pointer to kvm dirty log
+ *
+ * Return:
+ * Return value from KVM_GET_DIRTY_LOG IOCTL call.
+ *
+ * Performs the KVM_GET_DIRTY_LOG IOCTL call to obtain the dirty log
+ * for the kvm memory slot given by logp->slot.
+ */
+int kvm_util_vm_get_dirty_log(const kvm_util_vm_t *vm,
+ struct kvm_dirty_log *logp)
+{
+ int rv;
+
+ rv = ioctl(vm->fd, KVM_GET_DIRTY_LOG, logp);
+
+ return rv;
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ * hvirt - Starting host virtual address
+ * vm - Virtual Machine
+ * vmvirt - Starting guest virtual address
+ * len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ * Returns 0 if the bytes starting at hvirt for a length of len
+ * are equal the guest virtual bytes starting at vmvirt. Returns
+ * a value < 0, if bytes at hvirt are less than those at vmvirt.
+ * Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hvirt, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by vmvirt.
+ */
int kvm_util_memcmp_hvirt_gvirt(const host_vaddr_t hvirt,
	const kvm_util_vm_t *vm, const vm_vaddr_t vmvirt, size_t len)
{
	size_t amt;

	/* Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		host_vaddr_t ptr1 = hvirt + offset;

		/* Determine host address for guest virtual address
		 * at offset.
		 */
		host_vaddr_t ptr2 = addr_vmvirt2hvirt(vm, vmvirt + offset);

		/* Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary,
		 * because guest-virtually-contiguous pages need not be
		 * host-contiguous.
		 */
		amt = len - offset;
		if (((uintptr_t) ptr1 / vm->page_size)
			!= (((uintptr_t) ptr1 + amt) / vm->page_size))
			amt = vm->page_size - ((uintptr_t) ptr1
				% vm->page_size);
		if (((uintptr_t) ptr2 / vm->page_size)
			!= (((uintptr_t) ptr2 + amt) / vm->page_size))
			amt = vm->page_size - ((uintptr_t) ptr2
				% vm->page_size);
		/* Sanity check: both spans must now lie within one page. */
		TEST_ASSERT((((uintptr_t) ptr1 / vm->page_size)
			== (((uintptr_t) ptr1 + amt - 1)
			/ vm->page_size))
			&& (((uintptr_t) ptr2 / vm->page_size)
			== (((uintptr_t) ptr2 + amt - 1)
			/ vm->page_size)),
			"Attempt to cmp host to guest memory across a page "
			"boundary,\n"
			" ptr1: %p ptr2: %p\n"
			" amt: 0x%zx page_size: 0x%x",
			ptr1, ptr2, amt, vm->page_size);

		/* Perform the comparison. If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int rv = memcmp(ptr1, ptr2, amt);
		if (rv != 0)
			return rv;
	}

	/* No mismatch found. Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ * filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm. On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.
+ */
+void kvm_util_vm_elf_load(kvm_util_vm_t *vm, const char *filename,
+ uint32_t data_memslot, uint32_t vttbl_memslot)
+{
+ off_t offset, offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ Elf64_Ehdr hdr;
+ test_elfhdr_get(filename, &hdr);
+
+ /* For each program header.
+ * The following ELF header members specify the location
+ * and size of the program headers:
+ *
+ * e_phoff - File offset to start of program headers
+ * e_phentsize - Size of each program header
+ * e_phnum - Number of program header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+ /* Seek to the beginning of the program header. */
+ offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to begining of program header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the program header. */
+ Elf64_Phdr phdr;
+ test_read(fd, &phdr, sizeof(phdr));
+
+ /* Skip if this header doesn't describe a loadable segment. */
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ /* Allocate memory for this segment within the VM. */
+ TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+ "memsize of 0,\n"
+ " phdr index: %u p_memsz: 0x%" PRIx64,
+ n1, (uint64_t) phdr.p_memsz);
+ vm_vaddr_t seg_vstart = phdr.p_vaddr;
+ seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ seg_vend |= vm->page_size - 1;
+ size_t seg_size = seg_vend - seg_vstart + 1;
+
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ data_memslot, vttbl_memslot);
+ TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ "virtual memory for segment at requested min addr,\n"
+ " segment idx: %u\n"
+ " seg_vstart: 0x%lx\n"
+ " vaddr: 0x%lx",
+ n1, seg_vstart, vaddr);
+ memset(addr_vmvirt2hvirt(vm, vaddr), 0, seg_size);
+ /* TODO(lhuemill): Set permissions of each memory segment
+ * based on the least-significant 3 bits of phdr.p_flags.
+ */
+
+ /* Load portion of initial state that is contained within
+ * the ELF file.
+ */
+ if (phdr.p_filesz) {
+ offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == phdr.p_offset,
+ "Seek to program segment offset failed,\n"
+ " program header idx: %u errno: %i\n"
+ " offset_rv: 0x%jx\n"
+ " expected: 0x%jx\n",
+ n1, errno, (intmax_t) offset_rv,
+ (intmax_t) phdr.p_offset);
+ test_read(fd, addr_vmvirt2hvirt(vm, phdr.p_vaddr),
+ phdr.p_filesz);
+ }
+ }
+}
+
+/* VM Clock Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args:
+ * clockp - Where to store the current time.
+ *
+ * Return:
+ * 0 on success, -1 on failure, with errno specifying reason for failure.
+ *
+ * Obtains the current time for the vm specified by vm and stores it
+ * at the location specified by clockp.
+ */
+int vm_clock_get(const kvm_util_vm_t *vm, struct kvm_clock_data *clockp)
+{
+ int rv;
+
+ rv = ioctl(vm->fd, KVM_GET_CLOCK, clockp);
+
+ return rv;
+}
+
+/* VM Clock Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * clockp - Pointer to time to be set
+ *
+ * Output Args: None
+ *
+ * Return:
+ * 0 on success, -1 on failure, with errno specifying reason for failure.
+ *
+ * Sets the time of the VM specified by vm to the time pointed to by clockp.
+ */
+int vm_clock_set(kvm_util_vm_t *vm, const struct kvm_clock_data *clockp)
+{
+ int rv;
+
+ rv = ioctl(vm->fd, KVM_SET_CLOCK, clockp);
+
+ return rv;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100; /* copied from vanadium */
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ TEST_ASSERT(cpuid != NULL, "Insufficient memory.");
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * cpuid - The supported KVM CPUID
+ *
+ * Return: void
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
+{
+ int rv;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rv: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ rv = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(rv == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ rv, errno);
+
+ close(kvm_fd);
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
+ uint32_t index)
+{
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ * cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(kvm_util_vm_t *vm,
+ uint32_t vcpuid, const struct kvm_cpuid2 *cpuid)
+{
+ int rv;
+ const struct vcpu *vcpu;
+
+ vcpu = vcpu_find(vm, vcpuid);
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ rv = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ TEST_ASSERT(rv == 0, "KVM_SET_CPUID2 failed, rv: %i errno: %i",
+ rv, errno);
+
+}
+
+static bool has_hugepfn_flag(char *str)
+{
+ char *saveptr;
+ char *tok;
+
+ do {
+ tok = strtok_r(str, " ", &saveptr);
+ str = NULL;
+
+ if (!strcmp("hp", tok))
+ return true;
+ } while (tok);
+
+ return false;
+}
+
/* Returns the HugePFNMap size in kB reported by /proc/self/smaps for
 * the mapping that starts at start_addr, 0 when the mapping has no
 * HugePFNMap/"hp" VmFlag, or -ENOENT if smaps cannot be opened.
 * (HugePFNMap/"hp" are non-standard smaps fields -- presumably only
 * present on kernels carrying the pmem patches; verify on the target
 * kernel.)
 */
static int get_hugepfnmap_size(void *start_addr)
{
	FILE *fp = fopen("/proc/self/smaps", "r");
	bool found_map = false;
	char *line, *path, c;
	void *start, *end;
	unsigned long offset;
	int major, minor, inode, sz = 0, ret = 0;

	if (!fp)
		return -ENOENT;

	while (1) {
		int r;

		/* "%m" makes fscanf allocate line; "%c" eats the newline.
		 * r == 1 means line was allocated but no trailing newline
		 * matched (last line before EOF).
		 * NOTE(review): on an empty line r == 0 and `line` is left
		 * uninitialized/stale when dereferenced below -- confirm
		 * smaps never emits empty lines.
		 */
		r = fscanf(fp, "%m[^\n]%c", &line, &c);

		if (r == 1)
			free(line);

		if (r == EOF || r == 1)
			goto out;

		if (isdigit(line[0])) {
			char bits[4];

			/* Range header, e.g. "40000000-40200000 rw-s ...".
			 * NOTE(review): the "%m[^\n]" for path allocates
			 * memory that is never freed (leaked once per
			 * header line that has a path).
			 */
			r = sscanf(line, "%lx-%lx %4c %lx %x:%x %d %m[^\n]",
				(unsigned long *) &start,
				(unsigned long *) &end,
				bits, &offset, &major, &minor,
				&inode, &path);

			if ((unsigned long) start_addr == (unsigned long) start)
				found_map = true;

		} else if (found_map && (strstr(line, "HugePFNMap:") == line)) {
			/* Size line of the mapping we care about. */
			r = sscanf(line, "HugePFNMap: %d kB", &sz);

			if (!sz)
				break;
		} else if (found_map && (strstr(line, "VmFlags:") == line)) {

			/* Only report the size if the "hp" flag is set. */
			if (has_hugepfn_flag(line + strlen("VmFlags:")))
				ret = sz;
			break;
		}

		free(line);
	}

	free(line);
out:
	fclose(fp);
	return ret;
}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * backing_src - Storage source for this region.
+ * NULL to use anonymous memory.
+ * guest_paddr - Starting guest physical address
+ * slot - KVM region slot
+ * npages - Number of physical pages
+ * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr. The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
+ * region is created with the flags given by flags.
+ */
void vm_userspace_mem_region_add(kvm_util_vm_t *vm,
	struct vm_mem_backing_src *backing_src,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int rv;
	unsigned long pmem_size = 0;
	struct userspace_mem_region *region;
	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;

	/* For now (may change in the future), use anonymous mmap as the
	 * default backing source. In the future, the default backing
	 * source can be changed to any source that doesn't take a
	 * backing arg.
	 */
	enum vm_mem_backing_src_type src_type
		= (backing_src) ? backing_src->type : VM_MEM_SRC_ANONYMOUS;

	TEST_ASSERT(src_type != VM_MEM_SRC_DIR,
		"Not Yet Supported, src_type: 0x%x", src_type);

	/* Validate alignment and that the range fits below ppgidx_max. */
	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		" guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr / vm->page_size) + npages) - 1)
		<= vm->ppgidx_max, "Physical range beyond maximum "
		"supported physical address,\n"
		" guest_paddr: 0x%lx npages: 0x%lx\n"
		" vm->ppgidx_max: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->ppgidx_max, vm->page_size);

	/* Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, guest_paddr + npages * vm->page_size);
	if (region != NULL)
		TEST_ASSERT(false, "overlapping userspace_mem_region already "
			"exists\n"
			" requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			" existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists.
	 * NOTE(review): the physical-range test below uses inclusive
	 * bounds (<= / >=) although end addresses are exclusive, so a
	 * region exactly adjacent to an existing one is also rejected --
	 * confirm whether that is intended.
	 */
	for (region = vm->userspace_mem_region_head; region;
		region = region->next) {
		if (region->region.slot == slot)
			break;
		if ((guest_paddr <= (region->region.guest_phys_addr
			+ region->region.memory_size))
			&& ((guest_paddr + npages * vm->page_size)
			>= region->region.guest_phys_addr))
			break;
	}
	if (region != NULL)
		TEST_ASSERT(false, "A mem region with the requested slot "
			"or overlapping physical memory range already exists.\n"
			" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			" existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->backing_src_type = src_type;
	/* NOTE(review): region->fd is assigned again in the
	 * VM_MEM_SRC_FD_PRIVATE case below; this initialization is
	 * redundant for that path.
	 */
	region->fd = (src_type == VM_MEM_SRC_FD_PRIVATE)
		? backing_src->fd_private.fd : -1;
	switch (src_type) {
	case VM_MEM_SRC_PMEM_SMALL:
		region->caller_memory = false;
		region->mmap_size = npages * vm->page_size;

		TEST_ASSERT(backing_src->pmem.pmem_fd > 0,
			"pmem fd not initialized: %d",
			backing_src->pmem.pmem_fd);

		rv = ioctl(backing_src->pmem.pmem_fd, BLKGETSIZE64, &pmem_size);
		TEST_ASSERT(rv == 0, "err getting Pmem size\n");

		TEST_ASSERT(region->mmap_size <= pmem_size,
			"requested size: %ld, available pmem: %ld\n",
			region->mmap_size, pmem_size);
		/* Force small page mappings */
		/* MAP_FIXED at a deliberately non-huge-aligned address
		 * (PMEM_BASE - PAGE_SIZE) prevents the kernel from using
		 * huge mappings.
		 */
		region->mmap_start = mmap((void *) (PMEM_BASE - PAGE_SIZE),
			region->mmap_size,
			PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, backing_src->pmem.pmem_fd,
			0);
		TEST_ASSERT(region->mmap_start != MAP_FAILED,
			"test_malloc failed, mmap_start: %p errno: %i",
			region->mmap_start, errno);

		/* Asserts the address is NOT huge-page aligned (non-zero
		 * low bits), i.e. small pages must have been used.
		 */
		TEST_ASSERT((unsigned long) region->mmap_start &
			(huge_page_size - 1),
			"mmap_start is not small page aligned: %lx\n",
			(unsigned long) region->mmap_start);

		region->host_mem = region->mmap_start;
		break;
	case VM_MEM_SRC_PMEM_HUGE:
		region->caller_memory = false;
		region->mmap_size = npages * vm->page_size;
		TEST_ASSERT(!(region->mmap_size & (huge_page_size - 1)),
			"mmap size not huge page aligned");

		TEST_ASSERT(backing_src->pmem.pmem_fd > 0,
			"pmem fd not initialized: %d",
			backing_src->pmem.pmem_fd);

		rv = ioctl(backing_src->pmem.pmem_fd, BLKGETSIZE64, &pmem_size);
		TEST_ASSERT(rv == 0, "err getting Pmem size\n");

		TEST_ASSERT(region->mmap_size <= pmem_size,
			"requested size: %ld, available pmem: %ld\n",
			region->mmap_size, pmem_size);

		region->mmap_start = mmap(NULL, region->mmap_size,
			PROT_READ | PROT_WRITE,
			MAP_SHARED, backing_src->pmem.pmem_fd,
			0);
		TEST_ASSERT(region->mmap_start != MAP_FAILED,
			"test_malloc failed, mmap_start: %p errno: %i",
			region->mmap_start, errno);

		/* Here the address MUST be huge-page aligned. */
		TEST_ASSERT(!((unsigned long) region->mmap_start &
			(huge_page_size - 1)),
			"mmap_start is not huge page aligned: %lx\n",
			(unsigned long) region->mmap_start);

		region->host_mem = region->mmap_start;
		break;
	case VM_MEM_SRC_ANONYMOUS:
	case VM_MEM_SRC_ANONYMOUS_THP:
	case VM_MEM_SRC_ANONYMOUS_HUGETLB:
		if ((src_type == VM_MEM_SRC_ANONYMOUS_THP)
			|| (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB)) {
			TEST_ASSERT(hugetlb_supported(vm, npages),
				"Unsupported huge TLB settings,\n"
				" src_type: 0x%x\n"
				" npages: 0x%lx", src_type, npages);
		}
		region->caller_memory = false;
		region->mmap_size = npages * vm->page_size;
		if (src_type == VM_MEM_SRC_ANONYMOUS_THP) {
			/* Enough memory to align up to a huge page. */
			region->mmap_size += huge_page_size;
		}
		region->mmap_start = mmap(NULL, region->mmap_size,
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS
			| (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB
			? MAP_HUGETLB : 0),
			-1, 0);
		TEST_ASSERT(region->mmap_start != MAP_FAILED,
			"test_malloc failed, mmap_start: %p errno: %i",
			region->mmap_start, errno);

		/* Align THP allocation up to start of a huge page. */
		region->host_mem = align(region->mmap_start,
			src_type == VM_MEM_SRC_ANONYMOUS_THP
			? huge_page_size : 1);

		/* As needed perform madvise */
		if ((src_type == VM_MEM_SRC_ANONYMOUS)
			|| (src_type == VM_MEM_SRC_ANONYMOUS_THP)) {
			rv = madvise(region->host_mem, npages * vm->page_size,
				(src_type == VM_MEM_SRC_ANONYMOUS)
				? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
			TEST_ASSERT(rv == 0, "madvise failed,\n"
				" addr: %p\n"
				" length: 0x%lx\n"
				" src_type: %x",
				region->host_mem, npages * vm->page_size,
				src_type);
		}

		break;

	case VM_MEM_SRC_FD_PRIVATE:
		region->caller_memory = false;
		region->fd = backing_src->fd_private.fd;
		region->offset = backing_src->fd_private.offset;
		region->mmap_size = npages * vm->page_size;
		region->mmap_start = mmap(NULL, region->mmap_size,
			PROT_READ | PROT_WRITE, MAP_PRIVATE,
			region->fd, region->offset);
		TEST_ASSERT(region->mmap_start != MAP_FAILED,
			"test_malloc failed, mmap_start: %p errno: %i",
			region->mmap_start, errno);
		region->host_mem = region->mmap_start;
		break;

	case VM_MEM_SRC_DIR:
		/* Unreachable: VM_MEM_SRC_DIR is rejected by the assert
		 * at the top of this function.
		 */
		TEST_ASSERT(backing_src != NULL,
			"Unexpected NULL backing_src for VM_MEM_SRC_DIR");
		/* TODO(lhuemill): implement VM_MEM_SRC_DIR backing src. */
		break;

	case VM_MEM_SRC_CALLER_MAINTAINED:
		region->caller_memory = true;
		TEST_ASSERT(backing_src != NULL,
			"Unexpected NULL backing_src for "
			"VM_MEM_SRC_CALLER_MAINTAINED");
		region->host_mem = backing_src->caller_maintained.mem_start;
		break;

	default:
		TEST_ASSERT(false, "Unknown backing source, src_type: 0x%i",
			src_type);
		/* NOT REACHED */
	}

	/* All pages start out unused. */
	region->unused_phy_pages = test_sparsebit_alloc();
	test_sparsebit_set_num(region->unused_phy_pages,
		guest_paddr / vm->page_size, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	rv = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(rv == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		" rv: %i errno: %i\n"
		" slot: %u flags: 0x%x\n"
		" guest_phys_addr: 0x%lx size: 0x%lx",
		rv, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to linked-list of memory regions. */
	if (vm->userspace_mem_region_head)
		vm->userspace_mem_region_head->prev = region;
	region->next = vm->userspace_mem_region_head;
	vm->userspace_mem_region_head = region;
}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *  slot - Slot of the memory region to modify
+ *  flags - Flags to assign to the memory region
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(kvm_util_vm_t *vm, uint32_t slot, uint32_t flags)
+{
+	int rv;
+	struct userspace_mem_region *region;
+
+	/* Locate memory region. */
+	region = memslot2region(vm, slot);
+
+	/* Update the cached flags and push the change to KVM.
+	 * Fix: "&region" was mis-encoded as "(R)ion" in the ioctl call.
+	 */
+	region->region.flags = flags;
+
+	rv = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(rv == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		" rv: %i errno: %i slot: %u flags: 0x%x",
+		rv, errno, slot, flags);
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(kvm_util_vm_t *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	struct kvm_sregs sregs;
+
+	/* Confirm a vcpu with the specified id doesn't already exist. */
+	vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	if (vcpu != NULL)
+		TEST_ASSERT(false, "vcpu with the specified id "
+			"already exists,\n"
+			" requested vcpuid: %u\n"
+			" existing vcpuid: %u state: %p",
+			vcpuid, vcpu->id, vcpu->state);
+
+	/* Allocate and initialize new vcpu structure. */
+	vcpu = calloc(1, sizeof(*vcpu));
+	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+	vcpu->id = vcpuid;
+	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rv: %i errno: %i",
+		vcpu->fd, errno);
+
+	/* Map the shared kvm_run state.  The kernel-reported mmap size
+	 * must be at least as large as the kvm_run structure being mapped.
+	 */
+	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+		vcpu_mmap_sz(), sizeof(*vcpu->state));
+	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+		"vcpu id: %u errno: %i", vcpuid, errno);
+
+	/* Add to linked-list of VCPUs (new VCPU becomes the list head). */
+	if (vm->vcpu_head)
+		vm->vcpu_head->prev = vcpu;
+	vcpu->next = vm->vcpu_head;
+	vm->vcpu_head = vcpu;
+
+	/* Set mode specific system register values. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	switch (vm->mode) {
+	case VM_MODE_FLAT48PG:
+		/* Long mode (LME/LMA) with paging and flat kernel
+		 * code/data segments.
+		 */
+		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+		sregs.cr4 |= X86_CR4_PAE;
+		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+		setUnusableSegment(&sregs.ldt);
+		setLongModeFlatKernelCodeSegment(0x8, &sregs.cs);
+		setLongModeFlatKernelDataSegment(0x10, &sregs.ds);
+		setLongModeFlatKernelDataSegment(0x10, &sregs.es);
+		break;
+
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+
+	/* If virtual translation table have been setup, set system register
+	 * to point to the tables. It's okay if they haven't been setup yet,
+	 * in that the code that sets up the virtual translation tables, will
+	 * go back through any VCPUs that have already been created and set
+	 * their values.
+	 */
+	if (vm->virt_l4_created) {
+		struct kvm_sregs sregs;	/* NOTE(review): shadows outer sregs */
+
+		vcpu_sregs_get(vm, vcpuid, &sregs);
+
+		/* Point CR3 at the already-built page map level 4 table. */
+		sregs.cr3 = vm->virt_l4;
+		vcpu_sregs_set(vm, vcpuid, &sregs);
+	}
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+void vm_vcpu_rm(kvm_util_vm_t *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	int rv;
+
+	/* Consistent with the other per-VCPU functions: fail loudly on an
+	 * unknown vcpuid instead of dereferencing NULL below.
+	 */
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Unmap the kvm_run state that vm_vcpu_add() mapped, before
+	 * closing the fd; otherwise the mapping leaks on every removal.
+	 */
+	rv = munmap(vcpu->state, sizeof(*vcpu->state));
+	TEST_ASSERT(rv == 0, "munmap of VCPU state failed, rv: %i "
+		"errno: %i", rv, errno);
+
+	rv = close(vcpu->fd);
+	TEST_ASSERT(rv == 0, "Close of VCPU fd failed, rv: %i "
+		"errno: %i", rv, errno);
+
+	/* Unlink from the VM's VCPU list and release the tracking struct. */
+	if (vcpu->next)
+		vcpu->next->prev = vcpu->prev;
+	if (vcpu->prev)
+		vcpu->prev->next = vcpu->next;
+	else
+		vm->vcpu_head = vcpu->next;
+	free(vcpu);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size (bytes)
+ * vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Lowest virtual address at or below vaddr_min, with at least
+ * sz unused bytes. TEST_ASSERT failure if no area of at least
+ * size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+vm_vaddr_t vm_vaddr_unused_gap(const kvm_util_vm_t *vm, size_t sz,
+	vm_vaddr_t vaddr_min)
+{
+	/* Number of virtual pages needed to hold sz bytes (rounded up). */
+	uint64_t pages = (sz + (vm->page_size - 1)) / vm->page_size;
+
+	/* Determine lowest permitted virtual page index. */
+	uint64_t pgidx_start = (vaddr_min + (vm->page_size - 1))
+		/ vm->page_size;
+	/* Rounding up wrapped around: nothing available >= vaddr_min. */
+	if ((pgidx_start * vm->page_size) < vaddr_min)
+		goto no_va_found;
+
+	/* Loop over section with enough valid virtual page indexes. */
+	if (!test_sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages))
+		pgidx_start = test_sparsebit_next_set_num(vm->vpages_valid,
+			pgidx_start, pages);
+	do {
+		/*
+		 * Are there enough unused virtual pages available at
+		 * the currently proposed starting virtual page index.
+		 * If not, adjust proposed starting index to next
+		 * possible.
+		 */
+		if (test_sparsebit_is_clear_num(vm->vpages_mapped,
+			pgidx_start, pages))
+			goto va_found;
+		pgidx_start = test_sparsebit_next_clear_num(vm->vpages_mapped,
+			pgidx_start, pages);
+		/* The sparsebit next_* helpers signal "no further
+		 * candidate" by returning 0.
+		 */
+		if (pgidx_start == 0)
+			goto no_va_found;
+
+		/*
+		 * If needed, adjust proposed starting virtual address,
+		 * to next range of valid virtual addresses.
+		 */
+		if (!test_sparsebit_is_set_num(vm->vpages_valid,
+			pgidx_start, pages)) {
+			pgidx_start = test_sparsebit_next_set_num(
+				vm->vpages_valid, pgidx_start, pages);
+			if (pgidx_start == 0)
+				goto no_va_found;
+		}
+	} while (pgidx_start != 0);
+
+no_va_found:
+	TEST_ASSERT(false, "No vaddr of specified pages available, "
+		"pages: 0x%lx", pages);
+
+	/* NOT REACHED */
+	return -1;
+
+va_found:
+	/* Sanity check: the chosen range must be valid and unmapped. */
+	TEST_ASSERT(test_sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages),
+		"Unexpected, invalid virtual page index range,\n"
+		" pgidx_start: 0x%lx\n"
+		" pages: 0x%lx",
+		pgidx_start, pages);
+	TEST_ASSERT(test_sparsebit_is_clear_num(vm->vpages_mapped,
+		pgidx_start, pages),
+		"Unexpected, pages already mapped,\n"
+		" pgidx_start: 0x%lx\n"
+		" pages: 0x%lx",
+		pgidx_start, pages);
+
+	return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size in bytes
+ * vaddr_min - Minimum starting virtual address
+ * data_memslot - Memory region slot for data pages
+ * vttbl_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm. The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min. Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(kvm_util_vm_t *vm, size_t sz, vm_vaddr_t vaddr_min,
+	uint32_t data_memslot, uint32_t vttbl_memslot)
+{
+	/* Pages needed to hold sz bytes (rounded up to a whole page). */
+	uint64_t pages = (sz / vm->page_size) + ((sz % vm->page_size) != 0);
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	/* If needed, create page map l4 table. */
+	if (!vm->virt_l4_created) {
+		vm_paddr_t paddr = phy_page_alloc(vm,
+			KVM_UTIL_VIRT_MIN_PADDR, vttbl_memslot);
+		vm->virt_l4 = paddr;
+
+		/* Set pointer to virt_l4 tables in all the VCPUs that
+		 * have already been created. Future VCPUs will have
+		 * the value set as each one is created.
+		 */
+		for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
+			vcpu = vcpu->next) {
+			struct kvm_sregs sregs;
+
+			/* Obtain the current system register settings */
+			vcpu_sregs_get(vm, vcpu->id, &sregs);
+
+			/* Set and store the pointer to the start of the
+			 * virt_l4 tables.
+			 */
+			sregs.cr3 = vm->virt_l4;
+			vcpu_sregs_set(vm, vcpu->id, &sregs);
+		}
+
+		vm->virt_l4_created = true;
+	}
+
+	/* Find an unused range of virtual page addresses of at least
+	 * pages in length.
+	 */
+	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+	/* Map the virtual pages.  Each virtual page is backed by a freshly
+	 * allocated physical page and recorded in vpages_mapped.
+	 */
+	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+		pages--, vaddr += vm->page_size) {
+		vm_paddr_t paddr;
+
+		paddr = phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+		virt_pg_map(vm, vaddr, paddr, vttbl_memslot);
+
+		test_sparsebit_set(vm->vpages_mapped,
+			vaddr / vm->page_size);
+	}
+
+	return vaddr_start;
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vmphy - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by vmphy, within the VM given by vm. When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing vmphy exists.
+ */
+host_vaddr_t addr_vmphy2hvirt(const kvm_util_vm_t *vm, vm_paddr_t vmphy)
+{
+	struct userspace_mem_region *region = vm->userspace_mem_region_head;
+
+	/* Walk the memslot list until a region covers the requested gpa,
+	 * then return the matching offset into its host mapping.
+	 */
+	while (region) {
+		if ((vmphy >= region->region.guest_phys_addr)
+			&& (vmphy <= (region->region.guest_phys_addr
+				+ region->region.memory_size - 1)))
+			return (host_vaddr_t) ((uintptr_t) region->host_mem
+				+ (vmphy - region->region.guest_phys_addr));
+		region = region->next;
+	}
+
+	TEST_ASSERT(false, "No vm physical memory at 0x%lx", vmphy);
+	return NULL;
+}
+
+/* Address VM Virtual to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *  vmvirt - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Translates the VM virtual address given by vmvirt to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm. When found, the host
+ * virtual address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+host_vaddr_t addr_vmvirt2hvirt(const kvm_util_vm_t *vm, vm_vaddr_t vmvirt)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
+	host_vaddr_t hvirt;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	/* Extract the four 9-bit table indexes from the virtual address:
+	 * index[0] = page table, index[1] = page directory,
+	 * index[2] = page directory pointer, index[3] = page map level 4.
+	 */
+	index[0] = (vmvirt >> 12) & 0x1ffu;
+	index[1] = (vmvirt >> 21) & 0x1ffu;
+	index[2] = (vmvirt >> 30) & 0x1ffu;
+	index[3] = (vmvirt >> 39) & 0x1ffu;
+
+	/* Walk each level; a non-present entry means vmvirt is unmapped.
+	 * The entries' 'address' fields hold page frame numbers, hence the
+	 * scaling by vm->page_size.
+	 */
+	if (!vm->virt_l4_created)
+		goto unmapped_vmvirt;
+	pml4e = addr_vmphy2hvirt(vm, vm->virt_l4);
+	if (!pml4e[index[3]].present)
+		goto unmapped_vmvirt;
+
+	pdpe = addr_vmphy2hvirt(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present)
+		goto unmapped_vmvirt;
+
+	pde = addr_vmphy2hvirt(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present)
+		goto unmapped_vmvirt;
+
+	pte = addr_vmphy2hvirt(vm, pde[index[1]].address * vm->page_size);
+	if (!pte[index[0]].present)
+		goto unmapped_vmvirt;
+
+	/* Final translation: page base plus the offset within the page. */
+	hvirt = addr_vmphy2hvirt(vm, pte[index[0]].address * vm->page_size);
+
+	return hvirt + (vmvirt & 0xfffu);
+
+unmapped_vmvirt:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		"vmvirt: 0x%lx", vmvirt);
+	return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * hvirt - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hvirt, within the VM given by vm. When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hvirt exists.
+ */
+vm_paddr_t addr_hvirt2vmphy(const kvm_util_vm_t *vm, host_vaddr_t hvirt)
+{
+	struct userspace_mem_region *region = vm->userspace_mem_region_head;
+
+	/* Find the region whose host mapping contains hvirt and map the
+	 * offset back onto the guest physical range of that region.
+	 */
+	while (region) {
+		if ((hvirt >= region->host_mem)
+			&& (hvirt <= (region->host_mem
+				+ region->region.memory_size - 1)))
+			return (vm_paddr_t) ((uintptr_t)
+				region->region.guest_phys_addr
+				+ (hvirt - (uintptr_t) region->host_mem));
+		region = region->next;
+	}
+
+	TEST_ASSERT(false, "No mapping to a guest physical address, "
+		"hvirt: %p", hvirt);
+	return -1;
+}
+
+/* Address VM Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vmvirt - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ */
+vm_paddr_t addr_vmvirt2vmphy(const kvm_util_vm_t *vm, vm_vaddr_t vmvirt)
+{
+	/* Translate gva -> hva, then map that hva back to its gpa. */
+	return addr_hvirt2vmphy(vm, addr_vmvirt2hvirt(vm, vmvirt));
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(kvm_util_vm_t *vm)
+{
+	/* Ask KVM to instantiate an in-kernel interrupt controller. */
+	int ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+
+	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+		"rv: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(const kvm_util_vm_t *vm, uint32_t vcpuid)
+{
+	/* Look up the VCPU and hand back its mmap'ed kvm_run area. */
+	const struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(kvm_util_vm_t *vm, uint32_t vcpuid)
+{
+	const struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Enter the guest; retry when the ioctl is interrupted by a
+	 * signal (EINTR).
+	 */
+	do {
+		ret = ioctl(vcpu->fd, KVM_RUN, NULL);
+	} while (ret == -1 && errno == EINTR);
+	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+		"rv: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(kvm_util_vm_t *vm, uint32_t vcpuid,
+	const struct kvm_mp_state *mp_state)
+{
+	const struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Push the requested multiprocessing state to KVM. */
+	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+		"rv: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(const kvm_util_vm_t *vm,
+	uint32_t vcpuid, struct kvm_regs *regs)
+{
+	const struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Fetch the general-purpose register state from KVM. */
+	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(kvm_util_vm_t *vm,
+	uint32_t vcpuid, const struct kvm_regs *regs)
+{
+	struct vcpu *vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Load the general-purpose register state into the VCPU. */
+	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Events Get
+ *
+ * Input Args:
+ *  vm - Virtual Machine
+ *  vcpuid - VCPU ID
+ *
+ * Output Args:
+ *  events - Current VCPU events state (struct kvm_vcpu_events)
+ *
+ * Return: None
+ *
+ * Obtains the current events state of the VCPU specified by vcpuid
+ * and stores it at the location given by events.
+ */
+void vcpu_events_get(const kvm_util_vm_t *vm, uint32_t vcpuid,
+	struct kvm_vcpu_events *events)
+{
+	int rv;
+	const struct vcpu *vcpu;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the VCPU events state. */
+	rv = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+	TEST_ASSERT(rv == 0, "KVM_GET_VCPU_EVENTS, failed, rv: %i errno: %i",
+		rv, errno);
+}
+
+/* VM VCPU Events Set
+ *
+ * Input Args:
+ *  vm - Virtual Machine
+ *  vcpuid - VCPU ID
+ *  events - VCPU events state to set (struct kvm_vcpu_events)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the events state of the VCPU specified by vcpuid to the values
+ * given by events.
+ */
+void vcpu_events_set(kvm_util_vm_t *vm, uint32_t vcpuid,
+	const struct kvm_vcpu_events *events)
+{
+	int rv;
+	struct vcpu *vcpu;
+
+	vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Set the VCPU events state. */
+	rv = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+	TEST_ASSERT(rv == 0, "KVM_SET_VCPU_EVENTS, failed, rv: %i errno: %i",
+		rv, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(kvm_util_vm_t *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+
+	TEST_ASSERT((num >= 1) && (num <= 6), "Unsupported number of args,\n"
+		" num: %u\n"
+		" expected: (num >= 1) && (num <= 6)",
+		num);
+
+	/* Fix: "&regs" was mis-encoded as "(R)s" in the get/set calls.
+	 * Read the current register state, patch in the arguments per
+	 * the x86_64 SysV calling convention (rdi, rsi, rdx, rcx, r8,
+	 * r9), then write the state back.
+	 */
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	if (num >= 1)
+		regs.rdi = va_arg(ap, uint64_t);
+
+	if (num >= 2)
+		regs.rsi = va_arg(ap, uint64_t);
+
+	if (num >= 3)
+		regs.rdx = va_arg(ap, uint64_t);
+
+	if (num >= 4)
+		regs.rcx = va_arg(ap, uint64_t);
+
+	if (num >= 5)
+		regs.r8 = va_arg(ap, uint64_t);
+
+	if (num >= 6)
+		regs.r9 = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(const kvm_util_vm_t *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	const struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Fetch the system register state from KVM. */
+	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(kvm_util_vm_t *vm,
+	uint32_t vcpuid, const struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Load the system register state into the VCPU. */
+	ret = ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+	TEST_ASSERT(ret == 0, "KVM_SET_SREGS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(kvm_util_vm_t *vm,
+	uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+	struct vcpu *vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Forward an arbitrary ioctl to the VCPU fd; it must succeed. */
+	ret = ioctl(vcpu->fd, cmd, arg);
+	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rv: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(kvm_util_vm_t *vm, unsigned long cmd, void *arg)
+{
+	/* Forward an arbitrary ioctl to the VM fd; it must succeed. */
+	int ret = ioctl(vm->fd, cmd, arg);
+
+	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rv: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM VCPU xcr Regs Get
+ *
+ * Input Args:
+ *  vm - Virtual Machine
+ *  vcpuid - VCPU ID
+ *
+ * Output Args:
+ * xcrs - Values of VCPU xcr regs
+ *
+ * Return: None
+ *
+ * Gets the xcr regs of the VCPU specified by vcpuid.
+ */
+void vcpu_xcrs_get(kvm_util_vm_t *vm,
+	uint32_t vcpuid, struct kvm_xcrs *xcrs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	/* XCR access requires explicit KVM capability support. */
+	TEST_ASSERT(kvm_util_cap(KVM_CAP_XCRS),
+		"KVM does not support KVM_CAP_XCRS. Bailing.\n");
+
+	vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Read the extended control registers. */
+	ret = ioctl(vcpu->fd, KVM_GET_XCRS, xcrs);
+	TEST_ASSERT(ret == 0, "KVM_GET_XCRS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU xcr Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * xcrs - Values to set VCPU xcr regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the xcr regs of the VCPU specified by vcpuid to the values
+ * given by xcrs.
+ */
+void vcpu_xcrs_set(kvm_util_vm_t *vm,
+	uint32_t vcpuid, const struct kvm_xcrs *xcrs)
+{
+	struct vcpu *vcpu = (struct vcpu *) vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Write the extended control registers. */
+	ret = ioctl(vcpu->fd, KVM_SET_XCRS, xcrs);
+	TEST_ASSERT(ret == 0, "KVM_SET_XCRS failed, rv: %i errno: %i",
+		ret, errno);
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, const kvm_util_vm_t *vm, uint8_t indent)
+{
+	/* Basic VM attributes. */
+	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+	/* One entry per memslot, plus its unused physical pages. */
+	fprintf(stream, "%*sMem Regions:\n", indent, "");
+	for (struct userspace_mem_region *region
+		= vm->userspace_mem_region_head; region;
+		region = region->next) {
+		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+			"host_virt: %p\n", indent + 2, "",
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size,
+			region->host_mem);
+		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+		test_sparsebit_dump(stream, region->unused_phy_pages, 0);
+	}
+	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+	test_sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+	fprintf(stream, "%*svirt_l4_created: %u\n", indent, "",
+		vm->virt_l4_created);
+	/* Translation tables are only dumped once they exist. */
+	if (vm->virt_l4_created) {
+		fprintf(stream, "%*sVirtual Translation Tables:\n",
+			indent + 2, "");
+		virt_dump(stream, vm, indent + 4);
+	}
+	/* Per-VCPU register state. */
+	fprintf(stream, "%*sVCPUs:\n", indent, "");
+	for (struct vcpu *vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+		vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, const kvm_util_vm_t *vm,
+	uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+	/* Fix: "&regs" was mis-encoded as "(R)s" in the two calls below. */
+	fprintf(stream, "%*sregs:\n", indent + 2, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, indent + 4);
+
+	fprintf(stream, "%*ssregs:\n", indent + 2, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * regs - register
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by stream.
+ */
+void regs_dump(FILE *stream, const struct kvm_regs *regs,
+	uint8_t indent)
+{
+	/* Four registers per line: rax..rdx, rsi..rbp, r8..r11, r12..r15,
+	 * then rip and rflags.
+	 */
+	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
+		indent, "",
+		regs->rax, regs->rbx, regs->rcx, regs->rdx);
+	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
+		indent, "",
+		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+	fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+		"r10: 0x%.16llx r11: 0x%.16llx\n",
+		indent, "",
+		regs->r8, regs->r9, regs->r10, regs->r11);
+	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+		"r14: 0x%.16llx r15: 0x%.16llx\n",
+		indent, "",
+		regs->r12, regs->r13, regs->r14, regs->r15);
+	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+		indent, "",
+		regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * segment - KVM segment
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by stream.
+ */
+void segment_dump(FILE *stream, const struct kvm_segment *segment,
+	uint8_t indent)
+{
+	/* Dump every field of the kvm_segment over three lines. */
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+		"selector: 0x%.4x type: 0x%.2x\n",
+		indent, "", segment->base, segment->limit,
+		segment->selector, segment->type);
+	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+		indent, "", segment->present, segment->dpl,
+		segment->db, segment->s, segment->l);
+	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+		"unusable: 0x%.2x padding: 0x%.2x\n",
+		indent, "", segment->g, segment->avl,
+		segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * dtable - KVM dtable
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by stream.
+ */
+void dtable_dump(FILE *stream, const struct kvm_dtable *dtable,
+	uint8_t indent)
+{
+	/* Dump the descriptor table base/limit plus its padding words. */
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
+		indent, "", dtable->base, dtable->limit,
+		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * sregs - System registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by stream.
+ */
+void sregs_dump(FILE *stream, const struct kvm_sregs *sregs,
+	uint8_t indent)
+{
+	unsigned int i;
+
+	/* Segment registers, one labelled sub-block each. */
+	fprintf(stream, "%*scs:\n", indent, "");
+	segment_dump(stream, &sregs->cs, indent + 2);
+	fprintf(stream, "%*sds:\n", indent, "");
+	segment_dump(stream, &sregs->ds, indent + 2);
+	fprintf(stream, "%*ses:\n", indent, "");
+	segment_dump(stream, &sregs->es, indent + 2);
+	fprintf(stream, "%*sfs:\n", indent, "");
+	segment_dump(stream, &sregs->fs, indent + 2);
+	fprintf(stream, "%*sgs:\n", indent, "");
+	segment_dump(stream, &sregs->gs, indent + 2);
+	fprintf(stream, "%*sss:\n", indent, "");
+	segment_dump(stream, &sregs->ss, indent + 2);
+	fprintf(stream, "%*str:\n", indent, "");
+	segment_dump(stream, &sregs->tr, indent + 2);
+	fprintf(stream, "%*sldt:\n", indent, "");
+	segment_dump(stream, &sregs->ldt, indent + 2);
+
+	/* Descriptor tables. */
+	fprintf(stream, "%*sgdt:\n", indent, "");
+	dtable_dump(stream, &sregs->gdt, indent + 2);
+	fprintf(stream, "%*sidt:\n", indent, "");
+	dtable_dump(stream, &sregs->idt, indent + 2);
+
+	/* Control registers, EFER, and the APIC base. */
+	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
+		indent, "",
+		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+		"apic_base: 0x%.16llx\n",
+		indent, "",
+		sregs->cr8, sregs->efer, sregs->apic_base);
+
+	/* Pending interrupt bitmap, 64 bits per line. */
+	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+			sregs->interrupt_bitmap[i]);
+	}
+}
+
+/* Known KVM exit reasons
+ *
+ * Maps each KVM_EXIT_* value to a printable name; consumed by
+ * exit_reason_str(), exit_reason_val() and exit_reasons_list().
+ */
+struct exit_reason {
+	unsigned int reason;	/* KVM_EXIT_* value */
+	const char *name;	/* name, without the KVM_EXIT_ prefix */
+} exit_reasons_known[] = {
+	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
+	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
+	{KVM_EXIT_IO, "IO"},
+	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
+	{KVM_EXIT_DEBUG, "DEBUG"},
+	{KVM_EXIT_HLT, "HLT"},
+	{KVM_EXIT_MMIO, "MMIO"},
+	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+	{KVM_EXIT_INTR, "INTR"},
+	{KVM_EXIT_SET_TPR, "SET_TPR"},
+	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+	{KVM_EXIT_S390_RESET, "S390_RESET"},
+	{KVM_EXIT_DCR, "DCR"},
+	{KVM_EXIT_NMI, "NMI"},
+	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+	{KVM_EXIT_OSI, "OSI"},
+	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+/* Only defined by newer kernel headers; include when available. */
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ * exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason. If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+	/* Linear scan of the known-reason table. */
+	for (unsigned int i = 0; i < ARRAY_SIZE(exit_reasons_known); i++) {
+		if (exit_reasons_known[i].reason == exit_reason)
+			return exit_reasons_known[i].name;
+	}
+
+	return "Unknown";
+}
+
+/* Exit Reason Value
+ *
+ * Input Args:
+ *   name - exit reason string
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent exit reason value or -1 if no equivalent exit value is
+ *   found.
+ *
+ * Looks up the exit reason whose name matches name and returns its
+ * numeric value. Returns -1 when no entry of exit_reasons_known has
+ * the given name.
+ */
+int exit_reason_val(const char *name)
+{
+	const struct exit_reason *er;
+	const struct exit_reason *end
+		= exit_reasons_known + ARRAY_SIZE(exit_reasons_known);
+
+	for (er = exit_reasons_known; er < end; er++) {
+		if (!strcmp(er->name, name))
+			return er->reason;
+	}
+
+	return -1;
+}
+
+/* Exit Reasons List
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Prints the name of every known exit reason to stream, one per line,
+ * each left-padded by indent spaces.
+ */
+void exit_reasons_list(FILE *stream, unsigned int indent)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(exit_reasons_known); idx++)
+		fprintf(stream, "%*s%s\n",
+			indent, "", exit_reasons_known[idx].name);
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - VM Virtual Address
+ *   paddr - VM Physical Address
+ *   vttbl_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.  Any missing
+ * intermediate paging structures (PDPT, PD, PT) are allocated on demand
+ * from the memory region given by vttbl_memslot.
+ */
+void virt_pg_map(kvm_util_vm_t *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t vttbl_memslot)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		" vaddr: 0x%lx vm->page_size: 0x%x",
+		vaddr, vm->page_size);
+	TEST_ASSERT(test_sparsebit_is_set(vm->vpages_valid,
+		(vaddr / vm->page_size)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		" paddr: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->page_size);
+	TEST_ASSERT((paddr / vm->page_size) <= vm->ppgidx_max,
+		"Physical address beyond maximum supported,\n"
+		" paddr: 0x%lx vm->ppgidx_max: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->ppgidx_max, vm->page_size);
+
+	/* 9-bit table index for each of the 4 paging levels. */
+	index[0] = (vaddr >> 12) & 0x1ffu;	/* page table */
+	index[1] = (vaddr >> 21) & 0x1ffu;	/* page directory */
+	index[2] = (vaddr >> 30) & 0x1ffu;	/* page directory ptr table */
+	index[3] = (vaddr >> 39) & 0x1ffu;	/* page map level 4 */
+
+	/* Allocate page directory pointer table if not present. */
+	pml4e = addr_vmphy2hvirt(vm, vm->virt_l4);
+	if (!pml4e[index[3]].present) {
+		/* Entries store page-frame numbers, hence / page_size. */
+		pml4e[index[3]].address = phy_page_alloc(vm,
+			KVM_UTIL_VIRT_MIN_PADDR, vttbl_memslot)
+			/ vm->page_size;
+		pml4e[index[3]].writable = true;
+		pml4e[index[3]].present = true;
+	}
+
+	/* Allocate page directory table if not present. */
+	struct pageDirectoryPointerEntry *pdpe;
+	pdpe = addr_vmphy2hvirt(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present) {
+		pdpe[index[2]].address = phy_page_alloc(vm,
+			KVM_UTIL_VIRT_MIN_PADDR, vttbl_memslot)
+			/ vm->page_size;
+		pdpe[index[2]].writable = true;
+		pdpe[index[2]].present = true;
+	}
+
+	/* Allocate page table if not present. */
+	struct pageDirectoryEntry *pde;
+	pde = addr_vmphy2hvirt(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present) {
+		pde[index[1]].address = phy_page_alloc(vm,
+			KVM_UTIL_VIRT_MIN_PADDR, vttbl_memslot)
+			/ vm->page_size;
+		pde[index[1]].writable = true;
+		pde[index[1]].present = true;
+	}
+
+	/* Fill in page table entry. */
+	struct pageTableEntry *pte;
+	pte = addr_vmphy2hvirt(vm, pde[index[1]].address * vm->page_size);
+	pte[index[0]].address = paddr / vm->page_size;
+	pte[index[0]].writable = true;
+	pte[index[0]].present = 1;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.  Walks the 4-level
+ * tree (PML4 -> PDPT -> PD -> PT), printing only present entries.  For
+ * leaf PTEs the final column is the 4-KiB virtual page number rebuilt
+ * from the four 9-bit table indices (n1..n4).
+ */
+void virt_dump(FILE *stream, const kvm_util_vm_t *vm, uint8_t indent)
+{
+	struct pageMapL4Entry *pml4e, *pml4e_start;
+	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+	struct pageDirectoryEntry *pde, *pde_start;
+	struct pageTableEntry *pte, *pte_start;
+
+	/* Nothing to dump until the top-level table has been created. */
+	if (!vm->virt_l4_created)
+		return;
+
+	fprintf(stream, "%*s "
+		" no\n", indent, "");
+	fprintf(stream, "%*s index hvaddr gpaddr "
+		"addr w exec dirty\n",
+		indent, "");
+	pml4e_start = (struct pageMapL4Entry *) addr_vmphy2hvirt(vm,
+		vm->virt_l4);
+	/* Each table at each level has 512 (0x200) entries. */
+	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+		pml4e = &pml4e_start[n1];
+		if (!pml4e->present)
+			continue;
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+			" %u\n",
+			indent, "",
+			pml4e - pml4e_start, pml4e,
+			addr_hvirt2vmphy(vm, pml4e), (uint64_t) pml4e->address,
+			pml4e->writable, pml4e->execute_disable);
+
+		/* address holds a page-frame number, hence * page_size. */
+		pdpe_start = addr_vmphy2hvirt(vm, pml4e->address
+			* vm->page_size);
+		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+			pdpe = &pdpe_start[n2];
+			if (!pdpe->present)
+				continue;
+			fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+				"%u %u\n",
+				indent, "",
+				pdpe - pdpe_start, pdpe,
+				addr_hvirt2vmphy(vm, pdpe),
+				(uint64_t) pdpe->address, pdpe->writable,
+				pdpe->execute_disable);
+
+			pde_start = addr_vmphy2hvirt(vm,
+				pdpe->address * vm->page_size);
+			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+				pde = &pde_start[n3];
+				if (!pde->present)
+					continue;
+				fprintf(stream, "%*spde 0x%-3zx %p "
+					"0x%-12lx 0x%-10lx %u %u\n",
+					indent, "", pde - pde_start, pde,
+					addr_hvirt2vmphy(vm, pde),
+					(uint64_t) pde->address, pde->writable,
+					pde->execute_disable);
+
+				pte_start = addr_vmphy2hvirt(vm,
+					pde->address * vm->page_size);
+				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+					pte = &pte_start[n4];
+					if (!pte->present)
+						continue;
+					fprintf(stream, "%*spte 0x%-3zx %p "
+						"0x%-12lx 0x%-10lx %u %u "
+						" %u 0x%-10lx\n",
+						indent, "",
+						pte - pte_start, pte,
+						addr_hvirt2vmphy(vm, pte),
+						(uint64_t) pte->address,
+						pte->writable,
+						pte->execute_disable,
+						pte->dirty,
+						/* Virtual page number:
+						 * vaddr >> 12, built from
+						 * the 9-bit indices.
+						 */
+						((uint64_t) n1 << 27)
+						| ((uint64_t) n2 << 18)
+						| ((uint64_t) n3 << 9)
+						| ((uint64_t) n4));
+				}
+			}
+		}
+	}
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Overwrites *segp with an all-zero segment whose unusable flag is set.
+ */
+void setUnusableSegment(struct kvm_segment *segp)
+{
+	*segp = (struct kvm_segment) {
+		.unusable = true,
+	};
+}
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Initializes *segp as a flat 64-bit (long mode) kernel code segment
+ * using the given selector; all fields not listed below are zero.
+ */
+void setLongModeFlatKernelCodeSegment(uint16_t selector,
+	struct kvm_segment *segp)
+{
+	*segp = (struct kvm_segment) {
+		.selector = selector,
+		.limit = 0xFFFFFFFFu,
+		.s = 0x1,	/* kTypeCodeData */
+		.type = 0x08 | 0x01 | 0x02,	/* kFlagCode | kFlagCodeAccessed
+						 * | kFlagCodeReadable
+						 */
+		.g = true,
+		.l = true,	/* 64-bit code segment */
+		.present = 1,
+	};
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Initializes *segp as a flat kernel data segment using the given
+ * selector; all fields not listed below are zero.
+ */
+void setLongModeFlatKernelDataSegment(uint16_t selector,
+	struct kvm_segment *segp)
+{
+	*segp = (struct kvm_segment) {
+		.selector = selector,
+		.limit = 0xFFFFFFFFu,
+		.s = 0x1,	/* kTypeCodeData */
+		.type = 0x00 | 0x01 | 0x02,	/* kFlagData | kFlagDataAccessed
+						 * | kFlagDataWritable
+						 */
+		.g = true,
+		.present = true,
+	};
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state(), as reported by the KVM_GET_VCPU_MMAP_SIZE ioctl
+ * on /dev/kvm.
+ */
+static int vcpu_mmap_sz(void)
+{
+	int dev_fd, rv;
+
+	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+	TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rv: %i errno: %i",
+		__func__, KVM_DEV_PATH, dev_fd, errno);
+
+	rv = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+	/* Check rv >= 0 first: comparing a negative int against the
+	 * unsigned sizeof() would promote it to a huge unsigned value
+	 * and let a failed ioctl slip past the assertion.
+	 */
+	TEST_ASSERT(rv >= 0 && (size_t)rv >= sizeof(struct kvm_run),
+		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rv: %i errno: %i",
+		__func__, rv, errno);
+
+	close(dev_fd);
+
+	return rv;
+}
+
+/* Huge TLB Supported
+ *
+ * Returns true iff the given parameters specify a condition that the
+ * current platform is able to map via one or more huge TLB entries.
+ * see: ./Documentation/vm/hugetlbpage.txt
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   npages - number of regular pages (_SC_PAGESIZE bytes each)
+ */
+static bool hugetlb_supported(const kvm_util_vm_t *vm, uint64_t npages)
+{
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG,
+		"Unknown VM mode, vm->mode: 0x%x", vm->mode);
+
+	/* Huge pages only cover whole multiples of the regular pages
+	 * that make one huge page.
+	 */
+	return (npages % KVM_UTIL_PGS_PER_HUGEPG) == 0;
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Walks the VM's region list looking for one whose guest-physical
+ * range intersects [start, end].  When several regions overlap the
+ * range, whichever is encountered first is returned; NULL means no
+ * region overlaps at all.
+ */
+static const struct userspace_mem_region *userspace_mem_region_find(
+	const kvm_util_vm_t *vm, uint64_t start, uint64_t end)
+{
+	const struct userspace_mem_region *rp;
+
+	for (rp = vm->userspace_mem_region_head; rp; rp = rp->next) {
+		uint64_t lo = rp->region.guest_phys_addr;
+		uint64_t hi = lo + rp->region.memory_size - 1;
+
+		/* Two inclusive ranges intersect iff each starts no
+		 * later than the other ends.
+		 */
+		if (start <= hi && end >= lo)
+			return rp;
+	}
+
+	return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+const struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(const kvm_util_vm_t *vm, uint64_t start,
+	uint64_t end)
+{
+	const struct userspace_mem_region *region;
+
+	region = userspace_mem_region_find(vm, start, end);
+	if (!region)
+		return NULL;
+
+	/* Fixed: "&region" had been corrupted to the mojibake "(R)ion"
+	 * character sequence, which does not compile.
+	 */
+	return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to VCPU structure
+ *
+ * Walks the VM's vcpu list and returns the entry whose id equals
+ * vcpuid, or NULL when the VM has no such VCPU.
+ */
+static const struct vcpu *vcpu_find(const kvm_util_vm_t *vm,
+	uint32_t vcpuid)
+{
+	const struct vcpu *cur = vm->vcpu_head;
+
+	while (cur) {
+		if (cur->id == vcpuid)
+			break;
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   paddr_min - Physical address minimum
+ *   memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min. If found, the page is marked as in use
+ * and its address is returned. A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+static vm_paddr_t phy_page_alloc(kvm_util_vm_t *vm,
+	vm_paddr_t paddr_min, uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+	test_sparsebit_idx_t pg;
+
+	/* Message typo fixed: "divisable" -> "divisible". */
+	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+		"not divisible by page size.\n"
+		" paddr_min: 0x%lx page_size: 0x%x",
+		paddr_min, vm->page_size);
+
+	/* Locate memory region. */
+	region = memslot2region(vm, memslot);
+
+	/* Locate next available physical page at or above paddr_min. */
+	pg = paddr_min / vm->page_size;
+
+	if (!test_sparsebit_is_set(region->unused_phy_pages, pg)) {
+		/* Note: next_set() returns 0 when no set bit follows pg. */
+		pg = test_sparsebit_next_set(region->unused_phy_pages, pg);
+		if (pg == 0) {
+			fprintf(stderr, "No guest physical page available, "
+				"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+				paddr_min, vm->page_size, memslot);
+			fputs("---- vm dump ----\n", stderr);
+			vm_dump(stderr, vm, 2);
+			TEST_ASSERT(false, "No guest physical page available");
+		}
+	}
+
+	/* Specify page as in use and return its address. */
+	test_sparsebit_clear(region->unused_phy_pages, pg);
+
+	return pg * vm->page_size;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to memory region structure that describe memory region
+ *   using kvm memory slot ID given by memslot. TEST_ASSERT failure
+ *   on error (e.g. currently no memory region using memslot as a KVM
+ *   memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(kvm_util_vm_t *vm,
+	uint32_t memslot)
+{
+	struct userspace_mem_region *rp;
+
+	for (rp = vm->userspace_mem_region_head; rp; rp = rp->next)
+		if (rp->region.slot == memslot)
+			return rp;
+
+	/* No match: dump state for diagnosis, then fail the test. */
+	fprintf(stderr, "No mem region with the requested slot found,\n"
+		" requested slot: %u\n", memslot);
+	fputs("---- vm dump ----\n", stderr);
+	vm_dump(stderr, vm, 2);
+	TEST_ASSERT(false, "Mem region not found");
+
+	return NULL;	/* not reached; TEST_ASSERT(false) aborts */
+}
+
+/*
+ * Reads a VCPU array from /proc/self/kvm.
+ *
+ * Input Args:
+ *   name: The field to retrieve.
+ *   index: The index of the VCPU array to read.
+ *   out: The output array
+ *   len: The capacity of the output array
+ *
+ * Scans /proc/self/kvm line by line for a "name : ..." entry; skips
+ * non-matching lines, then skips `index` space-separated entries on
+ * the matching line before reading `len` decimal values into out.
+ * Any parse failure is a fatal TEST_ASSERT.
+ */
+void vcpu_read_proc_array(const char *name, int index, uint64_t *out, int len)
+{
+	int r;
+	FILE *fp = fopen("/proc/self/kvm", "r");
+	TEST_ASSERT(fp, "Failed to open /proc/self/kvm with errno %d.", errno);
+
+	for (;;) {
+		char *field;
+		int i;
+
+		/* %ms mallocs the field name; freed below. */
+		r = fscanf(fp, "%ms : ", &field);
+		TEST_ASSERT(r == 1,
+			"Read %d items (errno=%d). Was looking for '%s'.",
+			r, errno, name);
+
+		r = strcmp(name, field);
+		free(field);
+		if (r) {
+			/* Not the field we want: consume rest of line. */
+			r = fscanf(fp, "%*[^\n]\n");
+			TEST_ASSERT(r == 0, "Failed to scan to end of line.");
+			continue;
+		}
+
+		/* Skip the first `index` entries of the array. */
+		for (i = 0; i < index; i++) {
+			r = fscanf(fp, "%*[^ ] ");
+			TEST_ASSERT(r == 0, "Failed to scan for index %d.", i);
+		}
+
+		/* Read the requested number of values. */
+		for (i = 0; i < len; i++) {
+			uint64_t x;
+			r = fscanf(fp, "%" SCNu64 "%*[,\n ]", &x);
+			TEST_ASSERT(r == 1,
+				"Array only had %d item(s). Needed %d.",
+				i, len);
+			out[i] = x;
+		}
+
+		r = fclose(fp);
+		TEST_ASSERT(r == 0,
+			"Failed to close /proc/self/kvm with errno %d.",
+			errno);
+		return;
+	}
+
+	/* NOT REACHED */
+}
+
+/* Reads a VM-wide array from /proc/self/kvm; same as
+ * vcpu_read_proc_array() with index 0.
+ */
+void vm_read_proc_array(const char *name, uint64_t *out, int len)
+{
+	vcpu_read_proc_array(name, 0, out, len);
+}
+
+/*
+ * Reads a VCPU field from /proc/self/kvm.
+ *
+ * Input Args:
+ *   name: The field to retrieve.
+ *   index: The index of the VCPU field to read.
+ *
+ * Output Args: None.
+ * Return: The field's value.
+ */
+uint64_t vcpu_read_proc_field(const char *name, int index)
+{
+	uint64_t val;
+
+	/* A single-element array read is a scalar field read. */
+	vcpu_read_proc_array(name, index, &val, 1);
+
+	return val;
+}
+
+/*
+ * Reads a VM field from /proc/self/kvm. Can't be used with per-vCPU fields.
+ *
+ * Input Args:
+ *   name: The field to retrieve.
+ *
+ * Output Args: None.
+ * Return: The field's value.
+ */
+uint64_t vm_read_proc_field(const char *name)
+{
+	/* VM-wide fields live at index 0. */
+	return vcpu_read_proc_field(name, 0);
+}
+
+/* VM Create Device
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cd - Create Device
+ *
+ * Output Args: device fd in cd->fd
+ *
+ * Return: 0/success errno/failure
+ *
+ * Creates an emulated device in the kernel via the KVM_CREATE_DEVICE
+ * ioctl on the VM fd; on success the kernel fills in cd->fd.
+ */
+int vm_create_device(const kvm_util_vm_t *vm, struct kvm_create_device *cd)
+{
+	/* Map the ioctl's -1/0 convention onto errno/0. */
+	return ioctl(vm->fd, KVM_CREATE_DEVICE, cd) ? errno : 0;
+}
diff --git a/gtests/lib/test_sparsebit.c b/gtests/lib/test_sparsebit.c
new file mode 100644
index 0000000..93d30b6
--- /dev/null
+++ b/gtests/lib/test_sparsebit.c
@@ -0,0 +1,2793 @@
+/*
+ * gtests/lib/test_sparsebit.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Test Sparsebit Library
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64. A sparsebit array is allocated through
+ * the use test_sparsebit_alloc() and free'd via test_sparsebit_free(),
+ * such as in the following:
+ *
+ * test_sparsebit_t *s;
+ * s = test_sparsebit_alloc();
+ * test_sparsebit_free(&s);
+ *
+ * The test_sparsebit_t type resolves down to a struct test_sparsebit.
+ * Note that, test_sparsebit_free() takes a pointer to the test_sparsebit
+ * structure. This is so that test_sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct test_sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of test_sparsebit_alloc() and the call of
+ * test_sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated test sparsebit array. All of
+ * these operations take as a parameter the value returned from
+ * test_sparsebit_alloc() and most also take a bit index. Frequently
+ * used routines include:
+ *
+ * ---- Query Operations
+ * test_sparsebit_is_set(sbit, idx)
+ * test_sparsebit_is_clear(sbit, idx)
+ * test_sparsebit_any_set(sbit)
+ * test_sparsebit_first_set(sbit)
+ * test_sparsebit_next_set(sbit, prev_idx)
+ *
+ * ---- Modifying Operations
+ * test_sparsebit_set(sbit, idx)
+ * test_sparsebit_clear(sbit, idx)
+ * test_sparsebit_set_num(sbit, idx, num);
+ * test_sparsebit_clear_num(sbit, idx, num);
+ *
+ * A common operation is to iterate over all the bits set in a test
+ * sparsebit array. This can be done via code with the following structure:
+ *
+ * test_sparsebit_idx_t idx;
+ * if (test_sparsebit_any_set(sbit)) {
+ * idx = test_sparsebit_first_set(sbit);
+ * do {
+ * ...
+ * idx = test_sparsebit_next_set(sbit, idx);
+ * } while (idx != 0);
+ * }
+ *
+ * The index of the first bit set needs to be obtained via
+ * test_sparsebit_first_set(), because test_sparsebit_next_set() needs
+ * the index of the previously set bit. The test_sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to test_sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set. This is because a TEST_ASSERT
+ * failure is produced if test_sparsebit_first_set() is called with
+ * no bits set. It is the callers responsibility to assure that the
+ * test sparsebit array has at least a single bit set before calling
+ * test_sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of test sparsebit is
+ * opaque to the caller. One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation. This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set. It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ * typedef uint64_t test_sparsebit_idx_t;
+ * typedef uint64_t test_sparsebit_num_t;
+ *
+ * test_sparsebit_idx_t idx;
+ * uint32_t mask;
+ * test_sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after. The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set. For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation maintains the following invariants:
+ *
+ * + Nodes are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + Sum of bits set in all the nodes is equal to the value of
+ * the struct test_sparsebit_pvt num_set member.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurrences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ *
+ * + A node's starting index is evenly divisible by the number of bits
+ * within a node's mask member.
+ *
+ * + Nodes never represent a range of bits that wrap around the
+ * highest supported index.
+ *
+ * (idx + MASK_BITS + num_after - 1) <= ((test_sparsebit_idx_t) 0) - 1)
+ *
+ * As a consequence of the above, the num_after member of a node
+ * will always be <=:
+ *
+ * maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ * + Nodes within the binary search tree are sorted based on each
+ * nodes starting index.
+ *
+ * + The range of bits described by any two nodes do not overlap. The
+ * range of bits described by a single node is:
+ *
+ * start: node->idx
+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code. For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct test_sparsebit_pvt num_set member is updated. Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists. At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set. Such temporary violations must be corrected before
+ * returning to the caller. These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include <test_sparsebit.h>
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <test_util.h>
+#include <values.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+/* Concrete definition of test_sparsebit_t and definition of
+ * implementation private structures.
+ */
+struct test_sparsebit_pvt;
+struct test_sparsebit {
+	struct test_sparsebit_pvt *pimpl; /* Pointer to implementation private
+					   * data.
+					   */
+};
+struct node;
+typedef struct test_sparsebit_pvt {
+	struct node *root; /* Points to root node of the binary search
+			    * tree. Equal to NULL when no bits are set in
+			    * the entire sparsebit array.
+			    */
+	test_sparsebit_num_t num_set; /* A redundant count of the total
+				       * number of bits set. Used for
+				       * diagnostic purposes and to change
+				       * the time complexity of
+				       * test_sparsebit_num_set() from
+				       * O(n) to O(1).
+				       * Note: Due to overflow, a value of
+				       * 0 means none or all set.
+				       */
+} pvt_t;
+
+typedef uint32_t mask_t;
+/* CHARBITS comes from <values.h>; bits per mask_t (32 here). */
+#define MASK_BITS (sizeof(mask_t) * CHARBITS)
+/* One node of the binary search tree described in the file header. */
+typedef struct node {
+	struct node *parent;
+	struct node *left;
+	struct node *right;
+	test_sparsebit_idx_t idx; /* index of least-significant bit in mask */
+	test_sparsebit_num_t num_after; /* num contiguously set after mask */
+	mask_t mask;
+} node_t;
+
+/* File Scope Function Prototypes */
+static test_sparsebit_num_t node_num_set(const node_t *nodep);
+static node_t *node_copy_subtree(const node_t *subtree);
+static node_t *node_find(pvt_t *s, test_sparsebit_idx_t idx);
+static const node_t *node_find_const(const pvt_t *s, test_sparsebit_idx_t idx);
+static node_t *node_add(pvt_t *s, test_sparsebit_idx_t idx);
+static void node_rm(pvt_t *s, node_t *nodep);
+static node_t *node_split(pvt_t *s, test_sparsebit_idx_t idx);
+static const node_t *node_first_const(const pvt_t *s);
+static node_t *node_next(pvt_t *s, node_t *n);
+static const node_t *node_next_const(const pvt_t *s, const node_t *n);
+static node_t *node_prev(pvt_t *s, node_t *n);
+static bool all_set(const pvt_t *s);
+static bool is_set(const pvt_t *s, test_sparsebit_idx_t idx);
+static void bit_set(pvt_t *s, test_sparsebit_idx_t idx);
+static void bit_clear(pvt_t *s, test_sparsebit_idx_t idx);
+static void node_reduce(pvt_t *s, node_t *nodep);
+static size_t display_range(FILE *stream, test_sparsebit_idx_t low,
+ test_sparsebit_idx_t high, bool prepend_comma_space);
+static void dump_nodes(FILE *stream, const node_t *node,
+ unsigned int indent);
+
+/* Test Sparsebit Allocate
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Allocated test sparsebit array.
+ *
+ * Allocates the memory needed to maintain the initial state of
+ * a test sparsebit array. The initial state of the newly allocated
+ * sparsebit array has all bits cleared (NULL tree root, num_set 0,
+ * courtesy of calloc).
+ */
+test_sparsebit_t *test_sparsebit_alloc(void)
+{
+	test_sparsebit_t *s;
+
+	/* Allocate top level structure. */
+	s = calloc(1, sizeof(*s));
+	TEST_ASSERT(s != NULL, "Insufficient Memory");
+
+	/* Allocate memory, to hold implementation private data */
+	s->pimpl = calloc(1, sizeof(*s->pimpl));
+	TEST_ASSERT(s->pimpl != NULL, "Insufficient Memory");
+
+	return s;
+}
+
+/* Test Sparsebit Free
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ *   sbitp - pointer to opaque sparsebit array pointer
+ *
+ * Return: None
+ *
+ * Frees the implementation dependent data for the test sparsebit array
+ * pointed to by sbitp and poisons the pointer to that data (sets it to
+ * NULL so use-after-free is caught).
+ */
+void test_sparsebit_free(test_sparsebit_t **sbitp)
+{
+	pvt_t *pvt = (*sbitp)->pimpl;
+
+	if (pvt != NULL) {
+
+		/* Clear every bit; clearing releases the tree nodes, so
+		 * after this only the pvt structure itself needs freeing.
+		 * Bit 0 and bits [1, max] are cleared in two calls because
+		 * clear_num's num can't span all 2^64 indices at once.
+		 */
+		test_sparsebit_clear(*sbitp, 0);
+		test_sparsebit_clear_num(*sbitp, 1,
+			~((test_sparsebit_num_t) 0));
+		if (test_sparsebit_any_set(*sbitp)) {
+			fputs("  dump_internal:\n", stderr);
+			test_sparsebit_dump_internal(stderr, *sbitp, 4);
+		}
+		TEST_ASSERT((pvt->root == NULL) && (pvt->num_set == 0),
+			"Unexpected non-NULL root or num_set != 0, after "
+			"clearing all bits\n"
+			"  *sbitp: %p (*sbitp)->pimpl: %p pvt->root: %p "
+			"pvt->num_set: 0x%lx",
+			*sbitp, (*sbitp)->pimpl, pvt->root, pvt->num_set);
+
+		free(pvt);
+		(*sbitp)->pimpl = NULL;
+	}
+
+	/* Free top-level structure and then poison caller's pointer to it. */
+	free(*sbitp);
+	*sbitp = NULL;
+}
+
+/* Test Sparsebit Copy
+ *
+ * Input Args:
+ *   src - Source test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ *   dst - Destination test sparsebit array
+ *
+ * Return: None
+ *
+ * Makes a copy of the sparsebit array given by source, to the sparsebit
+ * array given by dst. Note, dst must have already been allocated via
+ * test_sparsebit_alloc(). It can though already have bit settings, which
+ * if different from src will be cleared.
+ */
+void test_sparsebit_copy(test_sparsebit_t *dstp, const test_sparsebit_t *src)
+{
+	pvt_t *d = dstp->pimpl;
+	const pvt_t *s = src->pimpl;
+
+	/* First clear any bits already set in the destination.
+	 * Two calls (bit 0, then bits [1, max]) because clear_num's num
+	 * can't span all 2^64 indices in one call.
+	 */
+	test_sparsebit_clear(dstp, 0);
+	test_sparsebit_clear_num(dstp, 1, ~((test_sparsebit_num_t) 0));
+	if (test_sparsebit_any_set(dstp)) {
+		fputs("  dump_internal src:\n", stderr);
+		test_sparsebit_dump_internal(stderr, src, 4);
+		fputs("  dump_internal dst:\n", stderr);
+		test_sparsebit_dump_internal(stderr, dstp, 4);
+		TEST_ASSERT(false, "Destination bits set after clearing "
+			"all bits");
+	}
+	TEST_ASSERT((d->root == NULL) && (d->num_set == 0),
+		"Unexpected non-NULL root or num_set != 0, after "
+		"clearing all bits\n"
+		"  d: %p d->root: %p d->num_set: %lu",
+		d, d->root, d->num_set);
+
+	/* Deep-copy the source tree; an empty source leaves dst empty. */
+	if (s->root) {
+		d->root = node_copy_subtree(s->root);
+		d->num_set = s->num_set;
+	}
+}
+
+/* Test Sparsebit Is Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   idx - Bit index
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if the bit is set, false otherwise
+ *
+ * Thin public wrapper over the private is_set() helper, which walks
+ * the implementation's node tree.
+ */
+bool test_sparsebit_is_set(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t idx)
+{
+	const pvt_t *pvt = sbit->pimpl;
+
+	return is_set(pvt, idx);
+}
+
+/* Test Sparsebit Is Set Num
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   idx - Bit index
+ *   num - number of consecutive bits to check
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if num consecutive bits starting at idx are all set,
+ *   false otherwise.
+ *
+ * Checks that bit idx is set and that the first cleared bit after idx
+ * (if any) is at least num bits away.
+ */
+bool test_sparsebit_is_set_num(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t idx, test_sparsebit_num_t num)
+{
+	test_sparsebit_idx_t next_cleared;
+
+	TEST_ASSERT(num > 0, "Num of 0 not supported, num: 0x%lx", num);
+
+	TEST_ASSERT((idx + (num - 1)) >= idx, "Index plus num wraps beyond "
+		"highest supported index,\n"
+		"  idx: 0x%lx num: 0x%lx", idx, num);
+
+	/* With num > 0, the first bit must be set. */
+	if (!test_sparsebit_is_set(sbit, idx))
+		return false;
+
+	next_cleared = test_sparsebit_next_clear(sbit, idx);
+
+	/* next_cleared == 0 means no cleared bit beyond idx, so at least
+	 * num set bits exist (the earlier TEST_ASSERT rules out wrap).
+	 * Otherwise there must be num set bits before the cleared one.
+	 */
+	return next_cleared == 0 || (next_cleared - idx) >= num;
+}
+
+/* Test Sparsebit Is Clear
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   idx - Bit index
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if the bit is cleared, false otherwise
+ *
+ * Logical negation of test_sparsebit_is_set() for the same index.
+ */
+bool test_sparsebit_is_clear(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t idx)
+{
+	bool bit_is_set = test_sparsebit_is_set(sbit, idx);
+
+	return !bit_is_set;
+}
+
+/* Test Sparsebit Is Cleared Num
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   idx - Bit index
+ *   num - number of consecutive bits to check
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if num consecutive bits starting at idx are all cleared,
+ *   false otherwise.
+ *
+ * Mirror of test_sparsebit_is_set_num(): checks that bit idx is clear
+ * and that the first set bit after idx (if any) is at least num bits
+ * away.
+ */
+bool test_sparsebit_is_clear_num(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t idx, test_sparsebit_num_t num)
+{
+	test_sparsebit_idx_t next_set;
+
+	TEST_ASSERT(num > 0, "Num of 0 not supported, num: 0x%lx", num);
+
+	TEST_ASSERT((idx + (num - 1)) >= idx, "Index plus num wraps beyond "
+		"highest supported index,\n"
+		"  idx: 0x%lx num: 0x%lx", idx, num);
+
+	/* With num > 0, the first bit must be cleared. */
+	if (!test_sparsebit_is_clear(sbit, idx))
+		return false;
+
+	next_set = test_sparsebit_next_set(sbit, idx);
+
+	/* next_set == 0 means no set bit beyond idx, so at least num
+	 * cleared bits exist (the earlier TEST_ASSERT rules out wrap).
+	 * Otherwise there must be num cleared bits before the set one.
+	 */
+	return next_set == 0 || (next_set - idx) >= num;
+}
+
+/* Test Sparsebit Num Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Total number of bits set.  Note: a value of 0 is returned for
+ *   the case of all bits set.  This is because with all bits set, there
+ *   is 1 additional bit set beyond what can be represented in the return
+ *   value.  The function, test_sparsebit_any_set(), instead of
+ *   test_sparsebit_num_set() > 0, should be used to determine if the
+ *   test sparsebit array has any bits set.
+ */
+test_sparsebit_num_t test_sparsebit_num_set(const test_sparsebit_t *sbit)
+{
+	const pvt_t *s = sbit->pimpl;
+
+	/* The count is maintained incrementally by the mutators. */
+	return s->num_set;
+}
+
+/* Test Sparsebit Any Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if any bit is set.
+ *
+ * Determines whether any bit is set in the test sparsebit array
+ * given by sbit.  Return true if any bit is set, false otherwise.
+ */
+bool test_sparsebit_any_set(const test_sparsebit_t *sbit)
+{
+	const pvt_t *s = sbit->pimpl;
+
+	/* Nodes only describe set bits.  If any nodes then there
+	 * is at least 1 bit set.
+	 */
+	if (s->root) {
+		/* Every node should have a non-zero mask.  For now will
+		 * just assure that the root node has a non-zero mask,
+		 * which is a quick check that at least 1 bit is set.
+		 */
+		TEST_ASSERT(s->root->mask != 0, "Root node with mask "
+			"of zero: mask: %x", s->root->mask);
+
+		/* Consistency check: num_set of 0 with nodes present is
+		 * only legal in the all-bits-set case, where the true
+		 * count wraps to 0 in test_sparsebit_num_t.
+		 */
+		TEST_ASSERT((s->num_set > 0)
+			|| ((s->root->num_after == ((test_sparsebit_num_t) 0)
+				- MASK_BITS) && (s->root->mask == ~(mask_t) 0)),
+			"Total num_set == 0, without all bits set,\n"
+			"  s->num_set: 0x%lx s->root->mask: %x "
+			"s->root->num_after: 0x%lx", s->num_set, s->root->mask,
+			s->root->num_after);
+
+		return true;
+	}
+
+	return false;
+}
+
+/* Test Sparsebit All Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if every bit in the test sparsebit array is set.
+ *
+ * Thin public wrapper over the internal all_set() predicate, which
+ * operates on the private implementation state.
+ */
+bool test_sparsebit_all_set(const test_sparsebit_t *sbit)
+{
+	const pvt_t *s = sbit->pimpl;
+
+	return all_set(s);
+}
+
+/* Test Sparsebit All Cleared
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if all bits are cleared.
+ *
+ * All bits are cleared exactly when no bit is set, so this is the
+ * logical inverse of test_sparsebit_any_set().
+ */
+bool test_sparsebit_all_clear(const test_sparsebit_t *sbit)
+{
+	if (test_sparsebit_any_set(sbit))
+		return false;
+
+	return true;
+}
+
+/* Test Sparsebit Any Clear
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   True if any bits are cleared.
+ *
+ * Determines whether any bit in the test sparsebit array is cleared,
+ * which is equivalent to not all bits being set.
+ */
+bool test_sparsebit_any_clear(const test_sparsebit_t *sbit)
+{
+	return !test_sparsebit_all_set(sbit);
+}
+
+/* Test Sparsebit First Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Entry Requirement:
+ *   + At least one bit within the test sparsebit array given by
+ *     sbit is set.
+ *
+ * Return:
+ *   Index of first set bit.
+ *
+ * Determines and returns the index of the first set bit.  A TEST_ASSERT()
+ * failure occurs if no bits are set.  Code of the following form is
+ * typically used to iterate over all the set bits:
+ *
+ *   if (test_sparsebit_any_set(sbit)) {
+ *     idx = test_sparsebit_first_set(sbit);
+ *     do {
+ *       ...
+ *       idx = test_sparsebit_next_set(sbit, idx);
+ *     } while (idx != 0);
+ *   }
+ */
+test_sparsebit_idx_t test_sparsebit_first_set(const test_sparsebit_t *sbit)
+{
+	unsigned int n1;
+	const pvt_t *s = sbit->pimpl;
+	const node_t *nodep;
+
+	/* Validate at least 1 bit is set */
+	TEST_ASSERT(test_sparsebit_any_set(sbit), "No bits set");
+
+	/* Find the left-most node. */
+	nodep = node_first_const(s);
+	TEST_ASSERT(nodep != NULL, "Unexpected, no nodes");
+
+	/* Return index of first bit set in mask.
+	 * Note: Each node is required to have a non-zero mask.  In the case
+	 * where the mask is ~0, it is not allowed to set the mask to 0,
+	 * reduce .idx by MASK_BITS and increase .num_after by MASK_BITS.
+	 */
+	/* Scan the mask from the least-significant bit upward. */
+	for (n1 = 0; n1 < MASK_BITS; n1++) {
+		if (nodep->mask & (1 << n1))
+			break;
+	}
+	TEST_ASSERT(n1 < MASK_BITS, "No bits set in mask, "
+		"nodep->idx: %lx nodep->mask: %x", nodep->idx, nodep->mask);
+
+	return nodep->idx + n1;
+}
+
+/* Test Sparsebit First Clear
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Entry Requirement:
+ *   + At least one bit within the test sparsebit array given by
+ *     sbit is cleared.
+ *
+ * Return:
+ *   Index of first cleared bit.
+ *
+ * Determines and returns the index of the first cleared bit.  A TEST_ASSERT()
+ * failure occurs if no bits are cleared.  Code of the following form is
+ * typically used to iterate over all the cleared bits:
+ *
+ *   if (test_sparsebit_any_clear(sbit)) {
+ *     idx = test_sparsebit_first_clear(sbit);
+ *     do {
+ *       ...
+ *       idx = test_sparsebit_next_clear(sbit, idx);
+ *     } while (idx != 0);
+ *   }
+ */
+test_sparsebit_idx_t test_sparsebit_first_clear(const test_sparsebit_t *sbit)
+{
+	const pvt_t *s = sbit->pimpl;
+	const node_t *nodep1, *nodep2;
+
+	/* Validate at least 1 bit is cleared. */
+	TEST_ASSERT(test_sparsebit_any_clear(sbit), "No bits cleared");
+
+	/* Find the left-most node. */
+	nodep1 = node_first_const(s);
+
+	/* If no nodes or first node index > 0 then lowest cleared is 0 */
+	if ((nodep1 == NULL) || (nodep1->idx > 0))
+		return 0;
+
+	/* Does the mask in the first node contain any cleared bits? */
+	for (unsigned int n1 = 0; n1 < MASK_BITS; n1++) {
+		if (!(nodep1->mask & (1 << n1)))
+			return nodep1->idx + n1;
+	}
+
+	/* All mask bits set in first node.  If there isn't a second node
+	 * then the first cleared bit is the first bit after the bits
+	 * described by the first node.
+	 */
+	nodep2 = node_next_const(s, nodep1);
+	if (nodep2 == NULL) {
+		/* No second node.  First cleared bit is first bit beyond
+		 * bits described by first node.
+		 */
+		TEST_ASSERT(nodep1->mask == ~((mask_t) 0), "Node 1 "
+			"expected to have a mask with all bits set,\n"
+			"  nodep1: %p nodep1->mask: %x",
+			nodep1, nodep1->mask);
+		TEST_ASSERT((nodep1->idx + MASK_BITS + nodep1->num_after - 1)
+			< ~((test_sparsebit_idx_t) 0), "Node 1 describes "
+			"all bits set, but earlier check\n"
+			"indicated there is at least one cleared bit.\n"
+			"  nodep1: %p nodep1->idx: 0x%lx nodep1->mask: %x "
+			"nodep1->num_after: 0x%lx",
+			nodep1, nodep1->idx, nodep1->mask, nodep1->num_after);
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+	}
+
+	/* There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes.
+	 */
+	if ((nodep1->idx + MASK_BITS + nodep1->num_after) != nodep2->idx) {
+		/* Gap exists between the first and second nodes.
+		 * Return index of first bit within the gap.
+		 */
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+	}
+
+	/* Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	TEST_ASSERT(nodep2->mask != ~((mask_t) 0), "Unexpected all bits "
+		"set in second node,\n"
+		"  nodep2: %p nodep2->idx: 0x%lx nodep2->mask: %x",
+		nodep2, nodep2->idx, nodep2->mask);
+	for (unsigned int n1 = 0; n1 < MASK_BITS; n1++) {
+		if (!(nodep2->mask & (1 << n1)))
+			return nodep2->idx + n1;
+	}
+
+	/* Not Reached: the assert above guarantees nodep2->mask has at
+	 * least one cleared bit, so the loop must have returned.
+	 */
+	TEST_ASSERT(false, "No cleared bit found in second node,\n"
+		"  nodep2: %p nodep2->idx: 0x%lx nodep2->mask: %x",
+		nodep2, nodep2->idx, nodep2->mask);
+	return -1;	/* unreachable; keeps all control paths returning */
+}
+
+/* Test Sparsebit Next Set
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   prev - Bit index of previous bit
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Index of next bit after prev that is set.
+ *   Zero if no bit after prev is set.
+ *
+ * Returns index of next bit set within sbit after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+test_sparsebit_idx_t test_sparsebit_next_set(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t prev)
+{
+	test_sparsebit_idx_t lowest_possible = prev + 1;
+	const pvt_t *s = sbit->pimpl;
+
+	/* A bit after the highest index can't be set.
+	 * (lowest_possible == 0 means prev was the highest index and
+	 * the increment wrapped.)
+	 */
+	if (lowest_possible == 0)
+		return 0;
+
+	/* Find the leftmost 'candidate' overlapping or to the right
+	 * of lowest_possible.
+	 */
+	const node_t *candidate = NULL;
+	bool contains = false;	/* true iff lowest_possible is
+				 * within candidate
+				 */
+
+	/* Find node that describes setting of bit at lowest_possible.
+	 * If such a node doesn't exist, find the node with the lowest
+	 * starting index that is > lowest_possible.
+	 * (Binary-search descent of the node tree.)
+	 */
+	for (const node_t *nodep = s->root; nodep;) {
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+			>= lowest_possible) {
+			candidate = nodep;
+			if (candidate->idx <= lowest_possible) {
+				contains = true;
+				break;
+			}
+			nodep = nodep->left;
+		} else {
+			nodep = nodep->right;
+		}
+	}
+	if (candidate == NULL)
+		return 0;
+
+	/* Does the candidate node describe the setting of lowest_possible? */
+	if (!contains) {
+		/* Candidate doesn't describe setting of bit at lowest_possible.
+		 * Candidate points to the first node with a starting index
+		 * > lowest_possible.
+		 */
+		TEST_ASSERT(candidate->idx > lowest_possible, "Candidate "
+			"not containing lowest_possible has starting index\n"
+			"before lowest_possible,\n"
+			"  lowest_possible: 0x%lx\n"
+			"  candidate->idx: 0x%lx\n"
+			"  contains: %u",
+			lowest_possible, candidate->idx, contains);
+		TEST_ASSERT(candidate->mask != 0, "Zero mask");
+
+		/* Locate and return the index of the first set mask bit. */
+		for (unsigned int n1 = 0; n1 < MASK_BITS; n1++) {
+			if (candidate->mask & (1 << n1))
+				return candidate->idx + n1;
+		}
+
+		/* Not Reached */
+		TEST_ASSERT(false, "Not Reached");
+	}
+
+	/* Candidate describes setting of bit at lowest_possible.
+	 * Note: although the node describes the setting of the bit
+	 * at lowest_possible, its possible that its setting and the
+	 * setting of all latter bits described by this node are 0.
+	 * For now, just handle the cases where this node describes
+	 * a bit at or after an index of lowest_possible that is set.
+	 */
+	TEST_ASSERT(candidate->mask != 0, "Zero mask");
+	test_sparsebit_idx_t start = lowest_possible - candidate->idx;
+	for (test_sparsebit_idx_t n1 = start; n1 < MASK_BITS; n1++) {
+		if (candidate->mask & (1 << n1))
+			return candidate->idx + n1;
+	}
+	/* All bits described by num_after are set, so if any exist the
+	 * answer is the later of lowest_possible and the first
+	 * num_after-described index.
+	 */
+	if (candidate->num_after) {
+		test_sparsebit_idx_t first_num_after_idx
+			= candidate->idx + MASK_BITS;
+		return lowest_possible < first_num_after_idx
+			? first_num_after_idx : lowest_possible;
+	}
+
+	/* Although candidate node describes setting of bit at
+	 * the index of lowest_possible, all bits at that index and
+	 * latter that are described by candidate are cleared.  With
+	 * this, the next bit is the first bit in the next node, if
+	 * such a node exists.  If a next node doesn't exist, then
+	 * there is no next set bit.
+	 */
+	const node_t *candidate_next = node_next_const(s, candidate);
+	if (!candidate_next)
+		return 0;
+
+	TEST_ASSERT(candidate_next->mask != 0, "Unexpected zero mask");
+	for (unsigned int n1 = 0; n1 < MASK_BITS; n1++) {
+		if (candidate_next->mask & (1 << n1))
+			return candidate_next->idx + n1;
+	}
+
+	/* Not Reached */
+	TEST_ASSERT(false, "Not Reached");
+
+	return 0;
+}
+
+/* Test Sparsebit Next Cleared
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   prev - Bit index of previous bit
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Index of next bit after prev that is cleared.
+ *   Zero if no bit after prev is cleared.
+ *
+ * Returns index of next bit cleared within sbit after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+test_sparsebit_idx_t test_sparsebit_next_clear(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t prev)
+{
+	const node_t *nodep1, *nodep2;
+	unsigned int n1;
+	const pvt_t *s = sbit->pimpl;
+
+	/* There is no bit after the highest index. */
+	if (prev == ~(test_sparsebit_idx_t) 0)
+		return 0;
+
+	/* Does a node describing the setting of prev + 1 exist? */
+	nodep1 = node_find_const(s, prev + 1);
+	if (nodep1 == NULL) {
+		/* No node that describes the setting of prev + 1,
+		 * so the bit at prev + 1 is cleared.
+		 */
+		return prev + 1;
+	}
+
+	/* Does a mask bit in node 1 describe the next cleared bit? */
+	for (test_sparsebit_idx_t idx = ((prev + 1) - nodep1->idx);
+		idx < MASK_BITS; idx++) {
+		if (!(nodep1->mask & (1 << idx)))
+			return nodep1->idx + idx;
+	}
+
+	/* Next cleared bit is not described by node 1.  If there
+	 * isn't a next node, then next cleared bit is described
+	 * by bit after the bits described by the first node.
+	 */
+	nodep2 = node_next_const(s, nodep1);
+	if (nodep2 == NULL) {
+		/* No second node.  First cleared bit is first bit beyond
+		 * bits described by first node.
+		 */
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+	}
+
+	/* There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes.
+	 */
+	if ((nodep1->idx + MASK_BITS + nodep1->num_after) != nodep2->idx) {
+		/* Gap exists between the first and second nodes.
+		 * Return index of first bit within the gap.
+		 */
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+	}
+
+	/* Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	TEST_ASSERT(nodep2->mask != ~((mask_t) 0), "Unexpected all bits "
+		"set in second node,\n"
+		"  nodep2: %p nodep2->idx: 0x%lx nodep2->mask: %x",
+		nodep2, nodep2->idx, nodep2->mask);
+	for (n1 = 0; n1 < MASK_BITS; n1++) {
+		if (!(nodep2->mask & (1 << n1)))
+			return nodep2->idx + n1;
+	}
+
+	/* Not Reached: the assert above guarantees nodep2->mask has at
+	 * least one cleared bit.
+	 */
+	TEST_ASSERT(false, "No cleared bit found in second node,\n"
+		"  nodep2: %p nodep2->idx: 0x%lx nodep2->mask: %x",
+		nodep2, nodep2->idx, nodep2->mask);
+
+	return 0;
+}
+
+/* Test Sparsebit Next Set Num
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   start - Bit index of previous bit
+ *   num - number of consecutively set bits
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Index of first sequence of num consecutively set bits, with an
+ *   index > start.  Value of 0 returned if no such sequence exists.
+ *
+ * Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits.  Returns a value of 0 if no such sequence exists.
+ */
+test_sparsebit_idx_t test_sparsebit_next_set_num(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t start, test_sparsebit_num_t num)
+{
+	test_sparsebit_idx_t idx;
+
+	TEST_ASSERT(num >= 1, "num too small, num: 0x%lx", num);
+
+	/* Walk each run of set bits; the loop condition also stops the
+	 * search once idx + num would wrap past the highest index.
+	 */
+	for (idx = test_sparsebit_next_set(sbit, start);
+		(idx != 0) && ((idx + (num - 1)) >= idx);
+		idx = test_sparsebit_next_set(sbit, idx)) {
+		TEST_ASSERT(test_sparsebit_is_set(sbit, idx),
+			"Unexpected, bit not set, idx: %lx", idx);
+
+		/* Does the sequence of bits starting at idx consist of
+		 * num set bits?
+		 */
+		if (test_sparsebit_is_set_num(sbit, idx, num))
+			return idx;
+
+		/* Sequence of set bits at idx isn't large enough.
+		 * Skip this entire sequence of set bits.
+		 */
+		idx = test_sparsebit_next_clear(sbit, idx);
+		if (idx == 0)
+			return 0;
+	}
+
+	return 0;
+}
+
+/* Test Sparsebit Next Clear Num
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   start - Bit index of previous bit
+ *   num - number of consecutively cleared bits
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Index of first sequence of num consecutively cleared bits, with an
+ *   index > start.  Value of 0 returned if no such sequence exists.
+ *
+ * Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits.  Returns a value of 0 if no such sequence exists.
+ */
+test_sparsebit_idx_t test_sparsebit_next_clear_num(const test_sparsebit_t *sbit,
+	test_sparsebit_idx_t start, test_sparsebit_num_t num)
+{
+	test_sparsebit_idx_t idx;
+
+	TEST_ASSERT(num >= 1, "num too small, num: 0x%lx", num);
+
+	/* Walk each run of cleared bits; the loop condition also stops
+	 * the search once idx + num would wrap past the highest index.
+	 */
+	for (idx = test_sparsebit_next_clear(sbit, start);
+		(idx != 0) && ((idx + (num - 1)) >= idx);
+		idx = test_sparsebit_next_clear(sbit, idx)) {
+		TEST_ASSERT(test_sparsebit_is_clear(sbit, idx),
+			"Unexpected, bit not cleared, idx: %lx", idx);
+
+		/* Does the sequence of bits starting at idx consist of
+		 * num cleared bits?
+		 */
+		if (test_sparsebit_is_clear_num(sbit, idx, num))
+			return idx;
+
+		/* Sequence of cleared bits at idx isn't large enough.
+		 * Skip this entire sequence of cleared bits.
+		 */
+		idx = test_sparsebit_next_set(sbit, idx);
+		if (idx == 0)
+			return 0;
+	}
+
+	return 0;
+}
+
+/* Test Sparsebit Set Bit
+ *
+ * Input Args:
+ *   idx - bit index
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the single bit at index idx within the test sparsebit array
+ * given by sbitp.  A single bit is simply a run of length one, so
+ * delegate to the range-based setter.
+ */
+void test_sparsebit_set(test_sparsebit_t *sbitp, test_sparsebit_idx_t idx)
+{
+	test_sparsebit_set_num(sbitp, idx, 1);
+}
+
+/* Test Sparsebit Clear Bit
+ *
+ * Input Args:
+ *   idx - bit index
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Clears the single bit at index idx within the test sparsebit array
+ * given by sbitp.  A single bit is simply a run of length one, so
+ * delegate to the range-based clearer.
+ */
+void test_sparsebit_clear(test_sparsebit_t *sbitp, test_sparsebit_idx_t idx)
+{
+	test_sparsebit_clear_num(sbitp, idx, 1);
+}
+
+/* Test Sparsebit Set Num
+ *
+ * Input Args:
+ *   start - bit index
+ *   num - number of bits to set
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the test sparsebit array given by sbitp, inclusively sets the bits
+ * at the index of start through start + num - 1.
+ */
+void test_sparsebit_set_num(test_sparsebit_t *sbitp,
+	test_sparsebit_idx_t start, test_sparsebit_num_t num)
+{
+	pvt_t *s = sbitp->pimpl;
+	node_t *nodep, *next;
+	unsigned int n1;
+
+	TEST_ASSERT(num > 0, "Num of 0 not supported, num: 0x%lx", num);
+
+	TEST_ASSERT((start + (num - 1)) >= start, "Index plus num wraps beyond "
+		"highest supported index,\n"
+		"  start: 0x%lx num: 0x%lx", start, num);
+
+	/* Copy of input arguments, which during processing get modified,
+	 * instead of modifying the actual input parameters.
+	 */
+	test_sparsebit_idx_t idx = start;
+	test_sparsebit_num_t n = num;
+
+	/* Leading - bits before first mask boundary */
+	/* TODO(lhuemill): With some effort it may be possible to
+	 *   replace the following loop with a sequential sequence
+	 *   of statements.  High level sequence would be:
+	 *
+	 *     1. Use node_split() to force node that describes setting
+	 *        of idx to be within the mask portion of a node.
+	 *     2. Form mask of bits to be set.
+	 *     3. Determine number of mask bits already set in the node
+	 *        and store in a local variable named num_already_set.
+	 *     4. Set the appropriate mask bits within the node.
+	 *     5. Increment struct test_sparsebit_pvt num_set member
+	 *        by the number of bits that were actually set.
+	 *        Exclude from the counts bits that were already set.
+	 *     6. Before returning to the caller, use node_reduce() to
+	 *        handle the multiple corner cases that this method
+	 *        introduces.
+	 */
+	for (; (n > 0) && ((idx % MASK_BITS) != 0); idx++, n--)
+		bit_set(s, idx);
+
+	/* Middle - bits spanning one or more entire mask.
+	 * Note: if n < MASK_BITS, middle_end computes to middle_start - 1
+	 * and the block below is skipped, leaving idx and n unchanged by
+	 * the two updates after the block.
+	 */
+	test_sparsebit_idx_t middle_start, middle_end;
+	middle_start = idx;
+	middle_end = middle_start + n - (n % MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+		TEST_ASSERT(nodep, "No node at split point, after calling "
+			"node_split(), "
+			"nodep: %p middle_start: 0x%lx", nodep, middle_start);
+
+		/* As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if ((middle_end + 1) > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			TEST_ASSERT((next->idx + MASK_BITS + next->num_after
+				- 1) <= middle_end, "Node not part of "
+				"middle,\n"
+				"  middle start: 0x%lx end: 0x%lx\n"
+				"  next->idx: 0x%lx\n"
+				"  MASK_BITS: %lu\n"
+				"  next->num_after: 0x%lx",
+				middle_start, middle_end, next->idx,
+				MASK_BITS, next->num_after);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed set each of the mask bits */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (!(nodep->mask & (1 << n1))) {
+				nodep->mask |= (1 << n1);
+				s->num_set++;
+			}
+		}
+
+		/* Account for the num_after bits: remove the old run from
+		 * the count, then add the full middle minus the mask bits
+		 * (which were counted individually above).  The first
+		 * num_after store is redundant - it is overwritten below.
+		 */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = 0;
+		s->num_set += (middle_end - middle_start) + 1 - MASK_BITS;
+		nodep->num_after = (middle_end - middle_start) + 1 - MASK_BITS;
+
+		node_reduce(s, nodep);
+	}
+	idx = middle_end + 1;
+	n -= (middle_end - middle_start) + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	TEST_ASSERT(n < MASK_BITS, "More than mask worth of trailing bits, "
+		"idx: 0x%lx n: %lu", idx, n);
+	for (; n > 0; idx++, n--)
+		bit_set(s, idx);
+}
+
+/* Test Sparsebit Clear Num
+ *
+ * Input Args:
+ *   start - bit index
+ *   num - number of bits to clear
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the test sparsebit array given by sbitp, inclusively clears the bits
+ * at the index of start through start + num - 1.
+ */
+void test_sparsebit_clear_num(test_sparsebit_t *sbitp,
+	test_sparsebit_idx_t start, test_sparsebit_num_t num)
+{
+	TEST_ASSERT(num > 0, "Num of 0 not supported, num: 0x%lx", num);
+
+	TEST_ASSERT((start + (num - 1)) >= start, "Index plus num wraps beyond "
+		"highest supported index,\n"
+		"  start: 0x%lx num: 0x%lx", start, num);
+
+	/* Copy of input arguments, which during processing get modified,
+	 * instead of modifying the actual input parameters.
+	 */
+	test_sparsebit_idx_t idx = start;
+	test_sparsebit_num_t n = num;
+
+	pvt_t *s = sbitp->pimpl;
+	node_t *nodep;
+	unsigned int n1;
+
+	/* Leading - bits before first mask boundary */
+	for (; (n > 0) && ((idx % MASK_BITS) != 0); idx++, n--)
+		bit_clear(s, idx);
+
+	/* Middle - bits spanning one or more entire mask.
+	 * Note: if n < MASK_BITS, middle_end computes to middle_start - 1
+	 * and the block below is skipped, leaving idx and n unchanged by
+	 * the two updates after the block.
+	 */
+	test_sparsebit_idx_t middle_start, middle_end;
+	middle_start = idx;
+	middle_end = middle_start + n - (n % MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+		TEST_ASSERT(nodep, "No node at split point, after calling "
+			"node_split(), "
+			"nodep: %p middle_start: 0x%lx", nodep, middle_start);
+
+		/* As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if ((middle_end + 1) > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (node_t *next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			TEST_ASSERT((next->idx + MASK_BITS
+				+ next->num_after - 1) <= middle_end,
+				"Unexpected node crossing middle end "
+				"boundary,\n"
+				"  middle_end: 0x%lx\n"
+				"  next->idx: 0x%lx\n"
+				"  MASK_BITS: %lu\n"
+				"  next->num_after: 0x%lx",
+				middle_end, next->idx, MASK_BITS,
+				next->num_after);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed clear each of the mask bits */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & (1 << n1)) {
+				nodep->mask &= ~(1 << n1);
+				s->num_set--;
+			}
+		}
+
+		/* Clear any bits described by num_after */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = 0;
+
+		/* Delete the node that describes the beginning of
+		 * the middle bits and perform any allowed reductions
+		 * with the nodes prev or next of nodep.
+		 */
+		node_reduce(s, nodep);
+		nodep = NULL;
+	}
+	idx = middle_end + 1;
+	n -= (middle_end - middle_start) + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	TEST_ASSERT(n < MASK_BITS, "More than mask worth of trailing bits, "
+		"idx: 0x%lx n: %lu", idx, n);
+	for (; n > 0; idx++, n--)
+		bit_clear(s, idx);
+}
+
+/* Test Sparsebit Set All
+ *
+ * Input Args: None
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets all the bits within the test sparsebit array specified
+ * by sbitp.
+ */
+void test_sparsebit_set_all(test_sparsebit_t *sbitp)
+{
+	/* Two calls because test_sparsebit_num_t cannot represent the
+	 * full bit count in a single num argument: set bit 0, then the
+	 * remaining bits as one maximal-length run starting at 1.
+	 */
+	test_sparsebit_set(sbitp, 0);
+	test_sparsebit_set_num(sbitp, 1, ~(test_sparsebit_idx_t) 0);
+}
+
+/* Test Sparsebit Clear All
+ *
+ * Input Args: None
+ *
+ * Input/Output Args:
+ *   sbitp - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Clear all the bits within the test sparsebit array specified
+ * by sbitp.
+ */
+void test_sparsebit_clear_all(test_sparsebit_t *sbitp)
+{
+	/* Two calls because test_sparsebit_num_t cannot represent the
+	 * full bit count in a single num argument: clear bit 0, then the
+	 * remaining bits as one maximal-length run starting at 1.
+	 */
+	test_sparsebit_clear(sbitp, 0);
+	test_sparsebit_clear_num(sbitp, 1, ~(test_sparsebit_idx_t) 0);
+}
+
+/* Test Sparsebit Dump
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   indent - number of spaces at start of each output line
+ *
+ * Output Args:
+ *   stream - output stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the bit settings
+ * of sbit.  Each line of output is prefixed with the number of
+ * spaces given by indent.  The length of each line is implementation
+ * dependent and does not depend on the indent amount.  The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ *   5, 8, 10, 11, 12, 13, 14, 18
+ *
+ * set:
+ *
+ *   0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits.  This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void test_sparsebit_dump(FILE *stream, const test_sparsebit_t *sbit,
+	unsigned int indent)
+{
+	const pvt_t *s = sbit->pimpl;
+	size_t current_line_len = 0;
+	size_t sz;
+
+	/* Nothing to display when no bits are set. */
+	if (!test_sparsebit_any_set(sbit))
+		return;
+
+	/* Display initial indent */
+	fprintf(stream, "%*s", indent, "");
+
+	/* For each node */
+	for (const node_t *nodep = node_first_const(s);
+		nodep; nodep = node_next_const(s, nodep)) {
+		unsigned int n1;
+		test_sparsebit_idx_t low, high;
+
+		/* For each group of bits in the mask */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & (1 << n1)) {
+				low = high = nodep->idx + n1;
+
+				/* Extend the range through the run of
+				 * consecutive set mask bits; n1 ends on
+				 * the first clear bit or at MASK_BITS.
+				 */
+				for (; n1 < MASK_BITS; n1++) {
+					if (nodep->mask & (1 << n1))
+						high = nodep->idx + n1;
+					else
+						break;
+				}
+
+				/* A run reaching the top of the mask merges
+				 * with the bits described by num_after.
+				 */
+				if ((n1 == MASK_BITS) && nodep->num_after)
+					high += nodep->num_after;
+
+				/* How much room will it take to display
+				 * this range.
+				 */
+				sz = display_range(NULL, low, high,
+					current_line_len != 0);
+
+				/* If there is not enough room, display
+				 * a newline plus the indent of the next
+				 * line.
+				 */
+				if ((current_line_len + sz) > DUMP_LINE_MAX) {
+					fputs("\n", stream);
+					fprintf(stream, "%*s", indent, "");
+					current_line_len = 0;
+				}
+
+				/* Display the range */
+				sz = display_range(stream, low, high,
+					current_line_len != 0);
+				current_line_len += sz;
+			}
+		}
+
+		/* If num_after and most significant-bit of mask is not
+		 * set, then still need to display a range for the bits
+		 * described by num_after.
+		 */
+		if (!(nodep->mask & (1 << (MASK_BITS - 1)))
+			&& nodep->num_after) {
+			low = nodep->idx + MASK_BITS;
+			high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+			/* How much room will it take to display
+			 * this range.
+			 */
+			sz = display_range(NULL, low, high,
+				current_line_len != 0);
+
+			/* If there is not enough room, display
+			 * a newline plus the indent of the next
+			 * line.
+			 */
+			if ((current_line_len + sz) > DUMP_LINE_MAX) {
+				fputs("\n", stream);
+				fprintf(stream, "%*s", indent, "");
+				current_line_len = 0;
+			}
+
+			/* Display the range */
+			sz = display_range(stream, low, high,
+				current_line_len != 0);
+			current_line_len += sz;
+		}
+	}
+	fputs("\n", stream);
+}
+
+/* Test Sparsebit Dump Internal
+ *
+ * Input Args:
+ *   sbit - test sparsebit array
+ *   indent - number of spaces at start of each output line
+ *
+ * Output Args:
+ *   stream - output stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of sbit.  Each line of output is prefixed with the number
+ * of spaces given by indent.  The output is completely implementation
+ * dependent and subject to change.  Output from this function should only
+ * be used for diagnostic purposes.  For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+void test_sparsebit_dump_internal(FILE *stream, const test_sparsebit_t *sbit,
+	unsigned int indent)
+{
+	const pvt_t *s = sbit->pimpl;
+
+	/* Dump the contents of sbit */
+	fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+	fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+	/* Recursively dump the node tree, if any nodes exist. */
+	if (s->root)
+		dump_nodes(stream, s->root, indent);
+}
+
+/* Test Sparsebit Validate Internal
+ *
+ * Input Args:
+ * sbit - test sparsebit array
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Validates the internal state of the test sparsebit array given by
+ * sbit. On error, diagnostic information is printed to stderr and
+ * TEST_ASSERT failure is produced, which terminates the calling program.
+ * The checks performed are implementation dependent.
+ */
+void test_sparsebit_validate_internal(const test_sparsebit_t *sbit)
+{
+ bool error_detected = false;
+ const node_t *nodep, *prev = NULL;
+ test_sparsebit_num_t total_bits_set = 0;
+ const pvt_t *s = sbit->pimpl;
+
+ /* For each node, visited in ascending index order */
+ for (nodep = node_first_const(s); nodep;
+ prev = nodep, nodep = node_next_const(s, nodep)) {
+
+ /* Increase total bits set by the number of bits set
+ * in this node.
+ */
+ for (unsigned int n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1))
+ total_bits_set++;
+ }
+
+ total_bits_set += nodep->num_after;
+
+ /* Arbitrary choice as to whether a mask of 0 is allowed
+ * or not. For diagnostic purposes it is beneficial to
+ * have only one valid means to represent a set of bits.
+ * To support this an arbitrary choice has been made
+ * to not allow a mask of zero.
+ */
+ if (nodep->mask == 0) {
+ fprintf(stderr, "Node mask of zero, "
+ "nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate num_after is not greater than the max index
+ * - the number of mask bits. The num_after member
+ * uses 0-based indexing and thus has no value that
+ * represents all bits set. This limitation is handled
+ * by requiring a non-zero mask. With a non-zero mask,
+ * MASK_BITS worth of bits are described by the mask,
+ * which makes the largest needed num_after equal to:
+ *
+ * (~(test_sparsebit_num_t) 0) - MASK_BITS + 1
+ */
+ if (nodep->num_after
+ > (~(test_sparsebit_num_t) 0) - MASK_BITS + 1) {
+ fprintf(stderr, "num_after too large, "
+ "nodep: %p nodep->num_after: 0x%lx",
+ nodep, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate node index is divisible by the mask size */
+ if (nodep->idx % MASK_BITS) {
+ fprintf(stderr, "Node index not divisible by "
+ "mask size,\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "MASK_BITS: %lu\n",
+ nodep, nodep->idx, MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate bits described by node don't wrap beyond the
+ * highest supported index.
+ */
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+ < nodep->idx) {
+ fprintf(stderr, "Bits described by node wrap "
+ "beyond highest supported index,\n"
+ " nodep: %p nodep->idx: 0x%lx\n"
+ " MASK_BITS: %lu nodep->num_after: 0x%lx",
+ nodep, nodep->idx, MASK_BITS, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Check parent pointers. */
+ if (nodep->left) {
+ if (nodep->left->parent != nodep) {
+ fprintf(stderr, "Left child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->left: %p "
+ "nodep->left->parent: %p",
+ nodep, nodep->left,
+ nodep->left->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->right) {
+ if (nodep->right->parent != nodep) {
+ fprintf(stderr, "Right child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->right: %p "
+ "nodep->right->parent: %p",
+ nodep, nodep->right,
+ nodep->right->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->parent == NULL) {
+ if (s->root != nodep) {
+ fprintf(stderr, "Unexpected root node, "
+ "s->root: %p nodep: %p",
+ s->root, nodep);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (prev != NULL) {
+ /* Is index of previous node before index of
+ * current node?
+ */
+ if (prev->idx >= nodep->idx) {
+ fprintf(stderr, "Previous node index "
+ ">= current node index,\n"
+ " prev: %p prev->idx: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, nodep, nodep->idx);
+ error_detected = true;
+ break;
+ }
+
+ /* Nodes occur in ascending order, based on each
+ * node's starting index.
+ */
+ if ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx) {
+ fprintf(stderr, "Previous node bit range "
+ "overlap with current node bit range,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /* When the node has all mask bits set, it shouldn't
+ * be adjacent to the last bit described by the
+ * previous node.
+ */
+ if (((nodep->mask) == ~((mask_t) 0))
+ && ((prev->idx + MASK_BITS + prev->num_after)
+ == nodep->idx)) {
+ fprintf(stderr, "Current node has mask with "
+ "all bits set and is adjacent to the "
+ "previous node,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+
+ error_detected = true;
+ break;
+ }
+ }
+ }
+
+ if (!error_detected) {
+ /* Is sum of bits set in each node equal to the count
+ * of total bits set.
+ */
+ if (s->num_set != total_bits_set) {
+ fprintf(stderr, "Number of bits set mismatch,\n"
+ " s->num_set: 0x%lx total_bits_set: 0x%lx",
+ s->num_set, total_bits_set);
+
+ error_detected = true;
+ }
+ }
+
+ if (error_detected) {
+ fputs(" dump_internal:\n", stderr);
+ test_sparsebit_dump_internal(stderr, sbit, 4);
+ TEST_ASSERT(false, "Validate internal detected an error.");
+ /* Safety net in case TEST_ASSERT() ever returns. */
+ assert(false);
+ }
+}
+
+/* ======= Start of Implementation Dependent Local Functions ============ */
+
+/* Node Num Set
+ *
+ * Input Args:
+ * nodep - pointer to node to count set bits within
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Number of bits set.
+ *
+ * Determines and returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static test_sparsebit_num_t node_num_set(const node_t *nodep)
+{
+ /* Count the mask bits with __builtin_popcount(), consistent with
+ * node_reduce(), instead of a manual loop shifting a signed 1
+ * (which would be undefined for the top bit of a 32-bit mask).
+ * The num_after member counts additional contiguous set bits.
+ */
+ return __builtin_popcount(nodep->mask) + nodep->num_after;
+}
+
+/* Node Copy Subtree
+ *
+ * Input Args:
+ * subtree - pointer to root of sub-tree of nodes
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to newly allocated copy of subtree.
+ *
+ * Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * In the case of insufficient memory a TEST_ASSERT failure is produced.
+ */
+static node_t *node_copy_subtree(const node_t *subtree)
+{
+ node_t *root;
+
+ /* Duplicate the node at the root of the subtree */
+ root = calloc(1, sizeof(*root));
+ TEST_ASSERT(root != NULL, "Insufficient Memory");
+ root->idx = subtree->idx;
+ root->mask = subtree->mask;
+ root->num_after = subtree->num_after;
+
+ /* As needed, recursively duplicate the left and right subtrees.
+ * Recursion depth equals the subtree height, which can degenerate
+ * to the node count (see the node_add() TODO about balancing).
+ */
+ if (subtree->left) {
+ root->left = node_copy_subtree(subtree->left);
+ root->left->parent = root;
+ }
+
+ if (subtree->right) {
+ root->right = node_copy_subtree(subtree->right);
+ root->right->parent = root;
+ }
+
+ return root;
+}
+
+/* Node Find Const
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * idx - bit index
+ *
+ * Output Args: None
+ *
+ * Return: Pointer to node that describes the setting of the bit at idx.
+ * NULL if there is no such node.
+ *
+ * Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx. A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask.
+ */
+static const node_t *node_find_const(const pvt_t *s, test_sparsebit_idx_t idx)
+{
+ node_t *nodep;
+
+ /* Find the node that describes the setting of the bit at idx.
+ * Descend left when the candidate node starts above idx, right
+ * otherwise; stop when idx falls within the range the node
+ * describes (its mask bits plus num_after bits).
+ */
+ for (nodep = s->root; nodep;
+ nodep = (nodep->idx > idx) ? nodep->left : nodep->right) {
+ if ((idx >= nodep->idx) && (idx <= (nodep->idx + MASK_BITS
+ + nodep->num_after - 1)))
+ break;
+ }
+
+ return nodep;
+}
+
+/* Node Find
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * idx - bit index
+ *
+ * Output Args: None
+ *
+ * Return: Pointer to node that describes the setting of the bit at idx.
+ * NULL if there is no such node.
+ *
+ * A non-const wrapper of node_find_const(). This wrapper works the same
+ * as node_find_const() but takes a non-const pointer to the test
+ * sparsebit implementation private area and returns a non-const pointer
+ * to the node, if it is found.
+ */
+static node_t *node_find(pvt_t *s, test_sparsebit_idx_t idx)
+{
+ /* Casting away const is safe: the caller supplied a non-const s. */
+ return (node_t *) node_find_const(s, idx);
+}
+
+/* Node Add
+ *
+ * Input Args:
+ * idx - bit index
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Return: pointer to newly added node
+ *
+ * Entry Requirements:
+ * + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx. Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerative cases causes this implementation of
+ * a binary search tree to turn into a doubly-linked list.
+ * Change implementation to a red-black tree, which is a
+ * form of a partially balanced binary tree. Worst case
+ * the lowest leaf node of a red-black tree will be at
+ * most 2 times the distance of the highest leaf node.
+ */
+static node_t *node_add(pvt_t *s, test_sparsebit_idx_t idx)
+{
+ node_t *nodep, *parentp, *prev;
+
+ TEST_ASSERT(node_find_const(s, idx) == NULL, "There is already a node "
+ " that describes the setting of this bit, idx: 0x%lx", idx);
+
+ /* Allocate and initialize the new node. The starting index is
+ * rounded down to a mask boundary; calloc leaves mask, num_after
+ * and the link pointers zeroed.
+ */
+ nodep = calloc(1, sizeof(*nodep));
+ TEST_ASSERT(nodep != NULL, "Insufficient Memory");
+ nodep->idx = idx - (idx % MASK_BITS);
+
+ /* If no nodes, set it up as the root node. */
+ if (s->root == NULL) {
+ s->root = nodep;
+ return nodep;
+ }
+
+ /* Find the parent where the new node should be attached
+ * and add the node there.
+ */
+ TEST_ASSERT(s->root != NULL, "Unexpected missing root node, "
+ "s->root: %p", s->root);
+ parentp = s->root;
+ while (true) {
+ if (idx < parentp->idx) {
+ if (!parentp->left) {
+ parentp->left = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->left;
+ } else {
+ TEST_ASSERT(idx > (parentp->idx + MASK_BITS
+ + parentp->num_after - 1),
+ "Unexpected node that describes setting "
+ "of idx,\n"
+ " idx: 0x%lx\n"
+ " parentp->idx: 0x%lx\n"
+ " MASK_BITS: %lu\n"
+ " parentp->num_after: %lu",
+ idx, parentp->idx, MASK_BITS,
+ parentp->num_after);
+ if (!parentp->right) {
+ parentp->right = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->right;
+ }
+ }
+
+ /* Do the num_after bits of the previous node overlap with the
+ * mask of the new node? If so, move the overlapping bits into
+ * the new node's mask (one bit per loop iteration, highest bit
+ * first) and reduce the previous node's num_after accordingly.
+ */
+ prev = node_prev(s, nodep);
+ while (prev && ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx)) {
+ TEST_ASSERT(prev->num_after > 0, "Expected previous node "
+ "to have bits described by num_after,\n"
+ " prev: %p prev->idx: 0x%lx prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, prev->num_after, nodep, nodep->idx);
+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+ - nodep->idx;
+ TEST_ASSERT(n1 < MASK_BITS, "Expected last bit "
+ "described by prev->num_after to be within "
+ "new nodes mask,\n"
+ " n1: %u prev->idx: 0x%lx MASK_BITS: %lu "
+ "prev->num_after: 0x%lx nodep->idx: 0x%lx",
+ n1, prev->idx, MASK_BITS, prev->num_after,
+ nodep->idx);
+ TEST_ASSERT(!(nodep->mask & (1 << n1)), "Unexpected "
+ "mask bit already set,\n"
+ " nodep->idx: 0x%lx nodep->mask: 0x%x n1: %u\n"
+ " prev->idx: 0x%lx MASK_BITS: %lu "
+ " prev->num_after: 0x%lx",
+ nodep->idx, nodep->mask, n1,
+ prev->idx, MASK_BITS, prev->num_after);
+ nodep->mask |= (1 << n1);
+ prev->num_after--;
+ }
+
+ return nodep;
+}
+
+/* Node Remove
+ *
+ * Input Args:
+ * nodep - pointer to test sparsebit array node to be removed
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Return: None
+ *
+ * Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(pvt_t *s, node_t *nodep)
+{
+ node_t *tmp;
+
+ TEST_ASSERT(nodep, "NULL node pointer, nodep: %p", nodep);
+
+ /* Account for the bits this node described before unlinking it.
+ * The all_set(s) exception covers the special encoding where
+ * num_set is 0 yet bits are set (see all_set()).
+ */
+ TEST_ASSERT((s->num_set >= node_num_set(nodep)) || all_set(s),
+ "Count of total bits set is less than bits being removed,\n"
+ " s->num_set: 0x%lx node_num_set(nodep): 0x%lx "
+ "nodep->mask: %x nodep->num_after: 0x%lx",
+ s->num_set, node_num_set(nodep), nodep->mask, nodep->num_after);
+ s->num_set -= node_num_set(nodep);
+
+ /* Have both left and right child */
+ if (nodep->left && nodep->right) {
+ /* Move the left subtree under the leftmost leaf of the
+ * right child. Ordering is preserved because every index
+ * in the left subtree is below every index in the right
+ * subtree. This reduces removal to the single-child case.
+ */
+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+ ;
+ tmp->left = nodep->left;
+ nodep->left = NULL;
+ tmp->left->parent = tmp;
+ }
+
+ /* Left only child */
+ if (nodep->left) {
+ TEST_ASSERT(nodep->right == NULL, "Has right child,\n"
+ " nodep: %p nodep->left: %p nodep->right: %p",
+ nodep, nodep->left, nodep->right);
+ if (nodep->parent == NULL) {
+ s->root = nodep->left;
+ nodep->left->parent = NULL;
+ } else {
+ nodep->left->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->left;
+ else {
+ TEST_ASSERT(nodep == nodep->parent->right,
+ "Expected right child");
+ nodep->parent->right = nodep->left;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+
+ /* Right only child */
+ if (nodep->right) {
+ TEST_ASSERT(nodep->left == NULL, "Has left child,\n"
+ " nodep: %p nodep->left: %p nodep->right: %p",
+ nodep, nodep->left, nodep->right);
+
+ if (nodep->parent == NULL) {
+ s->root = nodep->right;
+ nodep->right->parent = NULL;
+ } else {
+ nodep->right->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->right;
+ else {
+ TEST_ASSERT(nodep == nodep->parent->right,
+ "Expected right child");
+ nodep->parent->right = nodep->right;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Leaf Node */
+ TEST_ASSERT((nodep->left == NULL) && (nodep->right == NULL),
+ "Not a leaf node, nodep: %p nodep->left: %p nodep->right: %p",
+ nodep, nodep->left, nodep->right);
+ if (nodep->parent == NULL) {
+ s->root = NULL;
+ } else {
+ if (nodep->parent->left == nodep)
+ nodep->parent->left = NULL;
+ else {
+ TEST_ASSERT(nodep == nodep->parent->right,
+ "Expected right child");
+ nodep->parent->right = NULL;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+}
+
+/* Node Split
+ *
+ * Input Args:
+ * idx - bit index
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Return:
+ * Pointer to new/previously_existing node where the nodes starting
+ * index is equal to idx.
+ *
+ * Entry Requirements:
+ * + idx at start of a mask boundary
+ *
+ * Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index. If no such node exists, a new
+ * node at the specified index is created.
+ */
+static node_t *node_split(pvt_t *s, test_sparsebit_idx_t idx)
+{
+ node_t *nodep1, *nodep2;
+ test_sparsebit_idx_t offset;
+ test_sparsebit_num_t orig_num_after;
+
+ TEST_ASSERT(!(idx % MASK_BITS), "Split index not on a mask boundary, "
+ "idx: 0x%lx", idx);
+
+ /* Is there a node that describes the setting of idx?
+ * If not, add it.
+ */
+ nodep1 = node_find(s, idx);
+ if (nodep1 == NULL) {
+ nodep1 = node_add(s, idx);
+ TEST_ASSERT(nodep1 != NULL, "NULL return from node_add()");
+ TEST_ASSERT(nodep1->idx == idx, "Unexpected starting index,\n"
+ " nodep1->idx: 0x%lx\n"
+ " idx: 0x%lx", nodep1->idx, idx);
+ return nodep1;
+ }
+
+ /* All done if the starting index of the node is where the
+ * split should occur.
+ */
+ if (nodep1->idx == idx)
+ return nodep1;
+
+ /* Split point not at start of mask, so it must be part of
+ * bits described by num_after.
+ */
+ /* Calculate offset within num_after for where the split is
+ * to occur.
+ */
+ offset = idx - (nodep1->idx + MASK_BITS);
+ orig_num_after = nodep1->num_after;
+
+ /* Add a new node to describe the bits starting at
+ * the split point.
+ */
+ nodep1->num_after = offset;
+ nodep2 = node_add(s, idx);
+ TEST_ASSERT(nodep2 != NULL, "NULL return from node_add()");
+ TEST_ASSERT(nodep2->idx == idx, "Unexpected starting index,\n"
+ " nodep2->idx: 0x%lx\n"
+ " idx: 0x%lx", nodep2->idx, idx);
+
+ /* Move bits after the split point into the new node */
+ nodep2->num_after = orig_num_after - offset;
+ if (nodep2->num_after >= MASK_BITS) {
+ nodep2->mask = ~((mask_t) 0);
+ nodep2->num_after -= MASK_BITS;
+ } else {
+ /* NOTE(review): signed shift here (1 <<), while
+ * node_reduce() uses 1u << for the same computation —
+ * confirm num_after < 31 or make them consistent.
+ */
+ nodep2->mask = (1 << nodep2->num_after) - 1;
+ nodep2->num_after = 0;
+ }
+
+ return nodep2;
+}
+
+/* Node First Const
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Node pointer to the node with the lowest index.
+ *
+ * Returns the node that describes the lowest bit index. Because the
+ * search tree is ordered by starting index, this is the leftmost node,
+ * reached by following left links from the root. NULL when empty.
+ */
+static const node_t *node_first_const(const pvt_t *s)
+{
+ const node_t *lowest = s->root;
+
+ while (lowest && lowest->left)
+ lowest = lowest->left;
+
+ return lowest;
+}
+
+/* Node Next Const
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * np - pointer to previous test sparsebit array node
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Node pointer to the node with the lowest index > the index
+ * of the node pointed to by np.
+ * NULL if no node with a higher index exists.
+ *
+ * Searches for and returns the in-order successor of np: the node
+ * whose starting index is the lowest one greater than np's.
+ */
+static const node_t *node_next_const(const pvt_t *s, const node_t *np)
+{
+ const node_t *nodep = np;
+
+ /* If current node has a right child, next node is the left-most
+ * of the right child.
+ */
+ if (nodep->right) {
+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+ ;
+ return nodep;
+ }
+
+ /* No right child. Go up until node is left child of a parent.
+ * That parent is then the next node. A NULL parent here means
+ * np was the highest-indexed node, so NULL is returned.
+ */
+ for (; nodep->parent && nodep == nodep->parent->right;
+ nodep = nodep->parent)
+ ;
+
+ return nodep->parent;
+}
+
+/* Node Next
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * np - pointer to previous test sparsebit array node
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Node pointer to the node with the lowest index > the index
+ * of the node pointed to by np.
+ * NULL if no node with a higher index exists.
+ *
+ * A non-const wrapper of node_next_const(). This wrapper works the same
+ * as node_next_const() but takes a non-const pointer to the test
+ * sparsebit implementation private area and returns a non-const pointer
+ * to the node, if it is found.
+ */
+static node_t *node_next(pvt_t *s, node_t *np)
+{
+ return (node_t *) node_next_const(s, np);
+}
+
+/* Node Previous
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * np - pointer to next test sparsebit array node
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Node pointer to the node with the highest index < the index
+ * of the node pointed to by np.
+ * NULL if no node with a lower index exists.
+ *
+ * Searches for and returns the in-order predecessor of np: the node
+ * whose starting index is the highest one less than np's.
+ */
+static node_t *node_prev(pvt_t *s, node_t *np)
+{
+ const node_t *nodep = np;
+
+ /* If current node has a left child, the previous node is the
+ * right-most of the left child.
+ */
+ if (nodep->left) {
+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+ ;
+ return (node_t *) nodep;
+ }
+
+ /* No left child. Go up until node is right child of a parent.
+ * That parent is then the previous node. A NULL parent here
+ * means np was the lowest-indexed node, so NULL is returned.
+ */
+ for (; nodep->parent && nodep == nodep->parent->left;
+ nodep = nodep->parent)
+ ;
+
+ return (node_t *) nodep->parent;
+}
+
+/* All Set
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Output Args: None
+ *
+ * Return:
+ * True if all bits are set.
+ *
+ * Reports whether every bit of the test sparsebit array is set. The
+ * implementation uses a special encoding for this state: a non-empty
+ * tree (so at least one bit is set) combined with num_set == 0, a
+ * count that could otherwise never be 0 while a bit is set.
+ */
+static bool all_set(const pvt_t *s)
+{
+ return (s->root != NULL) && (s->num_set == 0);
+}
+
+/* Is Set
+ *
+ * Input Args:
+ * s - pointer to test sparsebit array implementation private data
+ * idx - Bit index
+ *
+ * Output Args: None
+ *
+ * Return:
+ * True if the bit is set, false otherwise
+ *
+ * Determines whether the bit at the index given by idx, within the
+ * test sparsebit array is set or not. Returns true if the bit is
+ * set, otherwise false is returned.
+ */
+static bool is_set(const pvt_t *s, test_sparsebit_idx_t idx)
+{
+ const node_t *nodep;
+
+ /* Find the node that describes the setting of the bit at idx.
+ * Reuse node_find_const() rather than duplicating its search
+ * loop inline.
+ */
+ nodep = node_find_const(s, idx);
+ if (nodep == NULL)
+ return false;
+
+ /* Bit is set if it is any of the bits described by num_after */
+ if (nodep->num_after && (idx >= (nodep->idx + MASK_BITS)))
+ return true;
+
+ /* Is the corresponding mask bit set */
+ TEST_ASSERT((idx >= nodep->idx) && ((idx - nodep->idx) < MASK_BITS),
+ "index not part of bits described by mask, "
+ "idx: 0x%lx nodep->idx: 0x%lx MASK_BITS: %lu",
+ idx, nodep->idx, MASK_BITS);
+ if (nodep->mask & (1 << (idx - nodep->idx)))
+ return true;
+
+ return false;
+}
+
+/* Bit Set
+ *
+ * Input Args:
+ * idx - bit index
+ *
+ * Input/Output Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the test sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(pvt_t *s, test_sparsebit_idx_t idx)
+{
+ node_t *nodep;
+
+ /* Skip bits that are already set */
+ if (is_set(s, idx))
+ return;
+
+ /* Get a node where the bit at idx is described by the mask.
+ * The index passed to node_split() is rounded down to a mask
+ * boundary, as node_split() requires. The node_split will also
+ * create a node, if there isn't already a node that describes
+ * the setting of bit.
+ */
+ nodep = node_split(s, idx - (idx % MASK_BITS));
+ TEST_ASSERT(nodep, "node not present after node_split, "
+ "nodep: %p idx: 0x%lx", nodep, idx);
+
+ /* Set the bit within the nodes mask */
+ TEST_ASSERT((idx >= nodep->idx)
+ && (idx <= (nodep->idx + MASK_BITS - 1)),
+ "After node split, idx not part of node mask, "
+ "nodep: %p nodep->idx: 0x%lx idx: 0x%lx MASK_BITS: %lu",
+ nodep, nodep->idx, idx, MASK_BITS);
+ TEST_ASSERT(!(nodep->mask & (1 << (idx - nodep->idx))),
+ "Unexpected, bit already set, idx: 0x%lx "
+ "nodep->idx: 0x%lx nodep->mask: 0x%x",
+ idx, nodep->idx, nodep->mask);
+ nodep->mask |= (1 << (idx - nodep->idx));
+ s->num_set++;
+
+ /* Merge with adjacent nodes where possible (e.g. the mask just
+ * became all-ones next to an adjacent previous node).
+ */
+ node_reduce(s, nodep);
+}
+
+/* Bit Clear
+ *
+ * Input Args:
+ * idx - bit index
+ *
+ * Input/Output Args:
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the test sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(pvt_t *s, test_sparsebit_idx_t idx)
+{
+ node_t *nodep;
+
+ /* Skip bits that are already cleared */
+ if (!is_set(s, idx))
+ return;
+
+ /* Is there a node that describes the setting of this bit? */
+ nodep = node_find(s, idx);
+ if (nodep == NULL)
+ return;
+
+ /* If a num_after bit, split the node, so that the bit is
+ * part of a node mask.
+ */
+ if (idx >= (nodep->idx + MASK_BITS)) {
+ nodep = node_split(s, idx - (idx % MASK_BITS));
+ TEST_ASSERT(nodep, "node not present after node_split, "
+ "nodep: %p idx: 0x%lx", nodep, idx);
+ TEST_ASSERT((idx >= nodep->idx)
+ && (idx <= (nodep->idx + MASK_BITS - 1)),
+ "After node split, idx not part of node mask, "
+ "nodep: %p nodep->idx: 0x%lx idx: 0x%lx MASK_BITS: %lu",
+ nodep, nodep->idx, idx, MASK_BITS);
+ }
+
+ /* After node_split above, bit at idx should be within the mask.
+ * Clear that bit. (A second TEST_ASSERT of the identical mask
+ * predicate was removed here — it duplicated the one below.)
+ */
+ TEST_ASSERT((idx >= nodep->idx) && (idx <= nodep->idx + MASK_BITS - 1),
+ "Index not within node mask after doing node_split,\n"
+ " nodep: %p nodep->idx: 0x%lx idx: 0x%lx MASK_BITS: %lu",
+ nodep, nodep->idx, idx, MASK_BITS);
+ TEST_ASSERT(nodep->mask & (1 << (idx - nodep->idx)),
+ "Unexpected, mask bit is clear, "
+ "idx: 0x%lx nodep->idx: 0x%lx "
+ "nodep->mask: 0x%x",
+ idx, nodep->idx, nodep->mask);
+ nodep->mask &= ~(1 << (idx - nodep->idx));
+ TEST_ASSERT((s->num_set > 0) || all_set(s),
+ "Unexpected global count "
+ "of bits set, s->num_set: 0x%lx", s->num_set);
+ s->num_set--;
+
+ /* Merge with adjacent nodes / drop the node if now empty. */
+ node_reduce(s, nodep);
+}
+
+/* Node Reduce
+ *
+ * Input Args: None
+ *
+ * Input/Output Args:
+ * nodep - pointer to next test sparsebit array node
+ * s - pointer to test sparsebit array implementation private data
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form. For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes. Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction. Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered. It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap. Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes. This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurrences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ */
+static void node_reduce(pvt_t *s, node_t *nodep)
+{
+ bool reduction_performed;
+
+ do {
+ reduction_performed = false;
+ node_t *prev, *next, *tmp;
+
+ /* Potential reductions within the current node. */
+ /* Nodes with all bits cleared may be removed. */
+ if ((nodep->mask == 0) && (nodep->num_after == 0)) {
+ /* About to remove the node pointed to by
+ * nodep, which normally would cause a problem
+ * for the next pass through the reduction loop,
+ * because the node at the starting point no longer
+ * exists. This potential problem is handled
+ * by first remembering the location of the next
+ * or previous nodes. Doesn't matter which, because
+ * once the node at nodep is removed, there will be
+ * no other nodes between prev and next.
+ *
+ * Note, the checks performed on nodep against
+ * both prev and next check for an adjacent
+ * node that can be reduced into a single node. As
+ * such, after removing the node at nodep, doesn't
+ * matter whether the nodep for the next pass
+ * through the loop is equal to the previous pass
+ * prev or next node. Either way, on the next pass
+ * the one not selected will become either the
+ * prev or next node.
+ */
+ tmp = node_next(s, nodep);
+ if (tmp == NULL)
+ tmp = node_prev(s, nodep);
+
+ node_rm(s, nodep);
+ nodep = NULL;
+
+ nodep = tmp;
+ reduction_performed = true;
+ continue;
+ }
+
+ /* When the mask is 0, can reduce the amount of num_after
+ * bits by moving the initial num_after bits into the mask.
+ */
+ if (nodep->mask == 0) {
+ TEST_ASSERT(nodep->num_after != 0, "Expected at "
+ "least 1 num_after bit,\n"
+ " nodep: %p nodep->mask: 0x%x "
+ "nodep->num_after: 0x%lx",
+ nodep, nodep->mask, nodep->num_after);
+ TEST_ASSERT((nodep->idx + MASK_BITS) > nodep->idx,
+ "non-zero num_after setting describes bits "
+ "beyond the max index,\n"
+ " nodep: %p nodep->idx: 0x%lx MASK_BITS: %lu",
+ nodep, nodep->idx, MASK_BITS);
+
+ nodep->idx += MASK_BITS;
+
+ if (nodep->num_after >= MASK_BITS) {
+ /* NOTE(review): ~0 here vs ~((mask_t) 0)
+ * elsewhere — same value for a 32-bit
+ * mask_t, but stylistically inconsistent.
+ */
+ nodep->mask = ~0;
+ nodep->num_after -= MASK_BITS;
+ } else {
+ nodep->mask = (1u << nodep->num_after) - 1;
+ nodep->num_after = 0;
+ }
+
+ TEST_ASSERT(nodep->mask != 0, "Unexpected mask of "
+ "zero, nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /* Potential reductions between the current and
+ * previous nodes.
+ */
+ prev = node_prev(s, nodep);
+ if (prev) {
+ test_sparsebit_idx_t prev_highest_bit;
+
+ /* Nodes with no bits set can be removed. */
+ if ((prev->mask == 0) && (prev->num_after == 0)) {
+ node_rm(s, prev);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /* All mask bits set and previous node has
+ * adjacent index.
+ */
+ if (((nodep->mask + 1) == 0)
+ && ((prev->idx + MASK_BITS) == nodep->idx)) {
+ prev->num_after += MASK_BITS + nodep->num_after;
+ nodep->mask = 0;
+ nodep->num_after = 0;
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /* Is node adjacent to previous node and the node
+ * contains a single contiguous range of bits
+ * starting from the beginning of the mask?
+ */
+ prev_highest_bit = prev->idx + MASK_BITS - 1
+ + prev->num_after;
+ if (((prev_highest_bit + 1) == nodep->idx)
+ && ((nodep->mask | (nodep->mask >> 1))
+ == nodep->mask)) {
+ /* How many contiguous bits are there?
+ * Is equal to the total number of set
+ * bits, due to an earlier check that
+ * there is a single contiguous range of
+ * set bits.
+ */
+ unsigned int num_contiguous
+ = __builtin_popcount(nodep->mask);
+ TEST_ASSERT((num_contiguous > 0)
+ && ((1ULL << num_contiguous) - 1)
+ == nodep->mask,
+ "Unexpected mask, mask: 0x%x "
+ "num_contiguous: %u",
+ nodep->mask, num_contiguous);
+
+ prev->num_after += num_contiguous;
+ nodep->mask = 0;
+
+ /* For predictable performance, handle special
+ * case where all mask bits are set and there
+ * is a non-zero num_after setting. This code
+ * is functionally correct without the following
+ * conditionalized statements, but without them
+ * the value of num_after is only reduced by
+ * the number of mask bits per pass. There are
+ * cases where num_after can be close to 2^64.
+ * Without this code it could take nearly
+ * (2^64) / 32 passes to perform the full
+ * reduction.
+ */
+ if (num_contiguous == MASK_BITS) {
+ prev->num_after += nodep->num_after;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+
+ /* Potential reductions between the current and
+ * next nodes.
+ */
+ next = node_next(s, nodep);
+ if (next) {
+ /* Nodes with no bits set can be removed. */
+ if ((next->mask == 0) && (next->num_after == 0)) {
+ node_rm(s, next);
+ reduction_performed = true;
+ continue;
+ }
+
+ /* Is next node index adjacent to current node
+ * and has a mask with all bits set? */
+ if ((next->idx == (nodep->idx
+ + MASK_BITS + nodep->num_after))
+ && (next->mask == ~((mask_t) 0))) {
+ nodep->num_after += MASK_BITS;
+ next->mask = 0;
+ nodep->num_after += next->num_after;
+ next->num_after = 0;
+
+ node_rm(s, next);
+ next = NULL;
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+ } while (nodep && reduction_performed);
+}
+
+/* Display Range
+ *
+ * Input Args:
+ * low - low index of range
+ * high - high index of range
+ * prepend_comma_space - add ", " prefix
+ *
+ * Output Args:
+ * stream - output stream
+ *
+ * Return:
+ * Number of characters that were or would have been displayed.
+ *
+ * When stream is non-Null, displays the inclusive index range given by
+ * low and high. A single-index range (low == high) is printed as one
+ * value; otherwise as "low:high". When prepend_comma_space is true,
+ * the character sequence ", " is prefixed to the displayed range.
+ *
+ * When stream is NULL, nothing is displayed, but the number of characters
+ * that would have been printed is still returned.
+ */
+static size_t display_range(FILE *stream, test_sparsebit_idx_t low,
+ test_sparsebit_idx_t high, bool prepend_comma_space)
+{
+ const char *fmt_str;
+ size_t sz;
+
+ /* Determine the printf format string */
+ if (low == high)
+ fmt_str = (prepend_comma_space)
+ ? ", 0x%lx" : "0x%lx";
+ else
+ fmt_str = (prepend_comma_space)
+ ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+ /* When stream is NULL, just determine the size of what would
+ * have been printed, else print the range. (snprintf with a
+ * NULL buffer and size 0 returns the would-be length.)
+ */
+ if (stream == NULL)
+ sz = snprintf(NULL, 0, fmt_str, low, high);
+ else
+ sz = fprintf(stream, fmt_str, low, high);
+
+ return sz;
+}
+
+/* Dump Sub-Tree of Nodes
+ *
+ * Input Args:
+ * nodep - pointer to top of node sub-tree to be dumped
+ * indent - number of spaces at start of each output line
+ *
+ * Output Args:
+ * stream - output stream
+ *
+ * Return: None
+ *
+ * Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep. Each line of output
+ * is prefixed by the number of spaces given by indent. On each
+ * recursion, the indent amount is increased by 2. This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, const node_t *nodep,
+ unsigned int indent)
+{
+ const char *node_type;
+
+ /* Dump contents of node: classify as root, left, or right child. */
+ if (nodep->parent == NULL)
+ node_type = "root";
+ else if (nodep == nodep->parent->left)
+ node_type = "left";
+ else {
+ TEST_ASSERT(nodep == nodep->parent->right,
+ "Unexpected, not right child, "
+ "nodep: %p nodep->parent->right: %p",
+ nodep, nodep->parent->right);
+ node_type = "right";
+ }
+ /* "%*s" with an empty string argument emits 'indent' spaces. */
+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
+ nodep->parent, nodep->left, nodep->right);
+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+ indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+ /* If present, dump contents of left child nodes */
+ if (nodep->left)
+ dump_nodes(stream, nodep->left, indent + 2);
+
+ /* If present, dump contents of right child nodes */
+ if (nodep->right)
+ dump_nodes(stream, nodep->right, indent + 2);
+}
diff --git a/gtests/lib/test_util.c b/gtests/lib/test_util.c
new file mode 100644
index 0000000..c3829c8
--- /dev/null
+++ b/gtests/lib/test_util.c
@@ -0,0 +1,3411 @@
+/*
+ * gtests/lib/test_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include <test_util.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <execinfo.h>
+#include <float.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bits/endian.h>
+
+#include <linux/elf.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+/* The function sgnif relies on the floating point formats having
+ * an exponent radix of 2.
+ */
+#if FLT_RADIX != 2
+#error "FLT_RADIX != 2. This implementation only supports FLT_RADIX == 2."
+#endif
+
+#define INF ((uint64_t)0 - 1) /* For test_symb_infinity. */
+
+/* Element count of a fixed-size array, and compile-time length of a
+ * string literal (excludes the terminating NUL).
+ */
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+#define CONST_STRLEN(str) (ARRAY_SIZE(str) - 1)
+/* Set the function-local rv and jump to its error label. */
+#define GOTO_ERROR(val) do { \
+		rv = val; \
+		goto error; \
+	} while (0)
+#define HEX_PREFIX "0x"
+
+#define MOUNTS_PATH "/proc/mounts"
+#define DEBUGFS_TYPE "debugfs"
+
+/* We use a uint64_t to store addresses, thus
+ * the need for a defined maximum nibble count.
+ */
+#define TEST_PG_MAX_NIBBLES ((sizeof(uint64_t) * CHAR_BIT) / 4)
+
+/* Nanoseconds per second, for struct timespec arithmetic below. */
+static const unsigned int nsecs_per_sec = 1000000000;
+
+/* Symbolic spellings accepted by the test_parse_* functions for an
+ * "infinite" value; each maps to INF.  NULL name marks end of list.
+ */
+const struct test_symb test_symb_infinity[] = {
+	{"INF", INF},
+	{"INFINITY", INF},
+	{"Inf", INF},
+	{"Infinity", INF},
+	{"inf", INF},
+	{"infinity", INF},
+	{NULL}, /* End of list marker */
+};
+
+/* Convenience Macros */
+#define CEIL_BYTES_TO_PAGES(x) (((x) + (getpagesize() - 1)) / getpagesize())
+#define PTR_ADD(ptr, num) (void *)((uintptr_t)(ptr) + (num))
+/* Validate a struct timespec pointer: secs and nsecs must be
+ * non-negative and nsecs < 1e9.  TEST_ASSERTs (exiting) on violation.
+ */
+#define TS_VALIDATE(t) do { \
+	TEST_ASSERT((t)->tv_sec >= 0, "%s " #t " negative secs, " \
+		#t "->tv_sec: %li", __func__, (t)->tv_sec); \
+	TEST_ASSERT((t)->tv_nsec >= 0, "%s " #t " negative nsecs, " \
+		#t "->tv_nsec: %li", __func__, (t)->tv_nsec); \
+	TEST_ASSERT((t)->tv_nsec < nsecs_per_sec, "%s " #t "too many nsecs, " \
+		#t "->tv_nsec: %li", __func__, (t)->tv_nsec); \
+} while (0)
+
+/* Red-zone marker and size used by the test_malloc allocator. */
+#define TEST_MALLOC_MAGIC_NUM 0xBACC821F
+#define TEST_MALLOC_RED_ZONE_SIZE 128
+
+/* Symbolic names for well-known errno values, usable as the symb
+ * argument of the test_parse_* functions.  NULL name ends the list.
+ */
+const struct test_symb test_known_errno[] = {
+	{"EPERM", EPERM},
+	{"ENOENT", ENOENT},
+	{"ESRCH", ESRCH},
+	{"EINTR", EINTR},
+	{"EIO", EIO},
+	{"ENXIO", ENXIO},
+	{"E2BIG", E2BIG},
+	{"ENOEXEC", ENOEXEC},
+	{"EBADF", EBADF},
+	{"ECHILD", ECHILD},
+	{"EAGAIN", EAGAIN},
+	{"ENOMEM", ENOMEM},
+	{"EACCES", EACCES},
+	{"EFAULT", EFAULT},
+	{"ENOTBLK", ENOTBLK},
+	{"EBUSY", EBUSY},
+	{"EEXIST", EEXIST},
+	{"EXDEV", EXDEV},
+	{"ENODEV", ENODEV},
+	{"ENOTDIR", ENOTDIR},
+	{"EISDIR", EISDIR},
+	{"EINVAL", EINVAL},
+	{"ENFILE", ENFILE},
+	{"EMFILE", EMFILE},
+	{"ENOTTY", ENOTTY},
+	{"ETXTBSY", ETXTBSY},
+	{"EFBIG", EFBIG},
+	{"ENOSPC", ENOSPC},
+	{"ESPIPE", ESPIPE},
+	{"EROFS", EROFS},
+	{"EMLINK", EMLINK},
+	{"EPIPE", EPIPE},
+	{"EDOM", EDOM},
+	{"ERANGE", ERANGE},
+	{NULL} /* End of list marker */
+};
+
+/* Symbolic names for well-known signal numbers, usable as the symb
+ * argument of the test_parse_* functions.  NULL name ends the list.
+ */
+const struct test_symb test_known_sig[] = {
+	{"SIGHUP", SIGHUP},
+	{"SIGINT", SIGINT},
+	{"SIGQUIT", SIGQUIT},
+	{"SIGILL", SIGILL},
+	{"SIGTRAP", SIGTRAP},
+	{"SIGABRT", SIGABRT},
+	{"SIGBUS", SIGBUS},
+	{"SIGFPE", SIGFPE},
+	{"SIGKILL", SIGKILL},
+	{"SIGUSR1", SIGUSR1},
+	{"SIGSEGV", SIGSEGV},
+	{"SIGUSR2", SIGUSR2},
+	{"SIGPIPE", SIGPIPE},
+	{"SIGALRM", SIGALRM},
+	{"SIGTERM", SIGTERM},
+	{"SIGCHLD", SIGCHLD},
+	{"SIGCONT", SIGCONT},
+	{"SIGSTOP", SIGSTOP},
+	{"SIGPROF", SIGPROF},
+	{"SIGIO", SIGIO},
+	{"SIGPOLL", SIGPOLL},
+	{"SIGPWR", SIGPWR},
+	{NULL} /* End of list marker */
+};
+
+/* test_malloc keeps track of its allocations by building a
+ * singly linked list of test_malloc_alloc structs (lookup
+ * efficiency is not a priority).
+ */
+struct test_malloc_alloc {
+	struct test_malloc_alloc *next;
+
+	/* User payload's starting address. */
+	void *user_addr;
+
+	/* Starting address of the entire allocation. */
+	void *start_addr;
+
+	/* The user payload's size. */
+	size_t user_size;
+
+	/* The size of the entire allocation. */
+	size_t alloc_size;
+
+	/* The flags with which this memory was test_malloc-ed. */
+	uint32_t flags;
+
+	/* Whether this memory is mmap-ed. */
+	bool mmaped;
+};
+
+/* Head of the singly linked list of outstanding allocations. */
+struct test_malloc_alloc *alloc_list;
+
+/* Local function prototypes */
+static float sgnif(long double expected, long double actual,
+ unsigned int mant_dig, long double min_normalized);
+static bool has_infinity(unsigned int num, const float weights[]);
+static void parse_perm(const char *perm, int *prot, bool *shared);
+static int proc_maps_max_nibbles(const char *map);
+static size_t line_len(const char *str);
+static void malloc_create(struct test_malloc_alloc *allocp,
+ size_t size, uint32_t flags, size_t align_bytes, int fd, off_t offset);
+static struct test_malloc_alloc *malloc_query(const void *addr, bool unlink);
+static void test_init(void);
+
+/* Obtain the complete command-line argument, even in cases where the
+ * argument spans more than a single argv[] pointer.  Within the returned
+ * string, the arguments are joined with a space between each pair of
+ * arguments.
+ *
+ * ARGS:
+ *   arg1 - pointer to first string that is part of the flags
+ *   args[] - remainder of argv array starting with pointer after
+ *            that for arg1.  The array is required to be NULL
+ *            terminated.
+ *
+ * Side Effects:
+ *   + optind is incremented to point to the next argument beyond
+ *     those parsed.
+ *
+ * Returns:
+ *   A pointer to a dynamically allocated string that is a concatenation
+ *   of arg1 plus all the strings from args[] that make up the argument.
+ *   Note, the caller is responsible for freeing the memory of the
+ *   returned string.
+ */
+char *test_get_opt_str(const char *arg1, char *args[])
+{
+	char *str;
+
+	str = test_dyn_sprintf("%s", arg1);
+
+	/* Append additional arguments until an arg starting with -
+	 * (i.e. the next option) or the end of the array is reached.
+	 */
+	while ((*args != NULL) && (**args != '-')) {
+		char *prev_str = str;
+		str = test_dyn_sprintf("%s %s", str, *args);
+		free(prev_str);
+
+		args++;
+		optind++;
+	}
+
+	return str;
+}
+
+/* parse_i64
+ *
+ * Parse the decimal or hexadecimal value provided in the string pointed
+ * to by str.  Parsed value returned in location pointed to by val.  Maximum
+ * and Minimum parsed values given by max and min respectively.  When symb
+ * pointer is non-NULL, it points to a NULL-terminated array of symbolic
+ * values.  Each of these symbolic values has a string and value that it
+ * represents.  Note that the symbolic value is allowed to be outside the
+ * range [min:max].
+ *
+ * ARGS:
+ *   str - Pointer to null-terminated string to be parsed.
+ *   val - Pointer to where the parsed or symbolic value is returned.
+ *   min - Minimum allowed parsed value.  Symbolic values are allowed
+ *         to be outside min boundary.
+ *   max - Maximum allowed parsed value.  Symbolic values are allowed
+ *         to exceed max.
+ *   symb - Pointer to null-terminated array of symbolic values.  A symb
+ *          value of NULL means there is no array of symbolic values.
+ *
+ * Returns:
+ *   TEST_UTIL_SUCCESS - on parsing a value within range [min:max]
+ *                       or finding a matching symbolic entry.
+ *   TEST_UTIL_VALUE_ERR - Parsed value outside range [min:max].
+ *   TEST_UTIL_SYNTAX_ERR - String contains invalid syntax
+ *   TEST_ASSERT - test_assert if min > max
+ */
+int test_parse_i64(const char *str, int64_t *val,
+	int64_t min, int64_t max, const struct test_symbi symb[])
+{
+	const char *chptr;
+	unsigned long long int tmp;
+
+	/* test_assert if the min value provided by the user is greater
+	 * than max.  min == max is a valid single-value range, so only
+	 * strictly greater is rejected.
+	 */
+	if (symb == NULL)
+		TEST_ASSERT(min <= max, " min can not be greater than max , "
+			"min:= %"PRIi64" max: %"PRIi64"\n", min, max);
+
+	/* Skip leading white space */
+	for (chptr = str; *chptr != '\0' && isspace(*chptr); chptr++)
+		;
+
+	/* Empty or string of only whitespace considered a syntax error */
+	if (*chptr == '\0')
+		return TEST_UTIL_SYNTAX_ERR;
+
+	bool negative_num = false;
+	if ((*chptr == '+') || (*chptr == '-')) {
+		if (*chptr == '-')
+			negative_num = true;
+
+		chptr = chptr + 1;
+	}
+
+	/* Is there a matching symbol entry.
+	 * In case of multiple matching symbols, use the longest
+	 */
+	const struct test_symbi *symb_match = NULL;
+	for (const struct test_symbi *symb_entry = symb; (symb_entry != NULL)
+		&& (symb_entry->name != NULL); symb_entry++) {
+		if (strncmp(str, symb_entry->name, strlen(symb_entry->name))
+			== 0) {
+			if ((symb_match == NULL)
+				|| (strlen(symb_entry->name)
+				> strlen(symb_match->name)))
+				symb_match = symb_entry;
+		}
+	}
+
+	char *endptr;
+	if (symb_match != NULL) {
+		endptr = (char *) (str + strlen(symb_match->name));
+		*val = symb_match->val;
+		/* Skip trailing whitespace */
+		for (chptr = endptr; *chptr != '\0' && isspace(*chptr); chptr++)
+			;
+
+		/* Syntax error if anything left to parse */
+		if (*chptr != '\0')
+			return TEST_UTIL_SYNTAX_ERR;
+		else
+			return TEST_UTIL_SUCCESS;
+	}
+
+	if (!isdigit(*chptr))
+		return TEST_UTIL_SYNTAX_ERR;
+
+	if (strncasecmp(chptr, HEX_PREFIX,
+		CONST_STRLEN(HEX_PREFIX)) == 0) {
+		chptr += CONST_STRLEN(HEX_PREFIX);
+
+		/* Whitespace after hex prefix not allowed */
+		if (isspace(*chptr))
+			return TEST_UTIL_SYNTAX_ERR;
+
+		/* Negative or positive sign after hex prefix not allowed */
+		if ((*chptr == '-') || (*chptr == '+'))
+			return TEST_UTIL_SYNTAX_ERR;
+
+		/* In case of multiple 0x in the string, which is not allowed */
+		if (strncasecmp(chptr, HEX_PREFIX,
+			CONST_STRLEN(HEX_PREFIX)) == 0)
+			return TEST_UTIL_SYNTAX_ERR;
+
+		tmp = strtoull(chptr, &endptr, 16);
+	} else
+		tmp = strtoull(chptr, &endptr, 10);
+
+	/* Syntax error if nothing was parsed by call to strtoull. */
+	if (chptr == endptr)
+		return TEST_UTIL_SYNTAX_ERR;
+
+	/* Skip trailing whitespace */
+	for (chptr = endptr; *chptr != '\0' && isspace(*chptr); chptr++)
+		;
+
+	/* Syntax error if anything left to parse */
+	if (*chptr != '\0')
+		return TEST_UTIL_SYNTAX_ERR;
+
+	/* NOTE(review): tmp (unsigned) is compared against max and min
+	 * after implicit conversion of the latter to unsigned long long.
+	 * The tmp == min escape is what lets the magnitude of the most
+	 * negative value parse when the input carried a '-' sign --
+	 * confirm against callers before simplifying.
+	 */
+	if (tmp > max) {
+		if (tmp != min)
+			return TEST_UTIL_VALUE_ERR;
+	}
+
+	*val = tmp;
+
+	/* If strtoull returns a positive value and not wrapped around */
+	if ((negative_num) && (*val > 0))
+		*val = *val * (-1);
+
+	/* In case of wrap around */
+	if ((!negative_num) && (*val == min))
+		return TEST_UTIL_VALUE_ERR;
+
+	/* In case the value provided by user is out of range */
+	if ((*val > max) || (*val < min))
+		return TEST_UTIL_VALUE_ERR;
+
+	return TEST_UTIL_SUCCESS;
+}
+
+/* Parse u32
+ *
+ * Parse the decimal value provided in the string pointed to by str.
+ * Parsed value returned in location pointed to by val.  Maximum parsed
+ * value given by max.  When symb pointer is non-NULL, it points to a
+ * NULL-terminated array of symbolic values.  Each of these symbolic
+ * values has a string and value that it represents.  Note that the
+ * symbolic value is allowed to exceed max.
+ *
+ * ARGS:
+ *   str - Pointer to null-terminated string to be parsed.
+ *   val - Pointer to where the parsed or symbolic value is returned.
+ *   max - Maximum allowed parsed value.  Symbolic values are allowed
+ *         to exceed max.
+ *   symb - Pointer to null-terminated array of symbolic values.
+ *
+ * Returns:
+ *   TEST_UTIL_SUCCESS - on parsing a value at or below max or finding
+ *                       a matching symbolic entry.
+ *   TEST_UTIL_VALUE_ERR - Parsed value greater than max.
+ *   TEST_UTIL_SYNTAX_ERR - String contains invalid syntax
+ */
+int test_parse_u32(const char *str, uint32_t *val, uint32_t max,
+	const struct test_symb symb[])
+{
+	int rv;
+	uint64_t tmp;
+
+	/* Reuse the 64-bit parser.  With max <= UINT32_MAX it already
+	 * rejects any non-symbolic value that would not fit in 32 bits.
+	 */
+	rv = test_parse_u64(str, &tmp, max, symb);
+
+	if (rv != TEST_UTIL_SUCCESS)
+		return rv;
+
+	/* Sanity check the full-width value before truncating it to
+	 * 32 bits; symbolic entries may exceed max but must still fit
+	 * in *val.
+	 */
+	TEST_ASSERT(tmp <= UINT32_MAX,
+		"Value of val greater than expected,"
+		"tmp: 0x%" PRIx64 " max: 0x%" PRIx32 "", tmp, max);
+
+	*val = (uint32_t)tmp;
+
+	return TEST_UTIL_SUCCESS;
+}
+
+/* Parse u64
+ *
+ * Parse the decimal value provided in the string pointed to by str.
+ * Parsed value returned in location pointed to by val.  Maximum parsed
+ * value given by max.  When symb pointer is non-NULL, it points to a
+ * NULL-terminated array of symbolic values.  Each of these symbolic
+ * values has a string and value that it represents.  Note that the
+ * symbolic value is allowed to exceed max.
+ *
+ * ARGS:
+ *   str - Pointer to null-terminated string to be parsed.
+ *   val - Pointer to where the parsed or symbolic value is returned.
+ *   max - Maximum allowed parsed value.  Symbolic values are allowed
+ *         to exceed max.
+ *   symb - Pointer to null-terminated array of symbolic values.
+ *
+ * Returns:
+ *   TEST_UTIL_SUCCESS - on parsing a value at or below max or finding
+ *                       a matching symbolic entry.
+ *   TEST_UTIL_VALUE_ERR - Parsed value greater than max.
+ *   TEST_UTIL_SYNTAX_ERR - String contains invalid syntax
+ */
+int test_parse_u64(const char *str, uint64_t *val, uint64_t max,
+	const struct test_symb symb[])
+{
+	const char *chptr;
+	uint64_t tmp;
+
+	/* Skip leading white space */
+	for (chptr = str; *chptr != '\0' && isspace(*chptr); chptr++)
+		;
+
+	/* Empty or string of only whitespace considered a syntax error */
+	if (*chptr == '\0')
+		return TEST_UTIL_SYNTAX_ERR;
+
+	/* Positive sign prefix a value allowed */
+	if ((*chptr == '+') || (*chptr == '-')) {
+		/* Negative values not allowed */
+		if (*chptr == '-')
+			return TEST_UTIL_SYNTAX_ERR;
+		if (*chptr == '+')
+			chptr = chptr + 1;
+	}
+
+	/* Is there a matching symbol entry.
+	 * In case of multiple matching symbols, use the longest
+	 */
+	const struct test_symb *symb_match = NULL;
+	for (const struct test_symb *symb_entry = symb; (symb_entry != NULL)
+		&& (symb_entry->name != NULL); symb_entry++) {
+		if (strncmp(str, symb_entry->name, strlen(symb_entry->name))
+			== 0) {
+			if ((symb_match == NULL)
+				|| (strlen(symb_entry->name)
+				> strlen(symb_match->name)))
+				symb_match = symb_entry;
+		}
+	}
+
+	char *endptr;
+	if (symb_match != NULL) {
+		*val = symb_match->val;
+		endptr = (char *) (str + strlen(symb_match->name));
+
+		/* Skip trailing whitespace */
+		for (chptr = endptr; *chptr != '\0' && isspace(*chptr); chptr++)
+			;
+
+		/* Syntax error if anything left to parse */
+		if (*chptr != '\0')
+			return TEST_UTIL_SYNTAX_ERR;
+
+		return TEST_UTIL_SUCCESS;
+	}
+
+	if (!isdigit(*chptr))
+		return TEST_UTIL_SYNTAX_ERR;
+
+	errno = 0;
+	if (strncasecmp(chptr, HEX_PREFIX,
+		CONST_STRLEN(HEX_PREFIX)) == 0) {
+		chptr += CONST_STRLEN(HEX_PREFIX);
+
+		/* Whitespace after hex prefix not allowed */
+		if (isspace(*chptr))
+			return TEST_UTIL_SYNTAX_ERR;
+
+		/* Negative values not allowed */
+		if (*chptr == '-')
+			return TEST_UTIL_SYNTAX_ERR;
+
+		tmp = strtoull(chptr, &endptr, 16);
+	} else {
+		/* Negative values not allowed */
+		if (*chptr == '-')
+			return TEST_UTIL_SYNTAX_ERR;
+
+		tmp = strtoull(chptr, &endptr, 10);
+	}
+
+	/* Syntax error if nothing was parsed by call to strtoull. */
+	if (chptr == endptr)
+		return TEST_UTIL_SYNTAX_ERR;
+
+	/* Skip trailing whitespace */
+	for (chptr = endptr; *chptr != '\0' && isspace(*chptr); chptr++)
+		;
+
+	/* Syntax error if anything left to parse */
+	if (*chptr != '\0')
+		return TEST_UTIL_SYNTAX_ERR;
+
+	if (errno != 0) {
+		/* strtoull sets errno to ERANGE and returns ULLONG_MAX
+		 * for any value greater than UINT64_MAX.
+		 */
+		if ((tmp == ULLONG_MAX) || (errno == ERANGE))
+			return TEST_UTIL_VALUE_ERR;
+
+		/* Any other errno from strtoull is unexpected. */
+		TEST_ASSERT(errno == ERANGE,
+			"Wrong input to strtoull\n");
+	} else {
+		if (tmp > max)
+			return TEST_UTIL_VALUE_ERR;
+	}
+
+	*val = tmp;
+	return TEST_UTIL_SUCCESS;
+}
+
+/* Parse float
+ *
+ * Parse the floating point value in the string pointed to by str.
+ * Parsed value returned in the location pointed to by val.
+ *
+ * Returns:
+ *   TEST_UTIL_SUCCESS - on parsing a finite value.
+ *   TEST_UTIL_SYNTAX_ERR - nothing parsed or trailing garbage remains.
+ *   TEST_UTIL_VALUE_ERR - parsed value is infinite or NaN.
+ */
+int test_parse_float(const char *str, float *val)
+{
+	float tmp;
+	char *chptr;
+
+	tmp = strtof(str, &chptr);
+
+	/* Syntax error if strtof consumed nothing (e.g. empty string),
+	 * for consistency with the integer parsers above.
+	 */
+	if (chptr == str)
+		return TEST_UTIL_SYNTAX_ERR;
+
+	if (*chptr != '\0')
+		return TEST_UTIL_SYNTAX_ERR;
+
+	if (!isfinite(tmp))
+		return TEST_UTIL_VALUE_ERR;
+
+	*val = tmp;
+
+	return TEST_UTIL_SUCCESS;
+}
+
+/* test_parse_rngs
+ *
+ * Parses out zero or more ranges provided by str.  Each range contains one or
+ * two values separated by a colon.  When just one value is provided, the low
+ * and high values of the range are set equal to that value.  In the case
+ * where colon separated values are provided, low gets set to the value before
+ * the colon, while high is set equal to the value after the colon.  Two or
+ * more ranges are specified by separating the ranges with a comma.  For
+ * example, the following string:
+ *
+ *   1:5, 54, 0x20,35
+ *
+ * is a specification for the following ranges:
+ *
+ *   range 0 low: 1 high: 5
+ *   range 1 low: 54 high: 54
+ *   range 2 low: 32 high: 35
+ *
+ * Max is used to specify the maximum value permitted in a range.  While
+ * symb when non-NULL points to an array of symbolic strings and their
+ * equivalent value.  Note that it is valid for the symbol array to contain
+ * entries with values greater than max.
+ *
+ * Results are returned in a dynamically allocated array pointed to by **rngs,
+ * while the length of the results is specified by *num.  On entry these
+ * results are unconditionally freed.  Note, **rngs must always equal NULL
+ * or point to dynamically allocated memory.
+ *
+ * Return Value:
+ *   TEST_UTIL_SUCCESS - str parsed with no errors
+ *   TEST_UTIL_SYNTAX_ERR - str does not have a valid syntax
+ *   TEST_UTIL_VALUE_ERR - str has a value > max or one of the ranges
+ *                         has low > high.
+ */
+
+int test_parse_rngs(const char *str, struct test_rng **rngs, unsigned int *num,
+	uint64_t max, const struct test_symb symb[])
+{
+	int rv;
+	const char *chptr1, *chptr2;
+	struct test_rng tmp_rng;
+
+	TEST_ASSERT(str != NULL, " ");
+	TEST_ASSERT(rngs != NULL, " ");
+	TEST_ASSERT(num != NULL, " ");
+
+	/* Clear Result */
+	free(*rngs);
+	*rngs = NULL;
+	*num = 0;
+
+	size_t len1 = 0, len2 = 0, pos = 0;
+
+	/* Skip leading white space */
+	for (chptr1 = str; *chptr1 != '\0' && isspace(*chptr1); chptr1++)
+		;
+
+	/* One loop iteration per comma-separated range spec. */
+	for (; *chptr1 != '\0'; chptr1 = chptr2) {
+		/* NOTE(review): when the spec starts with ':' or ',',
+		 * pos is 0 and len1 falls back to the full remaining
+		 * length -- confirm this fallback is intended.
+		 */
+		pos = strcspn(chptr1, ":,");
+		len1 = (pos == 0) ? strlen(chptr1) : pos;
+		chptr2 = chptr1 + len1;
+
+		if ((chptr2 != NULL) && (*chptr2 == ':')) {
+			/* Range of values. */
+			char *tmp_parse_one = test_dyn_sprintf("%.*s",
+				len1, chptr1);
+			rv = test_parse_u64(tmp_parse_one,
+				&tmp_rng.low, max, symb);
+			free(tmp_parse_one);
+			if (rv)
+				GOTO_ERROR(rv);
+
+			chptr1 = chptr2 + 1;
+
+			chptr2 = strchr(chptr1, ',');
+			len2 = (chptr2 == NULL) ?
+				strlen(chptr1) : chptr2 - chptr1;
+
+			char *tmp_parse_two = test_dyn_sprintf("%.*s",
+				len2, chptr1);
+			rv = test_parse_u64(tmp_parse_two, &tmp_rng.high,
+				max, symb);
+			free(tmp_parse_two);
+			if (rv)
+				GOTO_ERROR(rv);
+		} else { /* Single value. */
+			char *tmp_parse_one = test_dyn_sprintf("%.*s",
+				len1, chptr1);
+			rv = test_parse_u64(tmp_parse_one,
+				&tmp_rng.low, max, symb);
+			free(tmp_parse_one);
+			if (rv)
+				GOTO_ERROR(rv);
+
+			/* No high part, so set high equal to low */
+			tmp_rng.high = tmp_rng.low;
+		}
+
+		/* Is low > high */
+		if (tmp_rng.low > tmp_rng.high)
+			GOTO_ERROR(TEST_UTIL_VALUE_ERR);
+
+		/* Add tmp_rng to results.  (On realloc failure the
+		 * TEST_ASSERT exits, so the old pointer is not leaked
+		 * in any way that matters.)
+		 */
+		*rngs = realloc(*rngs, (*num + 1) * sizeof(**rngs));
+		TEST_ASSERT(*rngs != NULL, "Insufficient Memory");
+		memcpy(*rngs + *num, &tmp_rng, sizeof(**rngs));
+		(*num)++;
+
+		/* Skip trailing white space */
+		while ((chptr2 != NULL) && isspace(*chptr2))
+			chptr2++;
+
+		/* If not at end, then there should be a comma to
+		 * separate the ranges.
+		 */
+
+		if ((chptr2 != NULL) && (*chptr2 != '\0')) {
+			if (*chptr2 != ',')
+				GOTO_ERROR(TEST_UTIL_SYNTAX_ERR);
+			chptr2++;
+		} else
+			break;
+
+		/* Syntax error if only whitespace after comma */
+		while ((chptr2 != NULL) && (isspace(*chptr2)))
+			chptr2++;
+
+		if ((chptr2 != NULL) && (*chptr2 == '\0'))
+			GOTO_ERROR(TEST_UTIL_SYNTAX_ERR);
+	}
+
+	return 0;
+
+error:
+	/* On any failure, release and clear the partial results. */
+	free(*rngs);
+	*rngs = NULL;
+	*num = 0;
+	return rv;
+}
+
+/* Test Ranges to String
+ *
+ * Input Args:
+ *   rngs - pointer to start of ranges
+ *   num - number of ranges
+ *   radix - 0, 10, or 16.  0 and 16 format the values in hex with a
+ *           0x prefix; 10 formats them in decimal.
+ *
+ * Return:
+ *   Dynamically allocated string of comma-separated ranges; single-value
+ *   ranges print one value, others "low:high".  Caller is responsible
+ *   for freeing the returned string.
+ */
+char *test_rngs2str(const struct test_rng *rngs, unsigned int num,
+	unsigned int radix)
+{
+	char *str, *next_str;
+	const char *seperator;
+	const char *format;
+	const struct test_rng *rng;
+
+	TEST_ASSERT((radix == 0) || (radix == 10) || (radix == 16),
+		"Unsupported radix, radix: %u", radix);
+
+	str = test_dyn_sprintf("");
+
+	/* For each of the ranges, append its formatted form to str,
+	 * rebuilding (and freeing) the accumulated string each time.
+	 */
+	for (rng = rngs; rng < (rngs + num); rng++) {
+		seperator = (rng == rngs) ? "" : ", ";
+
+		if (rng->low == rng->high)
+			format = (radix == 0) || (radix == 16)
+				? "%s%s0x%llx" : "%s%s%llu";
+		else
+			format = (radix == 0) || (radix == 16)
+				? "%s%s0x%llx:0x%llx" : "%s%s%llu:%llu";
+		next_str = test_dyn_sprintf(format, str, seperator,
+			rng->low, rng->high);
+
+		free(str);
+		str = next_str;
+	}
+
+	return str;
+}
+
+/*
+ * Test Ranges Index Is Set
+ *
+ * Input Args:
+ *   idx - Index to check against
+ *   rngs - Pointer to start of ranges
+ *   num - Number of ranges
+ *
+ * Return:
+ *   True when at least one of the num ranges in rngs contains idx
+ *   (inclusive of both bounds), false otherwise.
+ */
+bool test_rngs_idx_isset(unsigned long long idx, const struct test_rng *rngs,
+	unsigned int num)
+{
+	unsigned int i;
+
+	/* Scan each range for one whose bounds contain idx. */
+	for (i = 0; i < num; i++) {
+		if ((rngs[i].low <= idx) && (idx <= rngs[i].high))
+			return true;
+	}
+
+	/* No range contained the index. */
+	return false;
+}
+
+/*
+ * Test Ranges Index Set
+ *
+ * If not already set, sets the given index within a specified set of ranges.
+ * When the index is adjacent to an existing range, will expand the existing
+ * range to include the index, otherwise it creates a new range that
+ * contains just the index and adds it to the array of ranges.
+ *
+ * Input Args:
+ *   idx - Index to be set
+ *
+ * Input/Output Args:
+ *   rngs - Pointer to pointer to start of ranges
+ *   num - Pointer to number of ranges
+ *
+ * Return: None
+ */
+void test_rngs_idx_set(unsigned long long idx, struct test_rng **rngs,
+	unsigned int *num)
+{
+	/* All done if the index is already set. */
+	if (test_rngs_idx_isset(idx, *rngs, *num))
+		return;
+
+	/* Is index adjacent to the boundary of an existing range? */
+	for (struct test_rng *rng = *rngs; rng < (*rngs + *num); rng++) {
+		/* rng->low > 0 guards the low - 1 against underflow. */
+		if ((rng->low > 0) && (idx == rng->low - 1)) {
+			/* Is adjacent to lower index.  Set the index,
+			 * by decreasing the lower bound by 1.
+			 */
+			rng->low = idx;
+			return;
+		}
+		/* (high + 1) > high guards against high + 1 wrapping. */
+		if (((rng->high + 1) > rng->high) && (rng->high + 1 == idx)) {
+			/* Is adjacent to high index.  Set the index,
+			 * by increasing the upper bound.
+			 */
+			rng->high = idx;
+			return;
+		}
+	}
+
+	/*
+	 * Isn't within or adjacent to any of the existing ranges.
+	 * Need to add a new range, that specifies just the given
+	 * index.
+	 */
+	struct test_rng tmp_rng;
+	memset(&tmp_rng, 0, sizeof(tmp_rng));
+	tmp_rng.low = tmp_rng.high = idx;
+	*rngs = realloc(*rngs, (*num + 1) * sizeof(**rngs));
+	TEST_ASSERT(*rngs != NULL, "Insufficient Memory");
+	memcpy(*rngs + *num, &tmp_rng, sizeof(**rngs));
+	(*num)++;
+}
+
+/* Dumps the current stack trace to stderr, symbolized via addr2line. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+	/*
+	 * Build and run this command:
+	 *
+	 *   addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} \
+	 *       | cat -n 1>&2
+	 *
+	 * Note that the actual spacing differs and there's no newline.
+	 */
+	size_t i;
+	size_t n = 20;
+	void *stack[n];
+	const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+	const char *pipeline = "|cat -n 1>&2";
+	char cmd[strlen(addr2line) + strlen(pipeline) +
+		/* N bytes per addr * 2 digits per byte + 1 space per addr: */
+		n * (((sizeof(void *)) * 2) + 1) +
+		/* Null terminator: */
+		1];
+	char *c;
+
+	n = backtrace(stack, n);
+	c = &cmd[0];
+	c += sprintf(c, "%s", addr2line);
+	/*
+	 * Skip the first 2 frames (loop starts at i = 2): test_dump_stack
+	 * and test_assert.  Both are declared noinline so they reliably
+	 * appear as distinct frames.
+	 */
+	for (i = 2; i < n; i++)
+		/* Subtract 1 so the address lands inside the call
+		 * instruction rather than on the return point.
+		 */
+		c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+	c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+	system(cmd);
+#pragma GCC diagnostic pop
+}
+
+/* printf-style logging into the kernel log (/dev/kmsg).
+ *
+ * Returns the number of characters written, or -1 when /dev/kmsg
+ * cannot be opened or closed cleanly.
+ */
+int test_printk(const char *fmt, ...)
+{
+	FILE *kmsg;
+	va_list args;
+	int ret;
+
+	kmsg = fopen("/dev/kmsg", "w");
+	if (kmsg == NULL)
+		return -1;
+
+	va_start(args, fmt);
+	ret = vfprintf(kmsg, fmt, args);
+	va_end(args);
+
+	if (fclose(kmsg) != 0)
+		ret = -1;
+
+	return ret;
+}
+
+/* Returns the kernel thread id of the calling thread via the raw
+ * SYS_gettid syscall.
+ */
+static pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+
+/* Test Assert
+ *
+ * When exp is false, reports the failure to stderr (source location,
+ * asserted expression, pid/tid, symbolized stack trace, and an optional
+ * printf-style message) and terminates the process with exit status 254.
+ * No-op when exp is true.
+ *
+ * Input Args:
+ *   exp - result of the asserted expression
+ *   exp_str - source text of the asserted expression
+ *   file, line - source location of the assertion
+ *   fmt, ... - optional printf-style message; may be NULL
+ */
+void test_assert(bool exp, const char *exp_str,
+	const char *file, unsigned int line, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!(exp)) {
+		va_start(ap, fmt);
+
+		fprintf(stderr, "==== Test Assertion Failure ====\n"
+			"  %s:%u: %s\n"
+			"  pid=%d tid=%d\n",
+			file, line, exp_str, getpid(), gettid());
+		test_dump_stack();
+		if (fmt) {
+			fputs("  ", stderr);
+			vfprintf(stderr, fmt, ap);
+			fputs("\n", stderr);
+		}
+		va_end(ap);
+
+		exit(254);
+	}
+
+	return;
+}
+
+/* Version of sprintf() that dynamically allocates and uses a buffer
+ * of the required size.  Returns a pointer to the allocated buffer.
+ * Caller is responsible for freeing the allocated buffer.
+ */
+char *test_dyn_sprintf(const char *fmt, ...)
+{
+	int rv;
+	int len;
+	va_list ap;
+	char *buf;
+
+	/* Determine required size of buffer: vsnprintf with a NULL
+	 * buffer returns the length that would have been written.
+	 */
+	va_start(ap, fmt);
+	len = vsnprintf(NULL, 0, fmt, ap);
+	va_end(ap);
+	len += CONST_STRLEN("\0"); /* room for the terminating NUL */
+
+	/* Allocate buffer and redo the vsnprintf, this time with
+	 * a buffer that should have sufficient space.
+	 */
+	buf = malloc(len);
+	TEST_ASSERT(buf != NULL, "Insufficient Memory");
+	va_start(ap, fmt);
+	rv = vsnprintf(buf, len, fmt, ap);
+	va_end(ap);
+	TEST_ASSERT(rv < len, "dyn_sprintf insufficient buffer length, "
+		"rv: %i len: %i fmt: %s", rv, len, fmt);
+
+	return buf;
+}
+
+/*
+ * Random
+ *
+ * Returns a pseudo random number in the range [0:2^32-1].
+ */
+uint32_t test_rand32(void)
+{
+	uint32_t val;
+
+	/* Use lrand48() to obtain 31 bits worth of randomness
+	 * (bits 0 through 30).
+	 */
+	val = lrand48();
+
+	/* Make an additional lrand48() call and merge
+	 * the randomness into the most significant bits:
+	 * the shifted value covers bits 1 through 31, so all
+	 * 32 result bits end up randomized.
+	 */
+	val ^= lrand48() << 1;
+
+	return val;
+}
+
+/*
+ * Random Boolean
+ *
+ * Pseudo randomly returns true or false.
+ */
+bool test_rand_bool(void)
+{
+	/* A uniform draw from {0, 1} maps directly onto false/true. */
+	return test_rand32_mod(2) != 0;
+}
+
+/*
+ * Random Modulus
+ *
+ * Pseudo randomly returns unsigned integer in the range [0, mod).
+ * NOTE(review): mod == 0 would divide by zero in the final modulo;
+ * callers must pass mod >= 1.
+ */
+uint32_t test_rand32_mod(uint32_t mod)
+{
+	uint32_t val;
+
+	/* Obtain the random value
+	 * Use lrand48() when it would produce a sufficient
+	 * number of random bits, otherwise use test_rand32().
+	 */
+	const uint32_t lrand48maxVal = ((uint32_t) 1 << 31) - 1;
+	val = (mod <= lrand48maxVal)
+		? (uint32_t) lrand48() : test_rand32();
+
+	/*
+	 * The contents of individual bits tend to be less than random
+	 * across different seeds.  For example, srand48(x) and
+	 * srand48(x + n * 4) cause lrand48() to return the same sequence of
+	 * least significant bits.  For small mod values this can produce
+	 * noticeably non-random sequences.  For mod values of less than 2
+	 * bytes, will use the randomness from all the bytes.
+	 */
+	if (mod <= 0x10000) {
+		val = (val & 0xffff) ^ (val >> 16);
+
+		/* If mod less than a byte, can further combine down to
+		 * a single byte.
+		 */
+		if (mod <= 0x100)
+			val = (val & 0xff) ^ (val >> 8);
+	}
+
+	return val % mod;
+}
+
+/* Choose random choice from weights
+ *
+ * Given an array of float weights, pseudorandomly select an index
+ * corresponding to a weight.  The probability that the ith index will
+ * be selected depends upon its weight -- if an index has weight W, and
+ * the sum of all weights equals D, then it has a W/D chance of
+ * being selected.
+ *
+ * If a weight equals INFINITY, then test_rand_choice guarantees that it will
+ * select that weight's corresponding index.  If multiple weights equal
+ * INFINITY, then a TEST_ASSERT is triggered.  NAN weights also trigger
+ * TEST_ASSERTs.
+ *
+ * Args:
+ *   num - The number of weights
+ *   weights - The weights for each choice.  The cumulative sum must be
+ *             greater than 0.  A weight of 0.0 never gets chosen.
+ * Return:
+ *   On success, returns a choice with range [0, num).  TEST_ASSERTs
+ *   triggered on errors.
+ */
+unsigned int test_rand_choice(unsigned int num, const float weights[])
+{
+	unsigned int i;
+	double denom;
+	double total;
+	float value;
+	unsigned int prev_non_zero;
+
+	TEST_ASSERT(num > 0, "%s Need at least one weight, "
+		"num: %u", __func__, num);
+
+	/* Calculate the denom and check validity of inputs.
+	 * Weights cannot be negative.  If a weight with value
+	 * INFINITY is encountered, then that weight's index is
+	 * immediately returned (provided that it's the only
+	 * weight with that value).
+	 */
+	denom = 0;
+	for (i = 0; i < num; i++) {
+		value = weights[i];
+		/* signbit also rejects -0.0 and negative infinities. */
+		TEST_ASSERT(!signbit(value), "%s Encountered negative "
+			"weight, index: %u value: %g",
+			__func__, i, value);
+		TEST_ASSERT(!isnan(value), "%s Encountered NaN"
+			" weight, index: %u value: %g",
+			__func__, i, value);
+		if (isinf(value)) {
+			if (has_infinity(num - i - 1, weights + i + 1))
+				TEST_ASSERT(false, "%s weights has multiple"
+					" infinities", __func__);
+			else
+				return i;
+		}
+		denom += value;
+	}
+	TEST_ASSERT(denom > 0, "%s Cumulative weights sum must be "
+		"greater than 0, sum: %g", __func__, denom);
+
+	/* Choose the index to return: drand48() yields a uniform value
+	 * in [0, 1), which is compared against the running cumulative
+	 * fraction of the weights.
+	 */
+	value = drand48();
+	total = 0.0;
+	prev_non_zero = 0;
+	for (i = 0; i < num; i++) {
+		if (weights[i] != 0.0)
+			prev_non_zero = i;
+		total += weights[i];
+		if (value < total / denom)
+			break;
+	}
+
+	/* If we went through the entire array without
+	 * selecting an index, we might have had bad luck with floating
+	 * point rounding -- if that's the case, then return the index
+	 * of the highest non-zero weight.
+	 */
+	return i < num ? i : prev_non_zero;
+}
+
+/* Check for INFINITY values.
+ *
+ * Returns true when at least one of the num entries in weights is
+ * infinite, false otherwise.
+ */
+static bool has_infinity(unsigned int num, const float weights[])
+{
+	const float *w;
+
+	for (w = weights; w < weights + num; w++) {
+		if (isinf(*w))
+			return true;
+	}
+
+	return false;
+}
+/* Test Delay
+ *
+ * Input Args:
+ *   amt - amount of time to delay, in seconds (fractions allowed)
+ *
+ * Sleeps for at least the given amount of wall-clock time, by
+ * converting to a timespec and deferring to test_delay_ts().
+ */
+void test_delay(double amt)
+{
+	struct timespec amt_ts;
+
+	amt_ts = test_double2ts(amt);
+
+	test_delay_ts(&amt_ts);
+}
+
+/* Test Delay, timespec flavor
+ *
+ * Input Args:
+ *   amt - amount of time to delay (validated via TS_VALIDATE)
+ *
+ * Sleeps for at least the given amount of CLOCK_MONOTONIC time by
+ * computing an absolute deadline and deferring to test_delay_until().
+ */
+void test_delay_ts(const struct timespec *amt)
+{
+	int rv;
+	struct timespec start, end;
+
+	TS_VALIDATE(amt);
+
+	/* Get the time at which we started */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	/* Calculate the time to delay until */
+	test_ts_sum(&end, &start, amt);
+
+	/* Delay until that time.  With pid == 0, test_delay_until can
+	 * only return 0 (time expired).
+	 */
+	rv = test_delay_until(&end, 0);
+	TEST_ASSERT(rv == 0, "test_delay_ts call to test_delay_until "
+		"unexpected rv, rv: %i", rv);
+}
+
+/* test_delay_until
+ *
+ * Waits until after the time given by end or, when pid is non-zero, until
+ * the process specified by pid completes, whichever happens first.
+ *
+ * Input Args:
+ *   end - absolute CLOCK_MONOTONIC deadline (validated via TS_VALIDATE)
+ *   pid - process to poll for completion; 0 disables polling
+ *
+ * Returns:
+ *   0 - return due to time expired
+ *   1 - return due to pid process completed
+ */
+int test_delay_until(const struct timespec *end, pid_t pid)
+{
+	int rv;
+	siginfo_t status;
+	struct timespec current, remaining;
+	struct timespec poll_delta = { 0, 300000000ULL }; /* 0.3 secs */
+
+	TS_VALIDATE(end);
+
+	for (;;) {
+		/* All done if beyond end time */
+		clock_gettime(CLOCK_MONOTONIC, &current);
+		if (test_ts_cmp(&current, end) >= 0)
+			break;
+
+		/* Wait the smaller of remaining or poll time */
+		/* Calculate the amount of time remaining */
+		remaining = test_ts_delta(&current, end);
+
+		/* Reduce remaining time to the poll time, when it
+		 * is greater than the poll time and there is a need
+		 * to poll for process completion.
+		 */
+		if ((test_ts_cmp(&remaining, &poll_delta) > 0)
+			&& (pid != 0))
+			remaining = poll_delta;
+
+		/* Sleep; early wakeup via signal is fine, the loop
+		 * re-checks the deadline.
+		 */
+		(void) nanosleep(&remaining, NULL);
+
+		/* All done if process specified by pid exited.
+		 * Note, waitid call made with WNOWAIT, so that the
+		 * exit status is still available.  This leaves the process
+		 * as a zombie.
+		 */
+		if (pid != 0) {
+			rv = waitid(P_PID, pid, &status,
+				WEXITED | WNOHANG | WNOWAIT);
+			TEST_ASSERT(rv == 0, "test_delay_until waitid failed, "
+				"rv: %i errno: %i", rv, errno);
+			if (status.si_pid == pid)
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Converts a struct timespec to seconds expressed as a double. */
+double test_ts2double(const struct timespec *val)
+{
+	/* Whole seconds plus the fractional nanosecond part. */
+	return (double) val->tv_sec
+		+ (double) val->tv_nsec / nsecs_per_sec;
+}
+
+/* Convert seconds, given as a double, into an equivalent timespec. */
+struct timespec test_double2ts(double amt)
+{
+        struct timespec ts;
+
+        ts.tv_sec = floor(amt);
+        ts.tv_nsec = (amt - ts.tv_sec) * nsecs_per_sec;
+        /* TODO: Handle cases where amt is negative */
+        /* Rounding may leave a full second's worth of nsecs; carry it
+         * into tv_sec until tv_nsec is normalized.
+         */
+        while ((unsigned) ts.tv_nsec >= nsecs_per_sec) {
+                ts.tv_nsec -= nsecs_per_sec;
+                ts.tv_sec += 1;
+        }
+
+        return ts;
+}
+
+/* Compute the elapsed time from *first to *second.
+ *
+ * Both arguments must be non-NULL, valid, normalized timespecs.
+ * NOTE(review): first is presumably expected to be <= second; a
+ * negative span would produce a negative tv_sec -- confirm callers.
+ */
+struct timespec test_ts_delta(const struct timespec *first,
+        const struct timespec *second)
+{
+        struct timespec rv;
+
+        TEST_ASSERT(first != NULL, " ");
+        TEST_ASSERT(second != NULL, " ");
+        TS_VALIDATE(first);
+        TS_VALIDATE(second);
+        rv.tv_sec = second->tv_sec - first->tv_sec;
+        if (second->tv_nsec >= first->tv_nsec) {
+                rv.tv_nsec = second->tv_nsec - first->tv_nsec;
+        } else {
+                /* Borrow one second so tv_nsec stays non-negative. */
+                rv.tv_nsec = (second->tv_nsec + nsecs_per_sec) - first->tv_nsec;
+                rv.tv_sec--;
+        }
+
+        return rv;
+}
+
+/* Compute *sum = *t1 + *t2.
+ *
+ * Both operands must be valid, normalized timespecs.  If the seconds
+ * field would overflow, the result saturates to the maximum valid
+ * timespec instead of wrapping.
+ */
+void test_ts_sum(struct timespec *sum, const struct timespec *t1,
+        const struct timespec *t2)
+{
+        struct timespec result;
+
+        TS_VALIDATE(t1);
+        TS_VALIDATE(t2);
+
+        /* Detect tv_sec wrap-around: an overflowed sum of non-negative
+         * values compares smaller than either operand.
+         */
+        result.tv_sec = t1->tv_sec + t2->tv_sec;
+        if ((result.tv_sec < t1->tv_sec) || (result.tv_sec < t2->tv_sec))
+                goto max;
+        result.tv_nsec = t1->tv_nsec + t2->tv_nsec;
+        if (result.tv_nsec >= nsecs_per_sec) {
+                /* Carry one second out of the nanoseconds field. */
+                result.tv_nsec -= nsecs_per_sec;
+                TEST_ASSERT(result.tv_nsec < nsecs_per_sec,
+                        "Too many nsecs after carry adjustment, "
+                        "result.tv_nsec: %li", result.tv_nsec);
+                result.tv_sec++;
+                if (result.tv_sec <= 0)
+                        goto max;
+        }
+
+        sum->tv_sec = result.tv_sec;
+        sum->tv_nsec = result.tv_nsec;
+        return;
+
+max:
+        /* Saturate to the largest representable valid timespec. */
+        sum->tv_sec = LONG_MAX;
+        sum->tv_nsec = nsecs_per_sec - 1;
+        return;
+}
+
+/* Compute *minus = *t1 - *t2.
+ *
+ * Both operands must be valid, normalized timespecs and t1 must be
+ * >= t2; a negative result is not supported and trips a TEST_ASSERT.
+ */
+void test_ts_minus(struct timespec *minus, const struct timespec *t1,
+        const struct timespec *t2)
+{
+        struct timespec result;
+
+        TS_VALIDATE(t1);
+        TS_VALIDATE(t2);
+
+        /* So far the test_ts_* functions only support positive time.  For
+         * now, fail cases where the subtraction would produce a negative
+         * result.
+         */
+        TEST_ASSERT(test_ts_cmp(t1, t2) >= 0, "t1 < t2,\n"
+                "  t1->tv_sec: %lu t1->tv_nsec: %lu\n"
+                "  t2->tv_sec: %lu t2->tv_nsec: %lu\n",
+                t1->tv_sec, t1->tv_nsec, t2->tv_sec, t2->tv_nsec);
+
+        result.tv_sec = t1->tv_sec - t2->tv_sec;
+        result.tv_nsec = t1->tv_nsec - t2->tv_nsec;
+        if (result.tv_nsec < 0) {
+                /* Borrow one second so tv_nsec is non-negative. */
+                result.tv_nsec += nsecs_per_sec;
+                result.tv_sec -= 1;
+                TEST_ASSERT((result.tv_nsec >= 0)
+                        && (result.tv_nsec < nsecs_per_sec),
+                        "tv_nsec still negative, tv_sec: %lu tv_nsec: %lu",
+                        result.tv_sec, result.tv_nsec);
+        }
+        /* Paranoia: re-check that the result is normalized. */
+        TEST_ASSERT((result.tv_nsec >= 0)
+                && (result.tv_nsec < nsecs_per_sec),
+                "tv_nsec negative, tv_sec: %lu tv_nsec: %lu",
+                result.tv_sec, result.tv_nsec);
+
+        minus->tv_sec = result.tv_sec;
+        minus->tv_nsec = result.tv_nsec;
+
+        return;
+}
+
+/* Three-way comparison of two timespecs.
+ *
+ * Returns -1 when *t1 < *t2, 1 when *t1 > *t2, and 0 when equal.
+ */
+int test_ts_cmp(const struct timespec *t1, const struct timespec *t2)
+{
+        TS_VALIDATE(t1);
+        TS_VALIDATE(t2);
+
+        /* Seconds dominate; nanoseconds only break ties. */
+        if (t1->tv_sec != t2->tv_sec)
+                return (t1->tv_sec < t2->tv_sec) ? -1 : 1;
+
+        if (t1->tv_nsec != t2->tv_nsec)
+                return (t1->tv_nsec < t2->tv_nsec) ? -1 : 1;
+
+        return 0;
+}
+
+/* Locate the debugfs mount point.
+ *
+ * Scans MOUNTS_PATH for the first filesystem of type DEBUGFS_TYPE and
+ * returns its mount path, with a trailing '/', as a dynamically
+ * allocated string the caller must free.  Returns NULL when no debugfs
+ * mount is present.
+ */
+char *test_debugfs_mnt_point(void)
+{
+        FILE *fp;
+        char buf[200];
+        char *chptr;
+        char *device, *mnt_path, *fs_type;
+
+        /* Determine debugfs mount point */
+        fp = fopen(MOUNTS_PATH, "r");
+        TEST_ASSERT(fp != NULL, "test_debugfs_mnt_point error opening %s, "
+                "errno: %i", MOUNTS_PATH, errno);
+        while (fgets(buf, ARRAY_SIZE(buf), fp) != NULL) {
+                /* A full buffer means the line was truncated. */
+                TEST_ASSERT(strlen(buf) < (ARRAY_SIZE(buf) - 1),
+                        "test_debugfs_mnt_point line from %s too long,\n"
+                        "  line: %s", MOUNTS_PATH, buf);
+
+                /* If present remove trailing newline */
+                if ((strlen(buf) > 0) && (buf[strlen(buf) - 1] == '\n'))
+                        buf[strlen(buf) - 1] = '\0';
+
+                /* Parse mount line
+                 * The beginning of each line expected to be of the form:
+                 *
+                 *   device mount_path fs_type
+                 *
+                 * The fs_type may be the last field or the may be additional
+                 * space separated fields beyond fs_type.
+                 */
+                device = buf;
+                mnt_path = strchr(device, ' ');
+                TEST_ASSERT(mnt_path != NULL, "test_debugfs_mnt_point "
+                        "mount path parse error,\n"
+                        "  line: %s", buf);
+                mnt_path++;
+
+                fs_type = strchr(mnt_path, ' ');
+                TEST_ASSERT(fs_type != NULL, "test_debugfs_mnt_point "
+                        "fs type parse error,\n"
+                        "  line: %s", buf);
+                fs_type++;
+                chptr = strchr(fs_type, ' ');
+
+                /* Each field must be at least one character wide. */
+                TEST_ASSERT((mnt_path - device) > 1, "test_debugfs_mnt_point "
+                        "device too short,\n"
+                        "  line: %s", buf);
+                TEST_ASSERT((fs_type - mnt_path) > 1, "test_debugfs_mnt_point "
+                        "mnt_path too short,\n"
+                        "  line: %s", buf);
+                TEST_ASSERT(((chptr == NULL) && (strlen(fs_type) > 0))
+                        || (chptr - fs_type) > 1, "test_debugfs_mnt_point "
+                        "fs_type too short,\n"
+                        "  line: %s", buf);
+
+                /* Terminate the fields in place within buf. */
+                *(mnt_path - 1) = '\0';
+                *(fs_type - 1) = '\0';
+                if (chptr != NULL)
+                        *chptr = '\0';
+
+                /* Skip all but debugfs filesystem type */
+                if (strcmp(DEBUGFS_TYPE, fs_type) != 0)
+                        continue;
+
+                /* Line describing debugfs found */
+                fclose(fp);
+                return test_dyn_sprintf("%s/", mnt_path);
+        }
+        /* fgets returning NULL is only OK at end-of-file. */
+        TEST_ASSERT(feof(fp), "test_debugfs_mnt_point error reading from %s",
+                MOUNTS_PATH);
+
+        fclose(fp);
+        return NULL;
+}
+
+/* Table mapping SIGCHLD si_code values to printable names, used by
+ * test_dump_siginfo when annotating a child's state change.
+ */
+struct known_sig_code {
+        int val;
+        const char *name;
+} known_sig_code[] = {
+        {CLD_EXITED, "EXITED"},
+        {CLD_KILLED, "KILLED"},
+        {CLD_DUMPED, "DUMPED"},
+        {CLD_TRAPPED, "TRAPPED"},
+        {CLD_STOPPED, "STOPPED"},
+        {CLD_CONTINUED, "CONTINUED"},
+};
+
+/* Table mapping common signal numbers to their names, used by
+ * test_dump_siginfo to decode si_status when a child was terminated
+ * by a signal.  Intentionally only a subset of all signals.
+ */
+struct known_sig_status {
+        int val;
+        const char *name;
+} known_sig_status[] = {
+        {SIGHUP, "SIGHUP"},
+        {SIGINT, "SIGINT"},
+        {SIGQUIT, "SIGQUIT"},
+        {SIGILL, "SIGILL"},
+        {SIGTRAP, "SIGTRAP"},
+        {SIGBUS, "SIGBUS"},
+        {SIGFPE, "SIGFPE"},
+        {SIGKILL, "SIGKILL"},
+        {SIGSEGV, "SIGSEGV"},
+        {SIGTERM, "SIGTERM"},
+};
+
+/* Write a human-readable summary of sig's si_code, si_status, and
+ * si_pid fields to file, annotating values found in the known_sig_code
+ * and known_sig_status tables.
+ */
+void test_dump_siginfo(FILE *file, siginfo_t *sig)
+{
+        size_t n;
+        int code = sig->si_code;
+        int status = sig->si_status;
+
+        /* Display si_code */
+        fprintf(file, " si_code: %u", code);
+        for (n = 0; n < ARRAY_SIZE(known_sig_code); n++) {
+                if (known_sig_code[n].val == code) {
+                        fprintf(file, " (%s)", known_sig_code[n].name);
+                        break;
+                }
+        }
+
+        /* Display si_status, decoding it as a signal number only when
+         * the child was terminated by a signal.
+         */
+        fprintf(file, " si_status: %u", status);
+        if ((code == CLD_KILLED) || (code == CLD_DUMPED)) {
+                for (n = 0; n < ARRAY_SIZE(known_sig_status); n++) {
+                        if (known_sig_status[n].val == status) {
+                                fprintf(file, " (%s)",
+                                        known_sig_status[n].name);
+                                break;
+                        }
+                }
+        }
+        fputs("\n", file);
+
+        /* Display PID */
+        fprintf(file, " pid: %i\n", sig->si_pid);
+}
+
+/* Return the TSC frequency, in Hz, of the CPU given by cpu.
+ *
+ * Reads /sys/devices/system/cpu/cpu<n>/tsc_freq_khz, which reports the
+ * frequency in kHz.  NOTE(review): that sysfs node is not provided by
+ * mainline Linux -- confirm the target kernel exposes it; when absent,
+ * the fopen TEST_ASSERT below fires.
+ */
+uint64_t test_tsc_freq(int cpu)
+{
+        int rv;
+        FILE *f;
+        char *path;
+        long freq_khz;
+
+        path = test_dyn_sprintf("/sys/devices/system/cpu/cpu%d/tsc_freq_khz",
+                cpu);
+        f = fopen(path, "r");
+        TEST_ASSERT(f != NULL, "test_tsc_freq failed to open %s, errno: %i",
+                path, errno);
+
+        rv = fscanf(f, "%ld\n", &freq_khz);
+        TEST_ASSERT(rv == 1, "test_tsc_freq fscanf failed, rv: %i "
+                "ferror(f): %i errno: %i", rv, ferror(f), errno);
+
+        fclose(f);
+        free(path);
+
+        /* kHz -> Hz */
+        return freq_khz * 1000;
+}
+
+/*
+ * Hex Dump
+ *
+ * Displays in hex the contents of the memory starting at the location
+ * pointed to by buf, for the number of bytes given by size.
+ *
+ * ARGS:
+ * stream - File stream to display the output to.
+ * buf - Starting address of memory to be dumped.
+ * size - Number of bytes to be dumped.
+ * addr_start - Address shown for first byte dumped.
+ * indent - Number of spaces prefixed to start of each line.
+ */
+#if CHAR_BIT != 8
+#error "test_xdump impementation depends on 8 bits per byte."
+#endif
+/* Hex dump of [buf, buf + size) to stream; see the block comment above
+ * for the argument descriptions and output format.
+ */
+void test_xdump(FILE *stream, const void *buf, size_t size,
+        intptr_t addr_start, uint8_t indent)
+{
+        int rv;
+        const unsigned char *ptr = buf, *start = buf;
+        size_t num = size;
+        char *linep;
+
+        /* Constants for various amounts within a single line of ouput.
+         * Each line has the following format:
+         *
+         *   aaaaaa: xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx xx
+         *
+         * Where "aaaaaa" is the address and each " xx" is the dump of
+         * a single byte.  Up to 16 bytes are dumped per line, which is
+         * given by the value of bytes_per_line.  Some of these constants
+         * use a cast to char, such as in the expression sizeof((char) ':').
+         * The cast to char is needed, because character constants are auto
+         * promoted to int.  The above expression could have been specified
+         * as sizeof(char), but the ':' is used to express what character
+         * in the line output this expression is for.
+         */
+        const unsigned int bytes_per_line = 16;
+        const unsigned int hex_digits_per_byte = 2;
+        const unsigned int addr_max_char = sizeof(uintptr_t)
+                * hex_digits_per_byte;
+        const unsigned int max_line = addr_max_char
+                + sizeof((char) ':')
+                + (bytes_per_line * ((sizeof((char) ' '))
+                        + hex_digits_per_byte))
+                + sizeof((char) '\0');
+        char line[max_line];
+
+        linep = line;
+        while (num) {
+                /* At each line boundary, flush the completed line (if
+                 * any) and start a new one with the address prefix.
+                 */
+                if (((ptr - start) % bytes_per_line) == 0) {
+                        if (linep != line) {
+                                fprintf(stream, "%*s%s\n",
+                                        indent, "", line);
+                        }
+                        linep = line;
+                        rv = snprintf(linep, ARRAY_SIZE(line) - (linep - line),
+                                "%0*llx:", addr_max_char,
+                                (long long) (ptr - start) + addr_start);
+                        linep += rv;
+                }
+
+                /* Check that there is at least room for 4
+                 * more characters.  The 4 characters being
+                 * a space, 2 hex digits and the terminating
+                 * '\0'.
+                 */
+                assert((ARRAY_SIZE(line) - 4) >= (linep - line));
+                rv = snprintf(linep, ARRAY_SIZE(line) - (linep - line),
+                        " %02x", *ptr++);
+                linep += rv;
+                num--;
+        }
+
+        /* Flush any final partial line. */
+        if (linep != line)
+                fprintf(stream, "%*s%s\n", indent, "", line);
+}
+
+/* Read Config String
+ *
+ * Args:
+ * name - name of configuration variable
+ *
+ * Returns:
+ * Pointer to dynamically allocated string, with setting of the
+ * configuration variable. For error conditions specified below,
+ * NULL is returned, with errno indicating which condition occurred.
+ * All other errors (e.g. insufficient memory) cause a TEST_ASSERT failure.
+
+ * Errors:
+ * ESRCH - No such configuration variable
+ * ENOENT - Configuration variable exists, but is not set.
+ *
+ * Reads the kernel configuration via /proc/config.gz and returns information
+ * about the configuration variable specified by name. Uncompressed lines
+ * from /proc/config.gz are expected to be of the following forms:
+ *
+ * # comment text
+ * # CONFIG_FOO is not set
+ * CONFIG_FOO=string
+ *
+ * Comment lines begin with '#' and don't end with "is not set".
+ * Lines starting with '#' and ending with "is not set" describe
+ * configuration variables no setting. While the final form describes
+ * configuration variables with a setting. The primary purpose of the
+ * routine is to locate a configuration variable with a setting and
+ * return a dynamically allocated string that contains the setting. The
+ * not set case is also handled, by noticing the variable and returning
+ * NULL with errno equal to ENOENT.
+ */
+/* Look up the kernel configuration variable given by name via
+ * /proc/config.gz; see the block comment above for the full contract.
+ * Returns a dynamically allocated copy of the setting, or NULL with
+ * errno set to ESRCH (not found) or ENOENT (present but not set).
+ */
+char *test_config_str(const char *name)
+{
+        int status;
+        FILE *stream;
+        char *line = NULL;
+        char *rv_str = NULL;
+        size_t line_len = 0;
+        ssize_t getline_rv;
+        static const char *not_set_str = " is not set";
+        size_t not_set_len = strlen(not_set_str);
+        /* Why the scan loop ended; drives the errno returned. */
+        enum completion_reason {
+                NOT_FOUND,
+                NOT_SET,
+                SETTING_FOUND,
+        } completion_reason = NOT_FOUND;
+
+        stream = popen("/bin/gunzip -c /proc/config.gz", "r");
+        TEST_ASSERT(stream != NULL, "test_config_str popen failed, "
+                "errno: %i", errno);
+
+        while ((getline_rv = getline(&line, &line_len, stream)) != -1) {
+                /* If present, remove trailing newline */
+                if ((getline_rv > 0) && (line[getline_rv - 1] == '\n'))
+                        line[getline_rv - 1] = '\0';
+
+                /* Skip blank lines */
+                if (strlen(line) == 0)
+                        continue;
+
+                /* Skip comment lines that don't end with not set. */
+                if ((line[0] == '#') && ((strlen(line) < not_set_len)
+                        || (strcmp(line + (strlen(line) - not_set_len),
+                        not_set_str) != 0)))
+                        continue;
+
+                /* Configuration setting or not set line? */
+                if (line[0] != '#') {
+                        /* Configuration setting */
+                        /* Lines with a configuration setting should
+                         * start with "CONFIG_"
+                         */
+                        TEST_ASSERT(strncmp(line, "CONFIG_",
+                                strlen("CONFIG_")) == 0, "test_config_str "
+                                "test_config_str \"CONFIG_\" expected,\n"
+                                "  line: %s", line);
+
+                        /* Skip unless this line describes the configuration
+                         * variable specified by name.
+                         */
+                        if ((strncmp(line, name, strlen(name)) != 0)
+                                || (*(line + strlen(name)) != '='))
+                                continue;
+
+                        /* Copy everything after the '='. */
+                        completion_reason = SETTING_FOUND;
+                        rv_str = strdup(line + strlen(name) + 1);
+                        TEST_ASSERT(rv_str != NULL, "Insufficient Memory");
+                        break;
+                } else {
+                        /* Not set line */
+                        /* Not set lines should at least start with
+                         * "# CONFIG_"
+                         */
+                        TEST_ASSERT(strncmp(line, "# CONFIG_",
+                                strlen("# CONFIG_")) == 0, "test_config_str "
+                                "test_config_str \"# CONFIG_\" expected,\n"
+                                "  line: %s", line);
+
+                        /* Skip unless this line describes the configuration
+                         * variable specified by name.  The "+ 2" skips the
+                         * leading "# ".
+                         */
+                        if ((strncmp(line + 2, name, strlen(name)) != 0)
+                                || (*(line + 2 + strlen(name)) != ' '))
+                                continue;
+
+                        completion_reason = NOT_SET;
+                        break;
+                }
+        }
+
+        /* If needed, read rest of stream.  Could just close the stream,
+         * but it is implementation and timing dependent whether the
+         * gunzip will do exit(0), exit(1), or end due to a SIGPIPE.
+         * Easier to just read the rest of the input and treat everything
+         * but exit(0) as an error.
+         */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+        while (!feof(stream) && !ferror(stream))
+                getline(&line, &line_len, stream);
+#pragma GCC diagnostic pop
+        TEST_ASSERT(!ferror(stream), "test_config_str stream error, "
+                "errno: %i", errno);
+
+        status = pclose(stream);
+        TEST_ASSERT(WIFEXITED(status) && (WEXITSTATUS(status) == 0),
+                "test_config_str unexpected exit status,\n"
+                "  status: 0x%x\n"
+                "  WIFEXITED: %i\n"
+                "  WEXITSTATUS: %i\n"
+                "  WIFSIGNALED: %i\n"
+                "  WTERMSIG: %i",
+                status, WIFEXITED(status), WEXITSTATUS(status),
+                WIFSIGNALED(status), WTERMSIG(status));
+
+        /* Translate the completion reason into the errno contract. */
+        switch (completion_reason) {
+        case NOT_FOUND:
+                errno = ESRCH;
+                break;
+
+        case NOT_SET:
+                errno = ENOENT;
+                break;
+
+        case SETTING_FOUND:
+                break;
+
+        default:
+                TEST_ASSERT(false, "test_config_str unknown completion "
+                        "reason, completion_reason: %i", completion_reason);
+        }
+
+        return rv_str;
+}
+
+/* Prototypes for syscalls that don't have a prototype within the
+ * system headers.
+ */
+int capset(cap_user_header_t header, const cap_user_data_t data);
+int capget(cap_user_header_t header, cap_user_data_t data);
+
+/* Capability Get
+ *
+ * Reads the current capability set for the process specified by pid and
+ * returns them in a dynamically allocated area pointed to by cap.
+ *
+ * Args:
+ * pid - Process ID
+ * cap - Pointer to the capability set pointer.
+ *
+ * Returns:
+ * Zero on success, -1 on error.
+ *
+ * Errors:
+ * EFAULT - Bad memory address
+ * ESRCH - No such process
+ */
+/* Capability Get (see the block comment above for the full contract).
+ *
+ * Frees any previously fetched set pointed to by *cap, allocates a
+ * zeroed set large enough for all _LINUX_CAPABILITY_U32S_3 words, and
+ * fills it via capget(2).  Returns capget's result: 0 on success, -1
+ * on error with errno set.
+ */
+int test_cap_get(pid_t pid, test_cap_t *cap)
+{
+        int rv;
+
+        struct __user_cap_header_struct header = {
+                .version = _LINUX_CAPABILITY_VERSION_3,
+                .pid = pid,
+        };
+
+        TEST_ASSERT(cap != NULL, "test_cap_get cap NULL pointer");
+
+        /* Release any prior allocation.  The original condition was
+         * inverted ("*cap == NULL"), which freed only NULL pointers
+         * and leaked every previously returned set.
+         */
+        if (*cap != NULL)
+                free(*cap);
+
+        *cap = calloc(_LINUX_CAPABILITY_U32S_3,
+                sizeof(struct __user_cap_data_struct));
+        TEST_ASSERT(*cap != NULL, "Insufficient Memory");
+
+        rv = capget(&header, *cap);
+
+        return rv;
+}
+
+/* Capability Set
+ *
+ * Set the current capability set, for the process given by pid, to the
+ * capabilities pointed to by cap.
+ *
+ * Args:
+ * pid - Process ID
+ * cap - Pointer to the capability set pointer.
+ *
+ * Returns:
+ * Zero on success, -1 on error.
+ *
+ * Errors:
+ * EFAULT - Bad memory address
+ * EPERM - Attempt to add a capability to the permitted set, or to
+ * set a capability in the effective or inheritable sets that
+ * is not in the permitted set.
+ * ESRCH - No such process
+ */
+/* Capability Set (see the block comment above for the full contract).
+ *
+ * Applies the in-memory capability set *cap to the process given by
+ * pid via capset(2).  Returns capset's result: 0 on success, -1 on
+ * error with errno set.
+ */
+int test_cap_set(pid_t pid, const test_cap_t *cap)
+{
+        int rv;
+
+        /* Fixed copy-paste: these messages previously claimed to come
+         * from test_cap_get.
+         */
+        TEST_ASSERT(cap != NULL, "test_cap_set cap NULL pointer");
+        TEST_ASSERT((*cap) != NULL, "test_cap_set *cap NULL pointer");
+
+        struct __user_cap_header_struct header = {
+                .version = _LINUX_CAPABILITY_VERSION_3,
+                .pid = pid,
+        };
+
+        rv = capset(&header, *cap);
+
+        return rv;
+}
+
+/* Capability Flag Fetch
+ *
+ * Returns the current setting of a specified capability trait.
+ *
+ * Args:
+ * cap - Pointer to capability set
+ * group - Which group of capabilities, effective, permitted, or
+ * inheritable.
+ * trait - Index of particular trait. Available indexes are specified
+ * within <linux/capability.h> as CAP_ defines.
+ *
+ * Returns:
+ * Setting of specified trait, TEST_ASSERT used for detected errors
+ * (e.g. invalid group).
+ */
+/* Return the setting of the capability trait within the given group
+ * (effective, permitted, or inheritable) of the in-memory set *cap.
+ * See the block comment above for the full contract.
+ */
+bool test_cap_flag_fetch(const test_cap_t *cap, test_cap_group_t group,
+        unsigned int trait)
+{
+        uint32_t *valp;
+
+        /* Each trait lives in one 32-bit word of the set; reject traits
+         * beyond the _LINUX_CAPABILITY_VERSION_3 word count.
+         */
+        TEST_ASSERT(CAP_TO_INDEX(trait) < _LINUX_CAPABILITY_U32S_3,
+                "test_cap_flag_fetch trait out of range, trait: %u", trait);
+
+        /* Select the word for the requested group within the word that
+         * CAP_TO_INDEX says holds this trait.
+         */
+        switch (group) {
+        case TEST_CAP_EFFECTIVE:
+                valp = &(*cap + CAP_TO_INDEX(trait))->effective;
+                break;
+
+        case TEST_CAP_PERMITTED:
+                valp = &(*cap + CAP_TO_INDEX(trait))->permitted;
+                break;
+
+        case TEST_CAP_INHERITABLE:
+                valp = &(*cap + CAP_TO_INDEX(trait))->inheritable;
+                break;
+
+        default:
+                TEST_ASSERT(false, "test_cap_flag_fetch unknown group, "
+                        "group: 0x%x", group);
+                /* Not Reached */
+                valp = NULL; /* Silences compiler warning */
+        }
+
+        return (*valp & CAP_TO_MASK(trait)) != 0;
+}
+
+/* Capability Flag Assign
+ *
+ * Set the current setting of a specified capability trait to the value
+ * given by rval. Note, cap points to an in memory copy of a capability
+ * set, which allows unprivileged code to manipulate the capability set.
+ * Although unprivileged code will obtain an error if they attempt to use
+ * test_cap_set() to make the in-memory copy with a disallowed change
+ * effective.
+ *
+ * Args:
+ * cap - Pointer to capability set
+ * group - Which group of capabilities, effective, permitted, or
+ * inheritable.
+ * trait - Index of particular trait. Available indexes are specified
+ * within <linux/capability.h> as CAP_ defines.
+ * rval - New setting
+ *
+ * Returns:
+ * Nothing, TEST_ASSERT used on detected failure (e.g. invalid group).
+ */
+/* Set the capability trait within the given group (effective,
+ * permitted, or inheritable) of the in-memory set *cap to rval.
+ * See the block comment above for the full contract.
+ */
+void test_cap_flag_assign(test_cap_t *cap, test_cap_group_t group,
+        unsigned int trait, bool rval)
+{
+        uint32_t *valp;
+
+        /* Validate trait before indexing, matching the range check
+         * test_cap_flag_fetch performs; previously an out-of-range
+         * trait silently indexed past the allocated set.
+         */
+        TEST_ASSERT(CAP_TO_INDEX(trait) < _LINUX_CAPABILITY_U32S_3,
+                "test_cap_flag_assign trait out of range, trait: %u", trait);
+
+        switch (group) {
+        case TEST_CAP_EFFECTIVE:
+                valp = &(*cap + CAP_TO_INDEX(trait))->effective;
+                break;
+
+        case TEST_CAP_PERMITTED:
+                valp = &(*cap + CAP_TO_INDEX(trait))->permitted;
+                break;
+
+        case TEST_CAP_INHERITABLE:
+                valp = &(*cap + CAP_TO_INDEX(trait))->inheritable;
+                break;
+
+        default:
+                TEST_ASSERT(false, "test_cap_flag_assign unknown group, "
+                        "group: 0x%x", group);
+                /* Not Reached */
+                valp = NULL; /* Silences compiler warning */
+        }
+
+        /* Clear the trait's bit, then set it when rval requests it. */
+        *valp &= ~CAP_TO_MASK(trait);
+        if (rval)
+                *valp |= CAP_TO_MASK(trait);
+}
+
+/* Significant Bits Floating-Point Comparison
+ *
+ * Determine the number of significant bits between given expected
+ * and actual values. The number of significant bits is
+ * determined by the following factors:
+ *
+ * 1. Number of matching significant bits within the mantissa after
+ * they have been adjusted so that the exponent of the expected
+ * and actual values would be the same.
+ *
+ * 2. The difference between the bits less significant than the
+ * matching significant bits. The amount can be any value
+ * from 0.5 to the number of less significant bits - 0.5. The
+ * case of 0.5 occurs with the maximum possible difference. While
+ * a value of the number of less significant bits - 0.5 occurs
+ * for a difference in the less significant bits equal to 1.0.
+ *
+ * 3. Number of leading zeros for certain subnormal cases.
+ * A subnormal value is a non-zero value that is so small that
+ * the value can't be normalized. The number of leading zeros
+ * are counted for cases where both the actual and difference
+ * between the expected and actual values are subnormal.
+ *
+ * Args:
+ * expected - expected value for the comparison
+ * actual - actual value for the comparison
+ *
+ * Returns:
+ * Number of significant bits by which expected and actual match.
+ * Zero if expected or actual is equal to infinite, -infinite, or NaN.
+ */
+/* Precision-specific wrappers around sgnif(); each bounds the matched
+ * bit count by its type's mantissa width and uses its type's smallest
+ * normalized value for the subnormal handling.
+ */
+float test_sgniff(long double expected, float actual)
+{
+        return sgnif(expected, actual, FLT_MANT_DIG, FLT_MIN);
+}
+
+float test_sgnif(long double expected, double actual)
+{
+        return sgnif(expected, actual, DBL_MANT_DIG, DBL_MIN);
+}
+
+float test_sgnifl(long double expected, long double actual)
+{
+        return sgnif(expected, actual, LDBL_MANT_DIG, LDBL_MIN);
+}
+
+/* Common implementation for test_sgniff/test_sgnif/test_sgnifl.
+ *
+ * Computes the number of significant bits by which expected and actual
+ * match (see the block comment above test_sgniff for the full
+ * definition).  mant_dig is the mantissa width of the actual value's
+ * type and min_normalized is that type's smallest normalized value.
+ */
+static float sgnif(long double expected, long double actual,
+        unsigned int mant_dig, long double min_normalized)
+{
+        long double diff, scaled;
+        float matched_bits;
+        unsigned int matched_subnormal = 0;
+        long double tmp_val;
+
+        /* Return 0 for any case where expected or actual is INF, -INF,
+         * or NaN.  A design choice was made to always return 0 for
+         * these cases, even for cases where expected and actual are
+         * equal.  Design choice was based on the risk of expected
+         * accidentally being calculated as INF, -INF, or NaN, for the
+         * same reason that an incorrect program calculates the same
+         * value.  Instead of potentially not noticing an improperly
+         * calculated expected value, it is left to the caller to handle
+         * cases where INF, -INF, or NaN is expected.
+         */
+        if (isinf(expected) || isinf(actual)
+                || isnan(expected) || isnan(actual))
+                return 0.0;
+
+        /* Exact match (or expected == 0): all mantissa bits match. */
+        diff = fabsl(expected - actual);
+        if (diff == 0.0)
+                return mant_dig;
+        scaled = fabsl(expected) / diff;
+        if (scaled == 0.0)
+                return mant_dig;
+        /* log2 of the relative error gives the matching bit count. */
+        matched_bits = log2l(scaled);
+
+        /* Count leading zeros as matches in cases where actual
+         * is subnormal (!0.0 yet to small to normalize) and difference
+         * is less than min normalized.
+         */
+        if ((fabsl(actual) != 0.0) && (fabsl(actual) < min_normalized)) {
+                if (matched_bits < 0.0)
+                        matched_bits = 0.0;
+                for (tmp_val = diff, matched_subnormal = 0;
+                        fabsl(tmp_val) < min_normalized; tmp_val *= FLT_RADIX) {
+                        matched_subnormal++;
+                        TEST_ASSERT(matched_subnormal <= mant_dig,
+                                "%s subnormal with leading zeros > mant_dig,\n"
+                                "  matched_subnormal: %u\n  mant_dig: %u\n"
+                                "  actual: %Lg tmp_val: %Lg",
+                                __func__, matched_subnormal, mant_dig,
+                                actual, tmp_val);
+                }
+                matched_bits += matched_subnormal;
+        }
+
+        /* Bound the number of matched bits to:
+         *
+         *   [0.0, mant_dig]
+         *
+         * A negative number of matched bits occurs when the sign of
+         * expected and actual differ.  When the sign doesn't match
+         * consider the number of matched bits to be 0.0.
+         *
+         * Due to rounding error the calculated number of matched bits
+         * can be slightly greater than the number of available bits.
+         * For such cases the number of matched bits is bound to the
+         * number of available bits.
+         */
+        if (signbit(matched_bits))
+                matched_bits = 0.0;
+
+        if (matched_bits > mant_dig)
+                matched_bits = mant_dig;
+
+        return matched_bits;
+}
+
+/* Fetch memory mapping information
+ *
+ * Search a user-specified process' mappings for an
+ * address. If the search is successful, then the test_pg_info
+ * struct is populated with the mapping's starting address,
+ * ending address, size, protections, and shared status.
+ *
+ * Note: On success, the retrieved contents will be self-consistent,
+ * but they could describe the mapping's contents from the time
+ * this routine was entered to the time it returns. On failure,
+ * we're only guaranteed that at some time between entry and
+ * completion of this routine, a mapping with the requested address
+ * did not exist. No atomicity is guaranteed between multiple calls.
+ * Note: Caller must ensure that the process with the specified pid
+ * stays alive while this function executes; otherwise, a TEST_ASSERT
+ * may be raised.
+ *
+ * Args:
+ * pid - The process whose mappings we'll search. 0 defaults to the
+ * the current process.
+ * addr - The address for which to retrieve a mapping.
+ *
+ * Output:
+ * info - On success, a structure populated with information
+ * about the mapping containing the searched-for address.
+ *
+ * Return:
+ * Zero on success; on error, -1 and errno is set
+ *
+ * Errors:
+ * ENOENT - addr not mapped
+ */
+/* Fetch mapping information for addr within pid's address space; see
+ * the block comment above for the full contract.  Returns 0 on success
+ * with *info populated, or -1 with errno == ENOENT when addr is not
+ * mapped.
+ */
+int test_pg_info(pid_t pid, uint64_t addr, struct test_pg_info *info)
+{
+        char *path;
+        int tmp_ret;
+        char *buf;
+        int rv;
+
+        /* Construct the path to the proc maps file.
+         * If pid is zero, then default to the current process' maps file.
+         */
+        if (pid == 0) {
+                path = test_dyn_sprintf("/proc/self/maps");
+        } else {
+                /* Validate the pid before setting the path:
+                 * Kill will set errno to ESRCH if the requested
+                 * pid does not exist.  Invoking kill with signal number
+                 * 0 doesn't actually send a signal to the process;
+                 * the primary reason to do this is to check if a
+                 * pid exists.
+                 *
+                 * A process could, however, exit after it successfully
+                 * receives the signal 0 but before this routine completes.
+                 * Such an exit might render us unable to read the maps file
+                 * and may trigger a less informative TEST_ASSERT.  It's
+                 * the caller's responsibility to ensure that the process
+                 * exists throughout the entirety of this routine's lifetime.
+                 * Our check here is merely a cautious one.
+                 */
+                tmp_ret = kill(pid, 0);
+                TEST_ASSERT(tmp_ret == 0, "%s requested pid "
+                        "does not exist, pid: %d errno: %d", __func__,
+                        pid, errno);
+                path = test_dyn_sprintf("/proc/%u/maps", pid);
+        }
+
+        /* Read the file in.  Though reads might be subject to races, each
+         * line in the fetched buffer should be self-consistent.
+         */
+        tmp_ret = test_seq_read(path, &buf, NULL);
+        TEST_ASSERT(tmp_ret == TEST_UTIL_SUCCESS, "%s test read (seq)"
+                "failure, path: %s ", __func__, path);
+
+        /* Retrieve the mapping. */
+        rv = test_pg_info_map(buf, addr, info);
+
+        /* Perform necessary clean-up. */
+        free(path);
+        free(buf);
+        return rv;
+}
+
+/* Fetch memory mapping information from buffer
+ *
+ * This function behaves similarly to test_pg_info: Given an address,
+ * it retrieves information about a mapping containing that address.
+ * Unlike test_pg_info, this function queries a user-supplied map buffer
+ * for a suitable mapping.
+ *
+ * With test_pg_info_map and a snapshot of a proc maps file,
+ * clients can safely perform multiple queries of a process'
+ * mappings, even when the process' mappings are actively changing.
+ *
+ * A typical usage pattern might involve:
+ * 1) taking a snapshot of a process-specific maps file with
+ * test_read(true, ...), and
+ * 2) searching that snapshot with test_pg_info_map
+ * where 2) can be repeated as many times as desired.
+ *
+ * Args:
+ * map - A null-terminated string, formatted as a /proc/\*\/maps file,
+ * to be queried for the address.
+ * addr - The address for which to retrieve a mapping.
+ *
+ * Output:
+ * info - On success, a structure populated with information
+ * about the mapping containing the searched-for address.
+ *
+ * Return:
+ * Zero on success; on error, -1 and errno is set
+ *
+ * Errors:
+ * ENOENT - addr not mapped
+ */
+
+/* Search the proc-maps-formatted buffer map for a mapping containing
+ * addr; see the block comment above for the full contract.  Returns 0
+ * on success with *info populated, or -1 with errno == ENOENT when no
+ * mapping contains addr.
+ */
+int test_pg_info_map(const char *map, uint64_t addr, struct test_pg_info *info)
+{
+        int tmp_ret;
+        int rv;
+        const char *rest;
+        size_t max_nibbles;
+        uint64_t curr_start;
+        uint64_t curr_end;
+        uint64_t inclusive_end;
+        char perm[CONST_STRLEN("rwxp")];
+        int prot;
+        bool shared;
+
+        /* Search for a mapping that includes addr.  Populate info if such a
+         * mapping is found.
+         *
+         * Each line of the /proc/[pid]/maps file has the following format:
+         *
+         *   [start_address]-[end_address + 1] [perms] [offset] \
+         *       [dev] [inode] [pathname]
+         *
+         * test_pg_info returns information from the first three fields.
+         * For more information on the proc maps file, see man proc(5).
+         */
+        rest = map;
+        while (true) {
+                /* Parse the line.  EOF from sscanf means the buffer is
+                 * exhausted without a match.
+                 */
+                tmp_ret = sscanf(rest, "%" PRIx64 "-%" PRIx64 " %4s %*[^\n]",
+                        &curr_start, &curr_end, perm);
+                if (tmp_ret == EOF) {
+                        rv = -1;
+                        errno = ENOENT;
+                        goto done;
+                }
+                if (tmp_ret != 3) {
+                        TEST_ASSERT(false,
+                                "%s Parsing error, line: %.*s ""rv: %d",
+                                __func__, (int)line_len(rest), rest, tmp_ret);
+                }
+
+                /* Convert the exclusive end address to an inclusive one.
+                 * This is typically done by subtracting 1 from the former;
+                 * however, if the exclusive end address is 0, then we need
+                 * to determine the width of desired inclusive address
+                 * in order to appropriately wrap to the address maximum.
+                 */
+                if (curr_end != 0) {
+                        inclusive_end = curr_end - 1;
+                } else {
+                        max_nibbles = proc_maps_max_nibbles(map);
+
+                        /* It's undefined to left-shift a value by a number
+                         * greater than its data type's bit-width.
+                         */
+                        if (max_nibbles == TEST_PG_MAX_NIBBLES)
+                                inclusive_end = ((uint64_t) 0) - 1;
+                        else
+                                inclusive_end = ((uint64_t)1 <<
+                                        (max_nibbles * 4)) - 1;
+                }
+                /* Sanity-check that the mapping is page-aligned and
+                 * non-empty before trusting it.
+                 */
+                TEST_ASSERT((curr_start % getpagesize()) == 0,
+                        "%s start address should be divisible by page size, "
+                        "curr_start: %" PRIx64 " page_size: %d",
+                        __func__, curr_start, getpagesize());
+                TEST_ASSERT((inclusive_end % getpagesize())
+                        == (getpagesize() - 1),
+                        "%s end address does not lie before a "
+                        " page boundary, inclusive_end: %" PRIx64
+                        " page_size: %d",
+                        __func__, inclusive_end, getpagesize());
+                TEST_ASSERT(inclusive_end > curr_start, "%s end addr not "
+                        "less than start addr, inclusive_end: %" PRIx64
+                        " curr_start: %" PRIx64, __func__, inclusive_end,
+                        curr_start);
+
+                /* If we've found a suitable mapping, save its state. */
+                if (curr_start <= addr && addr <= inclusive_end) {
+                        parse_perm(perm, &prot, &shared);
+                        info->start = curr_start;
+                        info->end = inclusive_end;
+                        info->size = (size_t)(inclusive_end - curr_start + 1);
+                        info->prot = prot;
+                        info->shared = shared;
+                        rv = 0;
+                        goto done;
+                }
+
+                /* Advance to the next line. */
+                rest = strchr(rest, '\n');
+                if (rest == NULL) {
+                        rv = -1;
+                        errno = ENOENT;
+                        goto done;
+                }
+                rest++;
+        }
+done:
+        /* Postcondition: either success, or failure with errno set. */
+        TEST_ASSERT(((rv == 0) || (rv == -1)) &&
+                ((rv == 0) || (errno != 0)),
+                "%s Invalid completion of function, "
+                "rv: %d errno: %d", __func__, rv, errno);
+        return rv;
+}
+
+/* Set prot to carry the flags indicated in the character array
+ * of permissions. Permissions are represented as three contiguous
+ * characters, with a letter for a granted permission and a dash for a
+ * withheld permission, in the order "read, write, execute." An
+ * 's' or a 'p' is appended if the mapping is shared or private,
+ * respectively.
+ *
+ * As an example, private, RO memory would be represented as: r--p
+ */
+/* Translate a proc-maps permission string (e.g. "r--p") into a
+ * PROT_* bitmask (*prot) and a shared/private flag (*shared); see the
+ * block comment above for the format.
+ */
+static void parse_perm(const char *perm, int *prot, bool *shared)
+{
+        *prot = 0;
+
+        if (perm[0] == 'r')
+                *prot |= PROT_READ;
+        if (perm[1] == 'w')
+                *prot |= PROT_WRITE;
+        if (perm[2] == 'x')
+                *prot |= PROT_EXEC;
+        *shared = (perm[3] == 's');
+
+        /* No permissions at all maps to PROT_NONE.  The original
+         * assigned PROT_NONE to the local pointer, prot, rather than
+         * the output, *prot; on Linux PROT_NONE is 0 so the value was
+         * accidentally right, but the store was a dead no-op.
+         */
+        if (*prot == 0)
+                *prot = PROT_NONE;
+}
+
+/* Correctness-Testable Memory Allocation
+ *
+ * Provides the user with size bytes of memory. Users can specify flags
+ * to which they want the memory to conform; these flags provide
+ * safeguards that allow test_malloc to validate the integrity of the memory
+ * it allocates. If no alignment is requested, then test_malloc guarantees
+ * that the returned address will be aligned by the size of the largest
+ * fundamental type that could fit within the structure.
+ * test_malloc does not guarantee that the memory will be aligned
+ * by higher powers of 2; as such, if the alloc size is less than
+ * __BIGGEST_ALIGNMENT, then test_malloc may produce alignments less than
+ * __BIGGEST_ALIGNMENT__
+ *
+ * Supported flags include:
+ * TEST_MALLOC_PROT_BEFORE: Insert a guard page with protection PROT_NONE
+ * before the user paylod.
+ * TEST_MALLOC_PROT_AFTER: Insert a guard page with protection PROT_NONE
+ * after the user payload.
+ *
+ * Note: If users request a size that's an integer multiple of the
+ * page size, then they may request both a before and after guard page.
+ * Else, users must either request exactly one or zero guard pages.
+ *
+ * TEST_MALLOC_ALIGN: Align the user payload to the power-of-2 number
+ * of bytes specified. When possible, the
+ * returned address won't be aligned by higher
+ * powers of two.
+ *
+ * TEST_MALLOC_MMAP_FD: Mmap for the user area an fd provided in the
+ * optional list. Requires PROT_BEFORE, _AFTER,
+ * and _ALIGN to be set. Requires a valid fd to
+ * be passed (after alignment size) in the list
+ * of optional arguments.
+ *
+ * TEST_MALLOC_MMAP_FD_OFFSET: If doing mmap of an fd, rather than mmap at
+ * offset zero use the provided offset (passed
+ * after the fd).
+ *
+ * Supported optional arguments include (must be provided in this order):
+ * size_t align_bytes: align by a power-of-two number of bytes. align_bytes
+ * must be less than both the requested size and the
+ * system's page size. Where possible, the address
+ * is not aligned to powers of 2 greater than align_bytes.
+ * If align_bytes is 0, the structure is aligned to the
+ * largest type that could fit in the structure --
+ * when possible, no larger alignments are satisfied.
+ *
+ * int fd: mmap this fd for the user area if _MMAP_FD passed as a
+ * flag. Must be valid fd of course. Must follow
+ * align_bytes.
+ *
+ * off_t offset: offset at which to mmap if _MMAP_FD and valid fd. If
+ * not provided assumed zero. Must follow fd. Must have
+ * set _MMAP_FD_OFFSET flag.
+ *
+ * Note: If a trailing guard page is requested with an alignment
+ * that would require a trailing buffer, a TEST_ASSERT
+ * is triggered.
+ *
+ * Red zones: A red zone is placed on each side of the user payload. If guard
+ * pages are present, then the red zones bleed into these pages.
+ *
+ * Args:
+ * size - The number of bytes to allocate for the user. Must be non-zero.
+ * flags - The bitvector into which flags are or'd into.
+ * ... - Optional arguments. Currently, only size_t align_bytes.
+ *
+ * Return:
+ * A pointer to the beginning of the user's payload. NULL is never returned;
+ * a TEST_ASSERT is triggered in out-of-memory conditions.
+ */
+void *test_malloc(size_t size, uint32_t flags, ...)
+{
+	size_t align_bytes;
+	int pos;
+	struct test_malloc_alloc *allocp;
+	int fd = -1;		/* only meaningful with TEST_MALLOC_MMAP_FD */
+	off_t offset = 0;	/* only meaningful with _MMAP_FD_OFFSET */
+
+	/* Validate input. */
+	TEST_ASSERT(size != 0, "%s size must be non-zero",
+		__func__);
+	/* With guard pages on both sides the payload must exactly fill
+	 * whole pages, else one guard page could not sit flush against it.
+	 */
+	if ((flags & TEST_MALLOC_PROT_BEFORE) &&
+		(flags & TEST_MALLOC_PROT_AFTER))
+		TEST_ASSERT(size % getpagesize() == 0,
+			"%s When two guard pages "
+			"are requested, size must be a multiple of the page size, "
+			"size: %zu page_size: %d", __func__, size, getpagesize());
+	/* Mmapping an fd requires both guard pages and an explicit
+	 * alignment, so that the user area starts and ends on page
+	 * boundaries.
+	 */
+	if (flags & TEST_MALLOC_MMAP_FD) {
+		TEST_ASSERT(flags & TEST_MALLOC_PROT_BEFORE,
+			"%s Set mmap fd flag but not required to mmap at "
+			"page boundary", __func__);
+		TEST_ASSERT(flags & TEST_MALLOC_PROT_AFTER,
+			"%s Set mmap fd flag but not required to mmap "
+			"ending at a page boundary", __func__);
+		TEST_ASSERT(flags & TEST_MALLOC_ALIGN,
+			"%s Set mmap fd flag but not required to mmap "
+			"with an alignment", __func__);
+	}
+
+	/* Parse the optional arguments.  They must appear in this order:
+	 * align_bytes (if _ALIGN), fd (if _MMAP_FD), offset (if
+	 * _MMAP_FD_OFFSET).
+	 */
+	va_list ap;
+	va_start(ap, flags);
+	align_bytes = 0;
+	if (flags & TEST_MALLOC_ALIGN) {
+		align_bytes = va_arg(ap, size_t);
+
+		/* align_bytes must be either 0 or a power of two, and
+		 * must be no greater than the requested payload size
+		 * and the page size.
+		 */
+		TEST_ASSERT((align_bytes & (align_bytes - 1)) == 0, "%s "
+			"alignment must be 0 or a power of 2, align_bytes: %zu",
+			__func__, align_bytes);
+		TEST_ASSERT(align_bytes <= size, "%s Cannot align greater "
+			"than size bytes, align_bytes: %zu size: %zu",
+			__func__, align_bytes, size);
+		TEST_ASSERT(align_bytes <= getpagesize(),
+			"%s alignment can be no greater than "
+			"TEST_MALLOC_MAX_ALIGN, align_bytes: %zu "
+			"MAX_ALIGN: %d", __func__, align_bytes,
+			getpagesize());
+	}
+
+	if (flags & TEST_MALLOC_MMAP_FD) {
+		fd = va_arg(ap, int);
+		TEST_ASSERT((align_bytes % getpagesize()) == 0,
+			"%s When mmaping an fd must pass an alignment that "
+			"is a multiple of a page size (instead of %u)",
+			__func__, (unsigned int) align_bytes);
+		TEST_ASSERT(fd >= 0,
+			"%s Invalid fd %d passed for mmaping",
+			__func__, fd);
+		if (flags & TEST_MALLOC_MMAP_FD_OFFSET) {
+			offset = va_arg(ap, off_t);
+			TEST_ASSERT((offset % getpagesize()) == 0,
+				"%s When mmaping an fd must pass an offset "
+				" that is a multiple of a page size (instead "
+				"of %llu)", __func__,
+				(unsigned long long) offset);
+		}
+	}
+
+	if (align_bytes == 0) {
+		/* Even if the user hasn't explicitly requested an alignment,
+		 * we need to ensure that the structure is properly aligned
+		 * (for portability's sake).
+		 *
+		 * We assume that the allocation size supplied to this
+		 * function accounts for compiler-added padding. When
+		 * aligning, we guarantee that the structure will
+		 * be aligned by the size of its greatest fundamental
+		 * type, and will ensure that it is not aligned by
+		 * higher powers of two. When we cannot determine
+		 * the size of the greatest fundamental type in the structure,
+		 * (e.g., if the allocation size were 4 bytes, then
+		 * we don't know whether the structure consists of
+		 * two 2-byte members or one 4-byte member),
+		 * we assume the larger of the possible
+		 * types (continuing our example, the 4 byte structure
+		 * would be aligned by 4).
+		 *
+		 * Alignments are as follows:
+		 * size  alignment
+		 *  1        1
+		 *  2        2
+		 *  3*       1
+		 *  4        4
+		 *  5        1
+		 *  6        2
+		 *  7        1
+		 *  8        8
+		 *
+		 * and so on -- the pattern continues with the
+		 * size modulo __BIGGEST_ALIGNMENT__.
+		 *
+		 * *If a structure consisted of a uint16_t and a uint8_t,
+		 * then the compiler would have padded its size to 4 --
+		 * the smallest multiple of uint16_t. Thus, we know that
+		 * the structure must consist of 3 uint8_ts. Similar
+		 * reasoning can be applied to sizes 6 and 7. If the
+		 * user changes alignment requirements and overrides the
+		 * default structure packing (by, say, using #pragma pack),
+		 * then he is fully responsible for generating code
+		 * that accesses the structure in an alignment-agnostic
+		 * manner.
+		 */
+		/* ffs(__BIGGEST_ALIGNMENT__ | size) yields the position of
+		 * the lowest set bit of size, capped at
+		 * __BIGGEST_ALIGNMENT__ -- i.e. the size of the largest
+		 * fundamental type that could evenly tile the structure.
+		 */
+		pos = ffs(__BIGGEST_ALIGNMENT__ | size);
+		TEST_ASSERT(pos != 0, "%s No LSB set (according to ffs)"
+			", rv: %d size: %zu biggest alignment: %d",
+			__func__, pos, size, __BIGGEST_ALIGNMENT__);
+		align_bytes = 1 << (pos - 1);
+	}
+	va_end(ap);
+
+	/* Allocate space to track this allocation; it is unlinked and
+	 * freed by test_malloc_free().
+	 */
+	allocp = malloc(sizeof(*allocp));
+	TEST_ASSERT(allocp != NULL, "%s Insufficient memory, "
+		"requested size: %zu", __func__, sizeof(*allocp));
+
+	/* Create the allocation and return its user address. */
+	malloc_create(allocp, size, flags, align_bytes, fd, offset);
+	TEST_ASSERT(allocp->user_addr != NULL, "%s unexpected "
+		" NULL pointer after malloc_create", __func__);
+	return allocp->user_addr;
+}
+
+/* Free memory allocated with test_malloc()
+ *
+ * Releases the user payload (via munmap() or free(), matching how it
+ * was obtained), unlinks the allocation from the tracking list, and
+ * frees the tracking struct itself.
+ *
+ * TEST_ASSERTs are triggered if addr is NULL or was not returned by
+ * test_malloc().
+ *
+ * Args:
+ *   addr - a pointer to memory allocated with test_malloc
+ */
+void test_malloc_free(void *addr)
+{
+	struct test_malloc_alloc *allocp;
+	int ret;
+
+	TEST_ASSERT(addr != NULL, "%s invalid argument (NULL pointer)",
+		__func__);
+
+	/* Locate the tracking struct, unlinking it from the list. */
+	allocp = malloc_query(addr, true);
+	TEST_ASSERT(allocp != NULL, "%s couldn't find pointer in alloc list "
+		"addr: %p", __func__, addr);
+
+	/* Release the backing store the same way it was obtained. */
+	if (!allocp->mmaped) {
+		free(allocp->start_addr);
+	} else {
+		ret = munmap(allocp->start_addr, allocp->alloc_size);
+		TEST_ASSERT(ret == 0,
+			"%s failed to munmap, start_addr: %p size: %zu "
+			"addr: %p rv: %i errno: %d", __func__,
+			allocp->start_addr, allocp->alloc_size, addr, ret,
+			errno);
+	}
+
+	free(allocp);
+}
+
+/*
+ * Protect the supplied pointer with requested protections.
+ *
+ * The allocation must have been created with
+ * TEST_MALLOC_ALLOW_PROT_CHG, which guarantees an mmap-backed
+ * allocation (mprotect() operates on page-granular mappings).
+ * On any violation a TEST_ASSERT is triggered.
+ *
+ * Args:
+ *   addr - The test-malloc-ed address to protect.
+ *   prot - the bitwise or of one or more of PROT_READ, PROT_WRITE,
+ *          PROT_EXEC, and PROT_NONE.
+ */
+void test_malloc_chg_prot(const void *addr, int prot)
+{
+	struct test_malloc_alloc *allocp;
+	void *prot_addr;
+	size_t prot_len;
+	int tmp;
+
+	/* Find the allocation corresponding to the supplied pointer,
+	 * but do not remove it from the list.
+	 */
+	allocp = malloc_query(addr, false);
+	TEST_ASSERT(allocp != NULL, "%s couldn't find pointer in alloc list "
+		"addr: %p", __func__, addr);
+
+	/* We must be allowed to change permissions. */
+	TEST_ASSERT(allocp->flags & TEST_MALLOC_ALLOW_PROT_CHG,
+		"%s Payload does not have the "
+		"TEST_MALLOC_ALLOW_PROT_CHG flag, user_addr: %p",
+		__func__, addr);
+	/* ALLOW_PROT_CHG implies an mmap-backed allocation (see
+	 * malloc_create()'s use_mmap), so this should never fire.
+	 */
+	TEST_ASSERT(allocp->mmaped == true, "%s Memory allocated "
+		"with ALLOW_PROT_CHG was not mmaped, start_addr: %p"
+		"alloc size: %zu", __func__, allocp->start_addr,
+		allocp->alloc_size);
+
+	/* Calculate the address and length to mprotect.
+	 *
+	 * Since mprotect requires the address to be a multiple
+	 * of the page size, we can't simply apply the new protections
+	 * to the user address for user_size bytes.
+	 */
+	if (allocp->flags & TEST_MALLOC_PROT_BEFORE) {
+		/* If there's a leading guard page, then
+		 * the user address must sit on a page boundary.
+		 * In this case, we can in fact apply our protections
+		 * to this address.
+		 */
+		prot_addr = allocp->user_addr;
+		prot_len = allocp->user_size;
+	} else if (allocp->flags & TEST_MALLOC_PROT_AFTER) {
+		/* If there's a trailing guard page, then there's
+		 * no guarantee that the user address will sit on
+		 * a page boundary. Thus, we apply the protections
+		 * to the starting address. Because the user payload
+		 * will be preceded by a red zone buffer, we need
+		 * to spread the protections over user_size +
+		 * RED_ZONE_SIZE bytes.
+		 */
+		prot_addr = allocp->start_addr;
+		prot_len = allocp->user_size +
+			TEST_MALLOC_RED_ZONE_SIZE;
+	} else {
+		/* If there are no guard pages, we can simply apply the
+		 * protections to the entire allocation.
+		 */
+		prot_addr = allocp->start_addr;
+		prot_len = allocp->alloc_size;
+	}
+
+	/* Apply the protections. */
+	tmp = mprotect(prot_addr, prot_len, prot);
+	TEST_ASSERT(tmp == 0, "%s failed to mprotect, "
+		"addr: %p len: %zu rv: %d errno: %d",
+		__func__, prot_addr, prot_len, tmp, errno);
+}
+
+/* Retrieve the flags for an allocation
+ *
+ * Args:
+ *   addr - The test-malloc-ed address for which to retrieve flags
+ *
+ * Return:
+ *   the allocation's flags. On error, a TEST_ASSERT is triggered.
+ */
+uint32_t test_malloc_get_flags(const void *addr)
+{
+	struct test_malloc_alloc *allocp;
+
+	/* Look the allocation up without unlinking it from the list. */
+	allocp = malloc_query(addr, false);
+	TEST_ASSERT(allocp != NULL, "%s couldn't find pointer in alloc list "
+		"addr: %p", __func__, addr);
+
+	return allocp->flags;
+}
+
+/* Satisfies a test_malloc request and populates a pre-allocated alloc.
+ *
+ * Args:
+ *   allocp      - caller-allocated tracking struct to fill in; it is
+ *                 linked at the head of alloc_list on success.
+ *   size        - user payload size in bytes.
+ *   flags       - TEST_MALLOC_* flags, already validated by the caller.
+ *   align_bytes - requested payload alignment; caller guarantees it is
+ *                 a non-zero power of two by this point.
+ *   fd, offset  - backing fd/offset, used only with TEST_MALLOC_MMAP_FD.
+ */
+static void malloc_create(struct test_malloc_alloc *allocp,
+	size_t size, uint32_t flags, size_t align_bytes, int fd, off_t offset)
+{
+	size_t alloc_size;
+	void *user_addr;
+	void *start_addr;
+	/* mprotect() needs page-granular mappings, so any flag that implies
+	 * a future or immediate protection change forces mmap over malloc.
+	 */
+	bool use_mmap = flags & (TEST_MALLOC_ALLOW_PROT_CHG |
+		TEST_MALLOC_PROT_BEFORE | TEST_MALLOC_PROT_AFTER);
+	size_t pad_before_size;
+	size_t pad_after_size;
+	size_t align_buffer;
+	size_t tmp;
+
+	/* Each allocation will always have two red zones -- if guard
+	 * pages are requested, then the red zones are nested within them.
+	 */
+	pad_before_size = flags & TEST_MALLOC_PROT_BEFORE ? getpagesize() :
+		TEST_MALLOC_RED_ZONE_SIZE;
+	pad_after_size = flags & TEST_MALLOC_PROT_AFTER ? getpagesize() :
+		TEST_MALLOC_RED_ZONE_SIZE;
+
+	/* If there are no boundary pages, then we can ensure that
+	 * payloads are not aligned by higher powers of two. While we only
+	 * need align_bytes - 1 to get the specified alignment, we need
+	 * align_bytes * 2 - 1 to ensure that alignment is not met
+	 * at higher powers of 2.
+	 */
+	if (!(flags & (TEST_MALLOC_PROT_BEFORE | TEST_MALLOC_PROT_AFTER)))
+		align_buffer = align_bytes * 2 - 1;
+	else
+		align_buffer = 0;
+
+	/* Calculate the allocation size, taking guard pages, red zones,
+	 * and alignment into account.
+	 */
+	alloc_size = size + pad_before_size + pad_after_size + align_buffer;
+
+	/* Allocate the memory. */
+	if (use_mmap) {
+		start_addr = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		TEST_ASSERT(start_addr != MAP_FAILED, "%s Anon mmap failed, "
+			"requested size: %zu", __func__, alloc_size);
+	} else
+		start_addr = malloc(alloc_size);
+	/* Covers the malloc path; the mmap path was asserted above. */
+	TEST_ASSERT(start_addr != NULL, "%s Insufficient memory, "
+		"requested size: %zu", __func__, alloc_size);
+
+	/* Find the user_addr, taking alignment into account
+	 *
+	 * The entire user payload plus red zones are filled with poison data;
+	 * only the red zones, however, are validated upon free.
+	 *
+	 * Note: We don't explicitly work to meet alignment requests when a
+	 * leading guard page is requested. Its presence guarantees that the
+	 * user address will sit on a page boundary and thus be aligned.
+	 */
+	if (flags & TEST_MALLOC_PROT_BEFORE) {
+		/* If there's a leading guard page, then the user payload
+		 * and the poison data must lie exactly one page after it.
+		 */
+		user_addr = PTR_ADD(start_addr, getpagesize());
+
+		/* We won't add buffers between the user payload and
+		 * guard page in order to meet alignment requests.
+		 */
+		TEST_ASSERT((uintptr_t)user_addr
+			% align_bytes == 0, "%s cannot align "
+			"structure, size: %zu align_bytes: %zu",
+			__func__, size, align_bytes);
+
+	} else if (flags & TEST_MALLOC_PROT_AFTER) {
+		/* If there's a trailing guard page and no leading guard page,
+		 * then the user payload sits size bytes before it.
+		 */
+		user_addr = PTR_ADD(start_addr,
+			CEIL_BYTES_TO_PAGES(size + TEST_MALLOC_RED_ZONE_SIZE)
+			* getpagesize() - size);
+
+		/* We won't add buffers between the user payload and
+		 * guard page in order to meet alignment requests.
+		 */
+		TEST_ASSERT((uintptr_t)user_addr
+			% align_bytes == 0, "%s cannot align "
+			"structure, size: %zu align_bytes: %zu",
+			__func__, size, align_bytes);
+	} else {
+		/* Otherwise, if there are no guard pages, then the user
+		 * address lies at least RED_ZONE_SIZE bytes ahead of the
+		 * starting addresses. If we need to align the address,
+		 * however, the user address may be pushed up further.
+		 */
+		user_addr = PTR_ADD(start_addr, TEST_MALLOC_RED_ZONE_SIZE);
+
+		/* Align user_addr by align_bytes. */
+		tmp = (uintptr_t)user_addr % align_bytes;
+		user_addr = (tmp != 0)
+			? PTR_ADD(user_addr, (align_bytes - tmp))
+			: user_addr;
+
+		/* user_addr shouldn't be divisible by
+		 * powers of two greater than align_bytes.
+		 */
+		user_addr = ((uintptr_t)user_addr % (align_bytes * 2)
+			== 0)
+			? PTR_ADD(user_addr, align_bytes)
+			: user_addr;
+	}
+
+	/* Protect the guard pages.
+	 * NOTE(review): the mprotect() return values are not checked here,
+	 * unlike everywhere else in this file -- consider TEST_ASSERTing.
+	 */
+	if (flags & TEST_MALLOC_PROT_BEFORE)
+		mprotect(start_addr, getpagesize(), PROT_NONE);
+	if (flags & TEST_MALLOC_PROT_AFTER)
+		mprotect(PTR_ADD(user_addr, size), getpagesize(), PROT_NONE);
+
+	/* If fd, remap the user_addr backing it up with fd. First, munmap the
+	 * target user region (we don't care about the private anon
+	 * region there). Leave the guard pages, before and after, in place,
+	 * as PROT_NONE. Then, mmap again into the user region the intended
+	 * fd, offset, and size. Later when cleaning up, a single munmap of
+	 * [before guard page, fd mmap, after guard page] will clean up
+	 * everything in one fell swoop. */
+	if (flags & TEST_MALLOC_MMAP_FD) {
+		void *check;
+		TEST_ASSERT(0 == munmap(user_addr, size),
+			"%s Could not munmap the actual user area "
+			"in other to re map with fd (errno %d)",
+			__func__, errno);
+		check = mmap(user_addr, size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_FIXED, fd, offset);
+		TEST_ASSERT(check == user_addr,
+			"%s Could not remap fd %d at address %p "
+			"with request size: %llu (errno %d)",
+			__func__, fd, user_addr,
+			(unsigned long long) size, errno);
+	}
+
+	/* Update the alloc and add it to the head of the list. */
+	allocp->next = alloc_list;
+	allocp->user_addr = user_addr;
+	allocp->start_addr = start_addr;
+	allocp->user_size = size;
+	allocp->alloc_size = alloc_size;
+	allocp->flags = flags;
+	allocp->mmaped = use_mmap;
+	alloc_list = allocp;
+}
+
+/* Look up the allocation whose user address equals addr.
+ *
+ * Walks the singly-linked alloc list. When unlink is true, a found
+ * entry is also removed from the list (the entry itself is not freed).
+ * Returns NULL when addr was not produced by test_malloc().
+ */
+static struct test_malloc_alloc *malloc_query(const void *addr, bool unlink)
+{
+	struct test_malloc_alloc **linkp = &alloc_list;
+	struct test_malloc_alloc *match;
+
+	/* Advance linkp to the link that points at the matching entry. */
+	while (*linkp != NULL && (*linkp)->user_addr != addr)
+		linkp = &(*linkp)->next;
+
+	match = *linkp;
+	if (match != NULL && unlink)
+		*linkp = match->next;
+
+	return match;
+}
+
+/* Retrieve the maximum width in nibbles of all the addresses
+ * represented in map, where map is a string in the format
+ * of a valid /proc/\*\/maps file.
+ *
+ * Every line must begin with "<start>-<end> " where both fields are
+ * hex; any deviation, or an empty map, triggers a TEST_ASSERT.
+ *
+ * Args:
+ *   map - NUL-terminated contents of a maps file (at least one line).
+ *
+ * Return:
+ *   The widest address field seen, in hex digits
+ *   (1..TEST_PG_MAX_NIBBLES).
+ */
+static int proc_maps_max_nibbles(const char *map)
+{
+	const char *chptr1, *chptr2;
+	int curr_nibbles;
+	int max_nibbles = -1;
+	const char *rest = map;
+	while (true) {
+		/* See if the length of the first address gives us a new max. */
+		for (chptr1 = chptr2 = rest; isxdigit(*chptr2); chptr2++)
+			;
+		/* The start address must be terminated by the '-' separator. */
+		if (*chptr2 != '-') {
+			TEST_ASSERT(false, "%s Parsing error, line: %.*s",
+				__func__, (int)line_len(rest), rest);
+		}
+		curr_nibbles = chptr2 - chptr1;
+		if (curr_nibbles > max_nibbles)
+			max_nibbles = curr_nibbles;
+
+		/* See if the length of the 2nd address gives us a new max. */
+		for (chptr1 = ++chptr2; isxdigit(*chptr2); chptr2++)
+			;
+		/* The end address must be terminated by a space. */
+		if (*chptr2 != ' ') {
+			TEST_ASSERT(false, "%s Parsing error, line: %.*s",
+				__func__, (int)line_len(rest), rest);
+		}
+		curr_nibbles = chptr2 - chptr1;
+		if (curr_nibbles > max_nibbles)
+			max_nibbles = curr_nibbles;
+
+		/* Advance to the next line; stop at end of string or
+		 * when the final line has been consumed.
+		 */
+		rest = strchr(rest, '\n');
+		if (rest == NULL || *(++rest) == '\0')
+			break;
+	}
+
+	/* The width must be non-zero and no greater than the maximum allowed
+	 * bit-width. */
+	TEST_ASSERT((max_nibbles > 0) && (max_nibbles <= TEST_PG_MAX_NIBBLES),
+		"%s invalid max_nibbles (likely because "
+		"the maps file is invalid), max_nibbles: %d "
+		"map:\n%s\n", __func__, max_nibbles, map);
+	return max_nibbles;
+}
+
+/* Length of the first line of str: the number of characters in the
+ * range [str, '\n'). If no newline is present, returns the length of
+ * the entire string.
+ */
+static size_t line_len(const char *str)
+{
+	const char *newline = strchr(str, '\n');
+
+	return newline != NULL ? (size_t)(newline - str) : strlen(str);
+}
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Write of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *   fd    - Opened file descriptor to file to be written.
+ *   count - Number of bytes to write.
+ *
+ * Output:
+ *   buf   - Starting address of data to be written.
+ *
+ * Return:
+ *   On success, number of bytes written.
+ *   On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+	ssize_t write_rv;
+	ssize_t num_written = 0;
+	size_t num_left = count;
+	const char *ptr = buf;
+
+	/* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+	 * write(2) manpage for details); the do/while still issues a
+	 * single zero-length write in that case.
+	 */
+	do {
+		write_rv = write(fd, ptr, num_left);
+
+		switch (write_rv) {
+		case -1:
+			/* Retry only the transient conditions.  (This was
+			 * previously "errno = EAGAIN" -- an assignment that
+			 * made every failure look retryable and could loop
+			 * forever on hard errors like EIO.)
+			 */
+			if ((errno == EAGAIN) || (errno == EINTR))
+				continue;
+			TEST_ASSERT(false, "Unexpected write failure,\n"
+				" rv: %zi errno: %i", write_rv, errno);
+			/* NOT REACHED */
+			exit(1);
+
+		default:
+			TEST_ASSERT(write_rv >= 0, "Unexpected rv from write,\n"
+				" rv: %zi errno: %i", write_rv, errno);
+			TEST_ASSERT(write_rv <= num_left, "More bytes written "
+				"then requested,\n"
+				" rv: %zi num_left: %zu", write_rv, num_left);
+			num_written += write_rv;
+			num_left -= write_rv;
+			ptr = ptr + write_rv;
+			break;
+		}
+	} while (num_written < count);
+
+	return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Read of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read. A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read. It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *   fd    - Opened file descriptor to file to be read.
+ *   count - Number of bytes to read.
+ *
+ * Output:
+ *   buf   - Starting address of where to write the bytes read.
+ *
+ * Return:
+ *   On success, number of bytes read.
+ *   On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+	ssize_t read_rv;
+	ssize_t num_read = 0;
+	size_t num_left = count;
+	void *ptr = buf;
+
+	/* Note: Count of zero is allowed (see "If count is zero" portion of
+	 * read(2) manpage for details); the do/while still issues a
+	 * single zero-length read in that case.
+	 */
+	do {
+		read_rv = read(fd, ptr, num_left);
+
+		switch (read_rv) {
+		case -1:
+			/* Retry only the transient conditions.  (This was
+			 * previously "errno = EAGAIN" -- an assignment that
+			 * made every failure look retryable and could loop
+			 * forever on hard errors like EIO.)
+			 */
+			if ((errno == EAGAIN) || (errno == EINTR))
+				continue;
+			TEST_ASSERT(false, "Unexpected read failure,\n"
+				" rv: %zi errno: %i", read_rv, errno);
+			break;
+
+		case 0:
+			/* EOF before all requested bytes were delivered. */
+			TEST_ASSERT(false, "Unexpected EOF,\n"
+				" rv: %zi num_read: %zi num_left: %zu",
+				read_rv, num_read, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(read_rv > 0, "Unexpected rv from read,\n"
+				" rv: %zi errno: %i", read_rv, errno);
+			TEST_ASSERT(read_rv <= num_left, "More bytes read "
+				"then requested,\n"
+				" rv: %zi num_left: %zu", read_rv, num_left);
+			num_read += read_rv;
+			num_left -= read_rv;
+			ptr = (void *) ((uintptr_t) ptr + read_rv);
+			break;
+		}
+	} while (num_read < count);
+
+	return num_read;
+}
+
+/* Read contents of sequential file
+ *
+ * Given a path to a sequential file, allocate and return a buffer that
+ * contains its contents. We do NOT guarantee a thread safe read; that is,
+ * other processes can race with our attempt to read the provided
+ * path. However, each line read should be self-consistent.
+ *
+ * This function could be useful to read, say, sequential files.
+ *
+ * Args:
+ *   path - the pathname to the file to open
+ *
+ * Output:
+ *   sizep - If not supplied as NULL, points to the number of bytes
+ *           held by the output buffer.
+ *   bufp  - A pointer to the allocated, NUL-terminated buffer; the
+ *           caller owns it and must free() it.
+ *
+ * Return:
+ *   On success, returns TEST_UTIL_SUCCESS. Failures trigger
+ *   TEST_ASSERTs.
+ */
+int test_seq_read(const char *path, char **bufp, size_t *sizep)
+{
+	int fd;
+	int tmp;
+	char *buf;
+	size_t buf_len;
+	int tmp_read;
+	size_t read_bytes;
+	size_t max_read;
+	const char *last_nl;
+	off_t prev_partial_offset;
+	size_t buf_initial_size;
+	size_t buf_growth_amt;
+
+	/* Validate input. */
+	TEST_ASSERT(bufp != NULL, "%s unexpected NULL pointer ",
+		__func__);
+	TEST_ASSERT(path != NULL, "%s unexpected NULL pointer ",
+		__func__);
+
+	/* Open the file. */
+	fd = open(path, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "%s failed to open file, path: %s errno: %d",
+		__func__, path, errno);
+
+	/* Initial buf size and growth amount. Each time the size
+	 * of the buffer is found to be insufficient, it is grown
+	 * by the growth amount.
+	 *
+	 * Note: For the forward progress detection logic (see
+	 * use of prev_partial_offset) to be valid, the growth
+	 * amount must be >= the length of the longest line.
+	 */
+	buf_initial_size = getpagesize();
+	buf_growth_amt = 2 * getpagesize();
+	TEST_ASSERT(buf_growth_amt >= getpagesize(), "%s buf_growth_amt "
+		"is too small, buf_growth_amt: %zu page_size: %d",
+		__func__, buf_growth_amt, getpagesize());
+
+	/* Allocate the buffer (one byte is always reserved for the
+	 * terminating NUL).
+	 */
+	buf_len = buf_initial_size;
+	buf = malloc(buf_len);
+	TEST_ASSERT(buf != NULL, "%s insufficent memory, "
+		"buf_len: %zu", __func__, buf_len);
+
+	/* Fetch the file.
+	 *
+	 * For all seq_files, we guarantee that the retrieved data will be
+	 * self-consistent in each line. Seq_files are read by
+	 * seq_read, which buffers each line as it begins to read it
+	 * -- thus the assumption of self-consistent lines per single reads.
+	 * For more information, see fs/seq_file.c and fs/task_mmu.c
+	 *
+	 * In order to guarantee this line-level self-consistency, we cannot
+	 * read partial lines. If we have reason to believe that a partial
+	 * read occurred (i.e., if the last byte read was not a newline),
+	 * then we lseek back to the beginning of the file, increase the
+	 * size of the buffer if necessary, and begin reading once again.
+	 *
+	 * If we're reading a generic file, then we can't guarantee
+	 * any atomicity.
+	 */
+	read_bytes = 0;
+	prev_partial_offset = 0;
+	while (true) {
+		max_read = buf_len - read_bytes - 1;
+		tmp_read = read(fd, buf + read_bytes, max_read);
+		TEST_ASSERT(tmp_read >= 0 && tmp_read <= max_read,
+			"%s failed call to system call read, "
+			"fd: %d read_bytes: %zu rv: %d, errno: %d.",
+			__func__, fd, read_bytes, tmp_read, errno);
+		read_bytes += tmp_read;
+
+		/* If we've successfully read the entire file, then
+		 * read should have returned 0.
+		 */
+		if (tmp_read == 0) {
+			buf[read_bytes] = '\0';
+			break;
+		}
+
+		/* Cautiously check that we can support this line length.
+		 *
+		 * Measure the trailing (possibly partial) line that ends
+		 * at read_bytes.  (Previously this called
+		 * line_len(buf + read_bytes), which scanned past the data
+		 * actually read, into uninitialized memory.)
+		 */
+		buf[read_bytes] = '\0';
+		last_nl = strrchr(buf, '\n');
+		tmp = last_nl != NULL
+			? (int)((buf + read_bytes) - (last_nl + 1))
+			: (int)read_bytes;
+		TEST_ASSERT(tmp <= buf_growth_amt, "%s insufficetly small "
+			"growth amount, buf_growth_amt: %zu line_len: %d",
+			__func__, buf_growth_amt, tmp);
+
+		/* If the last byte read was not a newline, then we've
+		 * violated our atomicity guarantee -- i.e., that
+		 * the contents of the buffer will have self-consistent
+		 * lines. Unfortunately, that means we'll have to reread
+		 * the fd from byte 0.
+		 */
+		if ((buf[read_bytes - 1] != '\n')) {
+
+			/* If we hit a partial line, we should be at an
+			 * offset greater than the one we were at the last
+			 * time we hit a partial line.
+			 */
+			TEST_ASSERT(read_bytes > prev_partial_offset, "%s "
+				"No forward progress, prev_partial_offset: %zu "
+				"read_bytes: %zu", __func__,
+				prev_partial_offset, read_bytes);
+			TEST_ASSERT(read_bytes == buf_len - 1,
+				"%s partial line encountered before entire "
+				"buffer was consumed, read_bytes: %zu "
+				"buf_len: %zu", __func__, read_bytes,
+				buf_len - 1);
+
+			prev_partial_offset = read_bytes;
+			/* Rewind to offset 0.  (Previously written as
+			 * lseek(fd, SEEK_SET, 0) -- swapped arguments that
+			 * only worked because SEEK_SET happens to be 0.)
+			 */
+			tmp = lseek(fd, 0, SEEK_SET);
+			TEST_ASSERT(tmp == 0, "%s failed to lseek to "
+				"byte 0, fd: %d errno: %d",
+				__func__, fd, errno);
+
+			/* Since we're reading from the beginning of the
+			 * fd, at the start of the next iteration we'll have
+			 * read 0 bytes.
+			 */
+			read_bytes = 0;
+		}
+
+		/* If we read as much as we requested, then
+		 * it's very likely that we haven't read the entire file yet.
+		 * We'll cautiously increase the size of our buffer.
+		 */
+		if (tmp_read == max_read) {
+			/* Grow by buf_growth_amt (== 2 pages), keeping the
+			 * forward-progress invariant documented above.
+			 */
+			buf_len += buf_growth_amt;
+			buf = realloc(buf, buf_len);
+			TEST_ASSERT(buf != NULL,
+				"%s Insufficient memory while reallocating, "
+				"buf_len: %zu", __func__, buf_len);
+		}
+	}
+
+	/* Perform the necessary clean-up and store the output. */
+	close(fd);
+	*bufp = buf;
+	if (sizep != NULL)
+		*sizep = read_bytes;
+	return TEST_UTIL_SUCCESS;
+}
+
+/* Test ELF Header Get
+ *
+ * Reads the ELF header of the file at filename into *hdrp, after
+ * validating the ELF identification record (magic, 64-bit class,
+ * matching endianness, current version). Any validation failure or
+ * I/O error triggers a TEST_ASSERT.
+ *
+ * Args:
+ *   filename - Path to ELF file
+ *
+ * Output:
+ *   hdrp - The validated ELF header.
+ *
+ * NOTE(review): the opened fd is not closed before return -- confirm
+ * whether callers rely on that or whether it leaks.
+ */
+void test_elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+	off_t offset_rv;
+
+	/* Open the ELF file. */
+	int fd;
+	fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in and validate ELF Identification Record.
+	 * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+	 * of the ELF header, which is at the beginning of the ELF file.
+	 * For now it is only safe to read the first EI_NIDENT bytes. Once
+	 * read and validated, the value of e_ehsize can be used to determine
+	 * the real size of the ELF header.
+	 */
+	unsigned char ident[EI_NIDENT];
+	test_read(fd, ident, sizeof(ident));
+	TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+		&& (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+		"ELF MAGIC Mismatch,\n"
+		"  filename: %s\n"
+		"  ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+		"  Expected: %02x %02x %02x %02x",
+		filename,
+		ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+		ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+	TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+		"Current implementation only able to handle ELFCLASS64,\n"
+		"  filename: %s\n"
+		"  ident[EI_CLASS]: %02x\n"
+		"  expected: %02x",
+		filename,
+		ident[EI_CLASS], ELFCLASS64);
+	/* Cross-endian ELF files are not supported: the file's byte order
+	 * must match the host's.
+	 */
+	TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+		&& (ident[EI_DATA] == ELFDATA2LSB))
+		|| ((BYTE_ORDER == BIG_ENDIAN)
+		&& (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+		"implementation only able to handle\n"
+		"cases where the host and ELF file endianness\n"
+		"is the same:\n"
+		"  host BYTE_ORDER: %u\n"
+		"  host LITTLE_ENDIAN: %u\n"
+		"  host BIG_ENDIAN: %u\n"
+		"  ident[EI_DATA]: %u\n"
+		"  ELFDATA2LSB: %u\n"
+		"  ELFDATA2MSB: %u",
+		BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+		ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+	TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+		"Current implementation only able to handle current "
+		"ELF version,\n"
+		"  filename: %s\n"
+		"  ident[EI_VERSION]: %02x\n"
+		"  expected: %02x",
+		filename, ident[EI_VERSION], EV_CURRENT);
+
+	/* Read in the ELF header.
+	 * With the ELF Identification portion of the ELF header
+	 * validated, especially that the value at EI_VERSION is
+	 * as expected, it is now safe to read the entire ELF header.
+	 */
+	offset_rv = lseek(fd, 0, SEEK_SET);
+	/* NOTE(review): "%zi" expects ssize_t but offset_rv is off_t --
+	 * the same width on Linux, but not guaranteed by the standard.
+	 */
+	TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+		"  rv: %zi expected: %i", offset_rv, 0);
+	test_read(fd, hdrp, sizeof(*hdrp));
+	/* Sanity-check that the on-disk header entry sizes match the
+	 * structures this code will later read with them.
+	 */
+	TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+		"Unexpected physical header size,\n"
+		"  hdrp->e_phentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_phentsize, sizeof(Elf64_Phdr));
+	TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+		"Unexpected section header size,\n"
+		"  hdrp->e_shentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* Test ELF Get Symbol Info
+ *
+ * Look up and return information about a specified symbol, within a specified
+ * ELF file (i.e. executable, object file). Note that archive files
+ * containing ELF files are not currently supported. The symbol name is given
+ * by name, while the path to the ELF file is given by filename. When found,
+ * information about the symbol is returned in the structure pointed to
+ * by symbp.
+ *
+ * TODO(lhuemill): Simplify implementation by using libbfd.
+ *
+ * Args:
+ * filename - Path to ELF file
+ * name - Symbol name
+ *
+ * Output:
+ * symbp - Information about specified symbol.
+ *
+ * Return:
+ * On success, returns 0.
+ * Symbol not found, returns -1, with errno equal to ENOENT.
+ * All other unexpected conditions cause a TEST_ASSERT failure.
+ */
+int test_elfsymb_get(const char *filename, const char *name,
+ struct test_elfsymb *symbp)
+{
+ bool symb_found = false;
+ off_t offset, offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ Elf64_Ehdr hdr;
+ test_elfhdr_get(filename, &hdr);
+
+ /* For each section header.
+ * The following ELF header members specify the location
+ * and size of the section headers:
+ *
+ * e_shoff - File offset to start of section headers
+ * e_shentsize - Size of each section header
+ * e_shnum - Number of section header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_shnum; n1++) {
+ /* Seek to the beginning of the section header. */
+ offset = hdr.e_shoff + (n1 * hdr.e_shentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to beginning of section header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the section header */
+ Elf64_Shdr shdr;
+ test_read(fd, &shdr, sizeof(shdr));
+
+ /* Skip if this section doesn't contain symbols. */
+ if ((shdr.sh_type != SHT_SYMTAB)
+ && (shdr.sh_type != SHT_DYNSYM))
+ continue;
+
+ /* Obtain corresponding string table.
+ * The sh_link member of a symbol table section header,
+ * specifies which section contains the string table
+ * for these symbol names.
+ */
+ Elf64_Shdr strtab_shdr;
+ offset = hdr.e_shoff + (shdr.sh_link * hdr.e_shentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to beginning of section header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+ test_read(fd, &strtab_shdr, sizeof(strtab_shdr));
+
+ char *strtab = malloc(strtab_shdr.sh_size);
+ TEST_ASSERT(strtab, "Insufficient Memory");
+ offset = strtab_shdr.sh_offset;
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Seek to string table failed,\n"
+ " rv: %jd expected: %jd",
+ (intmax_t) offset_rv, (intmax_t) offset);
+ test_read(fd, strtab, strtab_shdr.sh_size);
+
+ /* For each symbol */
+ for (unsigned int n2 = 0;
+ n2 < (shdr.sh_size / sizeof(Elf64_Sym)); n2++) {
+ Elf64_Sym sym;
+
+ offset = shdr.sh_offset + (n2 * sizeof(sym));
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Seek to start of symbol entries failed,\n"
+ " offset: %jd\n"
+ " rv: %jd expected: %jd",
+ (intmax_t) offset, (intmax_t) offset_rv,
+ (intmax_t) offset);
+
+ test_read(fd, &sym, sizeof(sym));
+
+ /* Is this the symbol we're searching for? */
+ if (strcmp(strtab + sym.st_name, name) == 0) {
+ symbp->value = sym.st_value;
+ symbp->size = sym.st_size;
+ symb_found = true;
+ break;
+ }
+ }
+ free(strtab);
+
+ /* If the symbol was found, no need to search additional
+ * sections that describe symbols. Although highly unlikely,
+ * when two or more entries exist for the same symbol name,
+ * only information about the first occurrence found is
+ * returned.
+ */
+ if (symb_found)
+ break;
+ }
+
+ close(fd);
+
+ if (!symb_found) {
+ errno = ENOENT;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Given a virtual address in our address space, get count
+ * /proc/self/pagemap entries.
+ */
+void extract_pageflags(void *addr, unsigned int count, uint64_t *buffer)
+{
+ off_t offset, rvo;
+ ssize_t rv;
+ size_t readsz;
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open pagemap file "
+ "rv: %i errno: %i", fd, errno);
+
+ TEST_ASSERT((((unsigned long) addr) % getpagesize()) == 0,
+ "Please pass page-aligned address (%p) to extract_"
+ "pageflags", addr);
+
+ offset = (((off_t) addr) / getpagesize()) * sizeof(uint64_t);
+ rvo = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(rvo == offset, "%s failed to lseek pageflags to byte %llu "
+ "(%llu), va 0x%lx fd: %d errno: %d",
+ __func__,
+ (unsigned long long) offset,
+ (unsigned long long) rvo,
+ (unsigned long) addr, fd, errno);
+
+ readsz = count * sizeof(uint64_t);
+ rv = read(fd, buffer, readsz);
+ TEST_ASSERT(rv == readsz,
+ "%s could not read %lu pageflags (%ld) errno %d",
+ __func__, (unsigned long) readsz, (long) rv, errno);
+ TEST_ASSERT(0 == close(fd), "%s failed to close pageflags errno %d",
+ __func__, errno);
+}
+
+/* Initialization function that sets up the test_utils
+ * environment.
+ */
+static void __attribute__((constructor)) test_init(void)
+{
+ srand48(0);
+}
diff --git a/gtests/tests/set_sregs_test.c b/gtests/tests/set_sregs_test.c
new file mode 100644
index 0000000..76f1e71
--- /dev/null
+++ b/gtests/tests/set_sregs_test.c
@@ -0,0 +1,66 @@
+/*
+ * gtests/tests/set_sregs_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * set_sregs_test
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+int set_sregs(kvm_util_vm_t *vm, uint32_t vcpuid, const struct kvm_sregs *sregs)
+{
+ return ioctl(vcpu_fd(vm, vcpuid), KVM_SET_SREGS, sregs);
+}
+
+void guest_code(void)
+{
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ kvm_util_vm_t *vm;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, guest_code);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = set_sregs(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = set_sregs(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_util_vm_free(&vm);
+
+ return 0;
+}
diff --git a/gtests/tests/vmx_tsc_adjust_test.c b/gtests/tests/vmx_tsc_adjust_test.c
new file mode 100644
index 0000000..4844255
--- /dev/null
+++ b/gtests/tests/vmx_tsc_adjust_test.c
@@ -0,0 +1,215 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE 4096
+#define VCPU_ID 5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE (-(1ll << 48))
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+struct vmx_page {
+ vm_vaddr_t virt;
+ vm_paddr_t phys;
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static kvm_util_vm_t *vm;
+
+/* Array of vmx_page descriptors that is shared with the guest. */
+struct vmx_page *vmx_pages;
+
+static void __exit_to_l0(uint16_t port, unsigned long arg)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg)
+ : "rax");
+}
+
+#define exit_to_l0(_port, _arg) __exit_to_l0(_port, (unsigned long) (_arg))
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
+} while (0)
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ exit_to_l0(PORT_REPORT, adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_page *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ prepare_for_vmx_operation();
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));
+
+ /* Load a VMCS. */
+ *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
+ GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ exit_to_l0(PORT_DONE, 0);
+}
+
+static void allocate_vmx_page(struct vmx_page *page)
+{
+ vm_vaddr_t virt;
+
+ virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
+ memset(addr_vmvirt2hvirt(vm, virt), 0, PAGE_SIZE);
+
+ page->virt = virt;
+ page->phys = addr_vmvirt2vmphy(vm, virt);
+}
+
+static vm_vaddr_t allocate_vmx_pages(void)
+{
+ vm_vaddr_t vmx_pages_vaddr;
+ int i;
+
+ vmx_pages_vaddr = vm_vaddr_alloc(
+ vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
+
+ vmx_pages = (void *) addr_vmvirt2hvirt(vm, vmx_pages_vaddr);
+
+ for (i = 0; i < NUM_VMX_PAGES; i++)
+ allocate_vmx_page(&vmx_pages[i]);
+
+ return vmx_pages_vaddr;
+}
+
+void report(int64_t val)
+{
+ fprintf(stderr,
+ "IA32_TSC_ADJUST is %" PRId64 " (%" PRId64 " * TSC_ADJUST_VALUE + %" PRId64 ").\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_vaddr;
+
+ vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vmx_pages_vaddr = allocate_vmx_pages();
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_regs regs;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ switch (run->io.port) {
+ case PORT_ABORT:
+ TEST_ASSERT(false, "%s", (const char *) regs.rdi);
+ /* NOT REACHED */
+ case PORT_REPORT:
+ report(regs.rdi);
+ break;
+ case PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ }
+ }
+
+done:
+ return 0;
+}
diff --git a/x86/Makefile.common b/x86/Makefile.common
index 5f7eac4..47e7bc9 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -33,7 +33,7 @@
libgcc := $(shell $(CC) -m$(bits) --print-libgcc-file-name)
-# We want to keep intermediate file: %.elf and %.o
+# We want to keep intermediate file: %.elf and %.o
.PRECIOUS: %.elf %.o
FLATLIBS = lib/libcflat.a $(libgcc)
@@ -61,7 +61,18 @@
OBJDIRS += api
endif
-test_cases: $(tests-common) $(tests) $(tests-api)
+GTEST_DIR = gtests/tests
+GTEST_LIBDIR = gtests/lib
+GTEST_INCDIR = gtests/include
+
+ifdef GTESTS
+tests-gtests = $(GTEST_DIR)/set_sregs_test $(GTEST_DIR)/vmx_tsc_adjust_test
+
+OBJDIRS += gtests/lib
+OBJDIRS += gtests/tests
+endif
+
+test_cases: $(tests-common) $(tests) $(tests-api) $(tests-gtests)
$(TEST_DIR)/%.o: CFLAGS += -std=gnu99 -ffreestanding -I $(SRCDIR)/lib -I $(SRCDIR)/lib/x86 -I lib
@@ -81,7 +92,9 @@
arch_clean:
$(RM) $(TEST_DIR)/*.o $(TEST_DIR)/*.flat $(TEST_DIR)/*.elf \
$(TEST_DIR)/.*.d lib/x86/.*.d \
- $(tests-api) api/*.o api/*.a api/.*.d
+ $(tests-api) api/*.o api/*.a api/.*.d \
+ $(tests-gtests) $(GTEST_DIR)/*.o $(GTEST_DIR)/.*.d \
+ $(GTEST_LIBDIR)/*.o $(GTEST_LIBDIR)/*.a $(GTEST_LIBDIR)/.*.d
api/%.o: CXXFLAGS += -m32 -std=gnu++11
@@ -92,3 +105,15 @@
$(AR) rcs $@ $^
$(tests-api) : % : %.o api/libapi.a
+
+$(GTEST_LIBDIR)/%.o: CFLAGS += -std=gnu99 -I $(GTEST_INCDIR) \
+ -I $(KERNEL)/usr/include
+
+$(GTEST_LIBDIR)/libgtests.a: $(GTEST_LIBDIR)/kvm_util.o $(GTEST_LIBDIR)/test_util.o \
+ $(GTEST_LIBDIR)/test_sparsebit.o
+ $(AR) rcs $@ $^
+
+$(GTEST_DIR)/%.o: CFLAGS += -std=gnu99 -I $(GTEST_INCDIR) -I $(KERNEL)/usr/include
+
+$(tests-gtests) : % : %.o $(GTEST_LIBDIR)/libgtests.a
+ $(CC) -static -o $@ $^ -lm