| #include <sys/epoll.h> |
| #include <sys/un.h> |
| #include <sys/types.h> |
| #include <sys/socket.h> |
| #include <sys/eventfd.h> |
| #include <dirent.h> |
| |
| #include "kvm/kvm-ipc.h" |
| #include "kvm/rwsem.h" |
| #include "kvm/read-write.h" |
| #include "kvm/util.h" |
| #include "kvm/kvm.h" |
| #include "kvm/builtin-debug.h" |
| #include "kvm/strbuf.h" |
| #include "kvm/kvm-cpu.h" |
| #include "kvm/8250-serial.h" |
| |
| struct kvm_ipc_head { |
| u32 type; |
| u32 len; |
| }; |
| |
| #define KVM_IPC_MAX_MSGS 16 |
| |
| #define KVM_SOCK_SUFFIX ".sock" |
| #define KVM_SOCK_SUFFIX_LEN ((ssize_t)sizeof(KVM_SOCK_SUFFIX) - 1) |
| |
| extern __thread struct kvm_cpu *current_kvm_cpu; |
| static void (*msgs[KVM_IPC_MAX_MSGS])(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg); |
| static DECLARE_RWSEM(msgs_rwlock); |
| static int epoll_fd, server_fd, stop_fd; |
| static pthread_t thread; |
| |
| static int kvm__create_socket(struct kvm *kvm) |
| { |
| char full_name[PATH_MAX]; |
| int s; |
| struct sockaddr_un local; |
| int len, r; |
| |
| /* This usually 108 bytes long */ |
| BUILD_BUG_ON(sizeof(local.sun_path) < 32); |
| |
| snprintf(full_name, sizeof(full_name), "%s/%s%s", |
| kvm__get_dir(), kvm->cfg.guest_name, KVM_SOCK_SUFFIX); |
| |
| s = socket(AF_UNIX, SOCK_STREAM, 0); |
| if (s < 0) { |
| perror("socket"); |
| return s; |
| } |
| |
| local.sun_family = AF_UNIX; |
| strlcpy(local.sun_path, full_name, sizeof(local.sun_path)); |
| len = strlen(local.sun_path) + sizeof(local.sun_family); |
| r = bind(s, (struct sockaddr *)&local, len); |
| /* Check for an existing socket file */ |
| if (r < 0 && errno == EADDRINUSE) { |
| r = connect(s, (struct sockaddr *)&local, len); |
| if (r == 0) { |
| /* |
| * If we could connect, there is already a guest |
| * using this same name. This should not happen |
| * for PID derived names, but could happen for user |
| * provided guest names. |
| */ |
| pr_err("Guest socket file %s already exists.", |
| full_name); |
| r = -EEXIST; |
| goto fail; |
| } |
| if (errno == ECONNREFUSED) { |
| /* |
| * This is a ghost socket file, with no-one listening |
| * on the other end. Since kvmtool will only bind |
| * above when creating a new guest, there is no |
| * danger in just removing the file and re-trying. |
| */ |
| unlink(full_name); |
| pr_info("Removed ghost socket file \"%s\".", full_name); |
| r = bind(s, (struct sockaddr *)&local, len); |
| } |
| } |
| if (r < 0) { |
| perror("bind"); |
| goto fail; |
| } |
| |
| r = listen(s, 5); |
| if (r < 0) { |
| perror("listen"); |
| goto fail; |
| } |
| |
| return s; |
| |
| fail: |
| close(s); |
| return r; |
| } |
| |
| void kvm__remove_socket(const char *name) |
| { |
| char full_name[PATH_MAX]; |
| |
| snprintf(full_name, sizeof(full_name), "%s/%s%s", |
| kvm__get_dir(), name, KVM_SOCK_SUFFIX); |
| unlink(full_name); |
| } |
| |
| int kvm__get_sock_by_instance(const char *name) |
| { |
| int s, len, r; |
| char sock_file[PATH_MAX]; |
| struct sockaddr_un local; |
| |
| snprintf(sock_file, sizeof(sock_file), "%s/%s%s", |
| kvm__get_dir(), name, KVM_SOCK_SUFFIX); |
| s = socket(AF_UNIX, SOCK_STREAM, 0); |
| |
| local.sun_family = AF_UNIX; |
| strlcpy(local.sun_path, sock_file, sizeof(local.sun_path)); |
| len = strlen(local.sun_path) + sizeof(local.sun_family); |
| |
| r = connect(s, (struct sockaddr *)&local, len); |
| if (r < 0 && errno == ECONNREFUSED) { |
| /* Clean up the ghost socket file */ |
| unlink(local.sun_path); |
| pr_info("Removed ghost socket file \"%s\".", sock_file); |
| return r; |
| } else if (r < 0) { |
| return r; |
| } |
| |
| return s; |
| } |
| |
| static bool is_socket(const char *base_path, const struct dirent *dent) |
| { |
| switch (dent->d_type) { |
| case DT_SOCK: |
| return true; |
| |
| case DT_UNKNOWN: { |
| char path[PATH_MAX]; |
| struct stat st; |
| |
| sprintf(path, "%s/%s", base_path, dent->d_name); |
| if (stat(path, &st)) |
| return false; |
| |
| return S_ISSOCK(st.st_mode); |
| } |
| default: |
| return false; |
| } |
| } |
| |
| int kvm__enumerate_instances(int (*callback)(const char *name, int fd)) |
| { |
| int sock; |
| DIR *dir; |
| struct dirent *entry; |
| int ret = 0; |
| const char *path; |
| |
| path = kvm__get_dir(); |
| |
| dir = opendir(path); |
| if (!dir) |
| return -errno; |
| |
| for (;;) { |
| entry = readdir(dir); |
| if (!entry) |
| break; |
| if (is_socket(path, entry)) { |
| ssize_t name_len = strlen(entry->d_name); |
| char *p; |
| |
| if (name_len <= KVM_SOCK_SUFFIX_LEN) |
| continue; |
| |
| p = &entry->d_name[name_len - KVM_SOCK_SUFFIX_LEN]; |
| if (memcmp(KVM_SOCK_SUFFIX, p, KVM_SOCK_SUFFIX_LEN)) |
| continue; |
| |
| *p = 0; |
| sock = kvm__get_sock_by_instance(entry->d_name); |
| if (sock < 0) |
| continue; |
| ret = callback(entry->d_name, sock); |
| close(sock); |
| if (ret < 0) |
| break; |
| } |
| } |
| |
| closedir(dir); |
| |
| return ret; |
| } |
| |
| int kvm_ipc__register_handler(u32 type, void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg)) |
| { |
| if (type >= KVM_IPC_MAX_MSGS) |
| return -ENOSPC; |
| |
| down_write(&msgs_rwlock); |
| msgs[type] = cb; |
| up_write(&msgs_rwlock); |
| |
| return 0; |
| } |
| |
| int kvm_ipc__send(int fd, u32 type) |
| { |
| struct kvm_ipc_head head = {.type = type, .len = 0,}; |
| |
| if (write_in_full(fd, &head, sizeof(head)) < 0) |
| return -1; |
| |
| return 0; |
| } |
| |
| int kvm_ipc__send_msg(int fd, u32 type, u32 len, u8 *msg) |
| { |
| struct kvm_ipc_head head = {.type = type, .len = len,}; |
| |
| if (write_in_full(fd, &head, sizeof(head)) < 0) |
| return -1; |
| |
| if (write_in_full(fd, msg, len) < 0) |
| return -1; |
| |
| return 0; |
| } |
| |
| static int kvm_ipc__handle(struct kvm *kvm, int fd, u32 type, u32 len, u8 *data) |
| { |
| void (*cb)(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg); |
| |
| if (type >= KVM_IPC_MAX_MSGS) |
| return -ENOSPC; |
| |
| down_read(&msgs_rwlock); |
| cb = msgs[type]; |
| up_read(&msgs_rwlock); |
| |
| if (cb == NULL) { |
| pr_warning("No device handles type %u\n", type); |
| return -ENODEV; |
| } |
| |
| cb(kvm, fd, type, len, data); |
| |
| return 0; |
| } |
| |
| static int kvm_ipc__new_conn(int fd) |
| { |
| int client; |
| struct epoll_event ev; |
| |
| client = accept(fd, NULL, NULL); |
| if (client < 0) |
| return -1; |
| |
| ev.events = EPOLLIN | EPOLLRDHUP; |
| ev.data.fd = client; |
| if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client, &ev) < 0) { |
| close(client); |
| return -1; |
| } |
| |
| return client; |
| } |
| |
| static void kvm_ipc__close_conn(int fd) |
| { |
| epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, NULL); |
| close(fd); |
| } |
| |
| static int kvm_ipc__receive(struct kvm *kvm, int fd) |
| { |
| struct kvm_ipc_head head; |
| u8 *msg = NULL; |
| u32 n; |
| |
| n = read(fd, &head, sizeof(head)); |
| if (n != sizeof(head)) |
| goto done; |
| |
| msg = malloc(head.len); |
| if (msg == NULL) |
| goto done; |
| |
| n = read_in_full(fd, msg, head.len); |
| if (n != head.len) |
| goto done; |
| |
| kvm_ipc__handle(kvm, fd, head.type, head.len, msg); |
| |
| return 0; |
| |
| done: |
| free(msg); |
| return -1; |
| } |
| |
| static void *kvm_ipc__thread(void *param) |
| { |
| struct epoll_event event; |
| struct kvm *kvm = param; |
| |
| kvm__set_thread_name("kvm-ipc"); |
| |
| for (;;) { |
| int nfds; |
| |
| nfds = epoll_wait(epoll_fd, &event, 1, -1); |
| if (nfds > 0) { |
| int fd = event.data.fd; |
| |
| if (fd == stop_fd && event.events & EPOLLIN) { |
| break; |
| } else if (fd == server_fd) { |
| int client, r; |
| |
| client = kvm_ipc__new_conn(fd); |
| /* |
| * Handle multiple IPC cmd at a time |
| */ |
| do { |
| r = kvm_ipc__receive(kvm, client); |
| } while (r == 0); |
| |
| } else if (event.events & (EPOLLERR | EPOLLRDHUP | EPOLLHUP)) { |
| kvm_ipc__close_conn(fd); |
| } else { |
| kvm_ipc__receive(kvm, fd); |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
| static void kvm__pid(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) |
| { |
| pid_t pid = getpid(); |
| int r = 0; |
| |
| if (type == KVM_IPC_PID) |
| r = write(fd, &pid, sizeof(pid)); |
| |
| if (r < 0) |
| pr_warning("Failed sending PID"); |
| } |
| |
| static void handle_stop(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) |
| { |
| if (WARN_ON(type != KVM_IPC_STOP || len)) |
| return; |
| |
| kvm__reboot(kvm); |
| } |
| |
| /* Pause/resume the guest using SIGUSR2 */ |
| static int is_paused; |
| |
| static void handle_pause(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) |
| { |
| if (WARN_ON(len)) |
| return; |
| |
| if (type == KVM_IPC_RESUME && is_paused) { |
| kvm->vm_state = KVM_VMSTATE_RUNNING; |
| kvm__continue(kvm); |
| } else if (type == KVM_IPC_PAUSE && !is_paused) { |
| kvm->vm_state = KVM_VMSTATE_PAUSED; |
| ioctl(kvm->vm_fd, KVM_KVMCLOCK_CTRL); |
| kvm__pause(kvm); |
| } else { |
| return; |
| } |
| |
| is_paused = !is_paused; |
| } |
| |
| static void handle_vmstate(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) |
| { |
| int r = 0; |
| |
| if (type == KVM_IPC_VMSTATE) |
| r = write(fd, &kvm->vm_state, sizeof(kvm->vm_state)); |
| |
| if (r < 0) |
| pr_warning("Failed sending VMSTATE"); |
| } |
| |
| /* |
| * Serialize debug printout so that the output of multiple vcpus does not |
| * get mixed up: |
| */ |
| static int printout_done; |
| |
| static void handle_sigusr1(int sig) |
| { |
| struct kvm_cpu *cpu = current_kvm_cpu; |
| int fd = kvm_cpu__get_debug_fd(); |
| |
| if (!cpu || cpu->needs_nmi) |
| return; |
| |
| dprintf(fd, "\n #\n # vCPU #%ld's dump:\n #\n", cpu->cpu_id); |
| kvm_cpu__show_registers(cpu); |
| kvm_cpu__show_code(cpu); |
| kvm_cpu__show_page_tables(cpu); |
| fflush(stdout); |
| printout_done = 1; |
| } |
| |
| static void handle_debug(struct kvm *kvm, int fd, u32 type, u32 len, u8 *msg) |
| { |
| int i; |
| struct debug_cmd_params *params; |
| u32 dbg_type; |
| u32 vcpu; |
| |
| if (WARN_ON(type != KVM_IPC_DEBUG || len != sizeof(*params))) |
| return; |
| |
| params = (void *)msg; |
| dbg_type = params->dbg_type; |
| vcpu = params->cpu; |
| |
| if (dbg_type & KVM_DEBUG_CMD_TYPE_SYSRQ) |
| serial8250__inject_sysrq(kvm, params->sysrq); |
| |
| if (dbg_type & KVM_DEBUG_CMD_TYPE_NMI) { |
| if ((int)vcpu >= kvm->nrcpus) |
| return; |
| |
| kvm->cpus[vcpu]->needs_nmi = 1; |
| pthread_kill(kvm->cpus[vcpu]->thread, SIGUSR1); |
| } |
| |
| if (!(dbg_type & KVM_DEBUG_CMD_TYPE_DUMP)) |
| return; |
| |
| for (i = 0; i < kvm->nrcpus; i++) { |
| struct kvm_cpu *cpu = kvm->cpus[i]; |
| |
| if (!cpu) |
| continue; |
| |
| printout_done = 0; |
| |
| kvm_cpu__set_debug_fd(fd); |
| pthread_kill(cpu->thread, SIGUSR1); |
| /* |
| * Wait for the vCPU to dump state before signalling |
| * the next thread. Since this is debug code it does |
| * not matter that we are burning CPU time a bit: |
| */ |
| while (!printout_done) |
| sleep(0); |
| } |
| |
| close(fd); |
| |
| serial8250__inject_sysrq(kvm, 'p'); |
| } |
| |
| int kvm_ipc__init(struct kvm *kvm) |
| { |
| int ret; |
| int sock = kvm__create_socket(kvm); |
| struct epoll_event ev = {0}; |
| |
| server_fd = sock; |
| |
| epoll_fd = epoll_create(KVM_IPC_MAX_MSGS); |
| if (epoll_fd < 0) { |
| perror("epoll_create"); |
| ret = epoll_fd; |
| goto err; |
| } |
| |
| ev.events = EPOLLIN | EPOLLET; |
| ev.data.fd = sock; |
| if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev) < 0) { |
| pr_err("Failed adding socket to epoll"); |
| ret = -EFAULT; |
| goto err_epoll; |
| } |
| |
| stop_fd = eventfd(0, 0); |
| if (stop_fd < 0) { |
| perror("eventfd"); |
| ret = stop_fd; |
| goto err_epoll; |
| } |
| |
| ev.events = EPOLLIN | EPOLLET; |
| ev.data.fd = stop_fd; |
| if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, stop_fd, &ev) < 0) { |
| pr_err("Failed adding stop event to epoll"); |
| ret = -EFAULT; |
| goto err_stop; |
| } |
| |
| if (pthread_create(&thread, NULL, kvm_ipc__thread, kvm) != 0) { |
| pr_err("Failed starting IPC thread"); |
| ret = -EFAULT; |
| goto err_stop; |
| } |
| |
| kvm_ipc__register_handler(KVM_IPC_PID, kvm__pid); |
| kvm_ipc__register_handler(KVM_IPC_DEBUG, handle_debug); |
| kvm_ipc__register_handler(KVM_IPC_PAUSE, handle_pause); |
| kvm_ipc__register_handler(KVM_IPC_RESUME, handle_pause); |
| kvm_ipc__register_handler(KVM_IPC_STOP, handle_stop); |
| kvm_ipc__register_handler(KVM_IPC_VMSTATE, handle_vmstate); |
| signal(SIGUSR1, handle_sigusr1); |
| |
| return 0; |
| |
| err_stop: |
| close(stop_fd); |
| err_epoll: |
| close(epoll_fd); |
| err: |
| return ret; |
| } |
| base_init(kvm_ipc__init); |
| |
| int kvm_ipc__exit(struct kvm *kvm) |
| { |
| u64 val = 1; |
| int ret; |
| |
| ret = write(stop_fd, &val, sizeof(val)); |
| if (ret < 0) |
| return ret; |
| |
| close(server_fd); |
| close(epoll_fd); |
| |
| kvm__remove_socket(kvm->cfg.guest_name); |
| |
| return ret; |
| } |
| base_exit(kvm_ipc__exit); |