| // SPDX-License-Identifier: GPL-2.0-only |
| #include <xen/xen.h> |
| #include <xen/events.h> |
| #include <xen/grant_table.h> |
| #include <xen/hvm.h> |
| #include <xen/interface/vcpu.h> |
| #include <xen/interface/xen.h> |
| #include <xen/interface/memory.h> |
| #include <xen/interface/hvm/params.h> |
| #include <xen/features.h> |
| #include <xen/platform_pci.h> |
| #include <xen/xenbus.h> |
| #include <xen/page.h> |
| #include <xen/interface/sched.h> |
| #include <xen/xen-ops.h> |
| #include <asm/xen/hypervisor.h> |
| #include <asm/xen/hypercall.h> |
| #include <asm/system_misc.h> |
| #include <asm/efi.h> |
| #include <linux/interrupt.h> |
| #include <linux/irqreturn.h> |
| #include <linux/module.h> |
| #include <linux/of.h> |
| #include <linux/of_fdt.h> |
| #include <linux/of_irq.h> |
| #include <linux/of_address.h> |
| #include <linux/cpuidle.h> |
| #include <linux/cpufreq.h> |
| #include <linux/cpu.h> |
| #include <linux/console.h> |
| #include <linux/pvclock_gtod.h> |
| #include <linux/reboot.h> |
| #include <linux/time64.h> |
| #include <linux/timekeeping.h> |
| #include <linux/timekeeper_internal.h> |
| #include <linux/acpi.h> |
| #include <linux/virtio_anchor.h> |
| |
| #include <linux/mm.h> |
| |
| static struct start_info _xen_start_info; |
| struct start_info *xen_start_info = &_xen_start_info; |
| EXPORT_SYMBOL(xen_start_info); |
| |
| enum xen_domain_type xen_domain_type = XEN_NATIVE; |
| EXPORT_SYMBOL(xen_domain_type); |
| |
| struct shared_info xen_dummy_shared_info; |
| struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; |
| |
| DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); |
| static struct vcpu_info __percpu *xen_vcpu_info; |
| |
| /* Linux <-> Xen vCPU id mapping */ |
| DEFINE_PER_CPU(uint32_t, xen_vcpu_id); |
| EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); |
| |
| /* These are unused until we support booting "pre-ballooned" */ |
| unsigned long xen_released_pages; |
| struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; |
| |
| static __read_mostly unsigned int xen_events_irq; |
| static __read_mostly phys_addr_t xen_grant_frames; |
| |
| #define GRANT_TABLE_INDEX 0 |
| #define EXT_REGION_INDEX 1 |
| |
| uint32_t xen_start_flags; |
| EXPORT_SYMBOL(xen_start_flags); |
| |
/*
 * Unmap a GFN range from @vma.
 *
 * Thin wrapper: all arguments are forwarded to xen_xlate_unmap_gfn_range()
 * and its return value is handed back unchanged.
 */
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
			       int nr, struct page **pages)
{
	int ret;

	ret = xen_xlate_unmap_gfn_range(vma, nr, pages);

	return ret;
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
| |
| static void xen_read_wallclock(struct timespec64 *ts) |
| { |
| u32 version; |
| struct timespec64 now, ts_monotonic; |
| struct shared_info *s = HYPERVISOR_shared_info; |
| struct pvclock_wall_clock *wall_clock = &(s->wc); |
| |
| /* get wallclock at system boot */ |
| do { |
| version = wall_clock->version; |
| rmb(); /* fetch version before time */ |
| now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec; |
| now.tv_nsec = wall_clock->nsec; |
| rmb(); /* fetch time before checking version */ |
| } while ((wall_clock->version & 1) || (version != wall_clock->version)); |
| |
| /* time since system boot */ |
| ktime_get_ts64(&ts_monotonic); |
| *ts = timespec64_add(now, ts_monotonic); |
| } |
| |
| static int xen_pvclock_gtod_notify(struct notifier_block *nb, |
| unsigned long was_set, void *priv) |
| { |
| /* Protected by the calling core code serialization */ |
| static struct timespec64 next_sync; |
| |
| struct xen_platform_op op; |
| struct timespec64 now, system_time; |
| struct timekeeper *tk = priv; |
| |
| now.tv_sec = tk->xtime_sec; |
| now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); |
| system_time = timespec64_add(now, tk->wall_to_monotonic); |
| |
| /* |
| * We only take the expensive HV call when the clock was set |
| * or when the 11 minutes RTC synchronization time elapsed. |
| */ |
| if (!was_set && timespec64_compare(&now, &next_sync) < 0) |
| return NOTIFY_OK; |
| |
| op.cmd = XENPF_settime64; |
| op.u.settime64.mbz = 0; |
| op.u.settime64.secs = now.tv_sec; |
| op.u.settime64.nsecs = now.tv_nsec; |
| op.u.settime64.system_time = timespec64_to_ns(&system_time); |
| (void)HYPERVISOR_platform_op(&op); |
| |
| /* |
| * Move the next drift compensation time 11 minutes |
| * ahead. That's emulating the sync_cmos_clock() update for |
| * the hardware RTC. |
| */ |
| next_sync = now; |
| next_sync.tv_sec += 11 * 60; |
| |
| return NOTIFY_OK; |
| } |
| |
| static struct notifier_block xen_pvclock_gtod_notifier = { |
| .notifier_call = xen_pvclock_gtod_notify, |
| }; |
| |
| static int xen_starting_cpu(unsigned int cpu) |
| { |
| struct vcpu_register_vcpu_info info; |
| struct vcpu_info *vcpup; |
| int err; |
| |
| /* |
| * VCPUOP_register_vcpu_info cannot be called twice for the same |
| * vcpu, so if vcpu_info is already registered, just get out. This |
| * can happen with cpu-hotplug. |
| */ |
| if (per_cpu(xen_vcpu, cpu) != NULL) |
| goto after_register_vcpu_info; |
| |
| pr_info("Xen: initializing cpu%d\n", cpu); |
| vcpup = per_cpu_ptr(xen_vcpu_info, cpu); |
| |
| info.mfn = percpu_to_gfn(vcpup); |
| info.offset = xen_offset_in_page(vcpup); |
| |
| err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), |
| &info); |
| BUG_ON(err); |
| per_cpu(xen_vcpu, cpu) = vcpup; |
| |
| if (!xen_kernel_unmapped_at_usr()) |
| xen_setup_runstate_info(cpu); |
| |
| after_register_vcpu_info: |
| enable_percpu_irq(xen_events_irq, 0); |
| return 0; |
| } |
| |
| static int xen_dying_cpu(unsigned int cpu) |
| { |
| disable_percpu_irq(xen_events_irq); |
| return 0; |
| } |
| |
| void xen_reboot(int reason) |
| { |
| struct sched_shutdown r = { .reason = reason }; |
| int rc; |
| |
| rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); |
| BUG_ON(rc); |
| } |
| |
| static int xen_restart(struct notifier_block *nb, unsigned long action, |
| void *data) |
| { |
| xen_reboot(SHUTDOWN_reboot); |
| |
| return NOTIFY_DONE; |
| } |
| |
| static struct notifier_block xen_restart_nb = { |
| .notifier_call = xen_restart, |
| .priority = 192, |
| }; |
| |
| static void xen_power_off(void) |
| { |
| xen_reboot(SHUTDOWN_poweroff); |
| } |
| |
| static irqreturn_t xen_arm_callback(int irq, void *arg) |
| { |
| xen_evtchn_do_upcall(); |
| return IRQ_HANDLED; |
| } |
| |
| static __initdata struct { |
| const char *compat; |
| const char *prefix; |
| const char *version; |
| bool found; |
| } hyper_node = {"xen,xen", "xen,xen-", NULL, false}; |
| |
| static int __init fdt_find_hyper_node(unsigned long node, const char *uname, |
| int depth, void *data) |
| { |
| const void *s = NULL; |
| int len; |
| |
| if (depth != 1 || strcmp(uname, "hypervisor") != 0) |
| return 0; |
| |
| if (of_flat_dt_is_compatible(node, hyper_node.compat)) |
| hyper_node.found = true; |
| |
| s = of_get_flat_dt_prop(node, "compatible", &len); |
| if (strlen(hyper_node.prefix) + 3 < len && |
| !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) |
| hyper_node.version = s + strlen(hyper_node.prefix); |
| |
| /* |
| * Check if Xen supports EFI by checking whether there is the |
| * "/hypervisor/uefi" node in DT. If so, runtime services are available |
| * through proxy functions (e.g. in case of Xen dom0 EFI implementation |
| * they call special hypercall which executes relevant EFI functions) |
| * and that is why they are always enabled. |
| */ |
| if (IS_ENABLED(CONFIG_XEN_EFI)) { |
| if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) && |
| !efi_runtime_disabled()) |
| set_bit(EFI_RUNTIME_SERVICES, &efi.flags); |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * see Documentation/devicetree/bindings/arm/xen.txt for the |
| * documentation of the Xen Device Tree format. |
| */ |
| void __init xen_early_init(void) |
| { |
| of_scan_flat_dt(fdt_find_hyper_node, NULL); |
| if (!hyper_node.found) { |
| pr_debug("No Xen support\n"); |
| return; |
| } |
| |
| if (hyper_node.version == NULL) { |
| pr_debug("Xen version not found\n"); |
| return; |
| } |
| |
| pr_info("Xen %s support found\n", hyper_node.version); |
| |
| xen_domain_type = XEN_HVM_DOMAIN; |
| |
| xen_setup_features(); |
| |
| if (xen_feature(XENFEAT_dom0)) |
| xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED; |
| |
| if (!console_set_on_cmdline && !xen_initial_domain()) |
| add_preferred_console("hvc", 0, NULL); |
| } |
| |
| static void __init xen_acpi_guest_init(void) |
| { |
| #ifdef CONFIG_ACPI |
| struct xen_hvm_param a; |
| int interrupt, trigger, polarity; |
| |
| a.domid = DOMID_SELF; |
| a.index = HVM_PARAM_CALLBACK_IRQ; |
| |
| if (HYPERVISOR_hvm_op(HVMOP_get_param, &a) |
| || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) { |
| xen_events_irq = 0; |
| return; |
| } |
| |
| interrupt = a.value & 0xff; |
| trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE |
| : ACPI_LEVEL_SENSITIVE; |
| polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW |
| : ACPI_ACTIVE_HIGH; |
| xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity); |
| #endif |
| } |
| |
#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
/*
 * A type-less specific Xen resource which contains extended regions
 * (unused regions of guest physical address space provided by the hypervisor).
 */
static struct resource xen_resource = {
	.name = "Xen unused space",
};

/*
 * Build the resource describing the extended regions advertised in the
 * "xen,xen" device tree node.
 *
 * Every address entry of the node from EXT_REGION_INDEX onwards is treated
 * as an extended region. xen_resource is sized to span all of them, and
 * each gap between two consecutive regions is inserted as a child
 * "Unavailable space" resource. Consecutive entries must not overlap; an
 * entry that starts before the previous one ends is rejected.
 *
 * @res: on success, receives a pointer to xen_resource
 *
 * Returns 0 on success, -ENODEV when not running on Xen, when ACPI is in
 * use or when the node is missing, -EINVAL when no extended region exists
 * or regions overlap, -ENOMEM on allocation failure, or the error returned
 * by of_address_to_resource() / insert_resource().
 */
int __init arch_xen_unpopulated_init(struct resource **res)
{
	struct device_node *np;
	struct resource *regs, *tmp_res;
	uint64_t min_gpaddr = -1, max_gpaddr = 0;
	unsigned int i, nr_reg = 0;
	int rc = 0;

	if (!xen_domain())
		return -ENODEV;

	if (!acpi_disabled)
		return -ENODEV;

	np = of_find_compatible_node(NULL, NULL, "xen,xen");
	if (WARN_ON(!np))
		return -ENODEV;

	/* Skip region 0 which is reserved for grant table space */
	while (of_get_address(np, nr_reg + EXT_REGION_INDEX, NULL, NULL))
		nr_reg++;

	if (!nr_reg) {
		pr_err("No extended regions are found\n");
		of_node_put(np);
		return -EINVAL;
	}

	regs = kcalloc(nr_reg, sizeof(*regs), GFP_KERNEL);
	if (!regs) {
		of_node_put(np);
		return -ENOMEM;
	}

	/*
	 * Create resource from extended regions provided by the hypervisor to be
	 * used as unused address space for Xen scratch pages.
	 */
	for (i = 0; i < nr_reg; i++) {
		/* regs + i is the address of the i-th element of regs[] */
		rc = of_address_to_resource(np, i + EXT_REGION_INDEX, regs + i);
		if (rc)
			goto err;

		if (max_gpaddr < regs[i].end)
			max_gpaddr = regs[i].end;
		if (min_gpaddr > regs[i].start)
			min_gpaddr = regs[i].start;
	}

	xen_resource.start = min_gpaddr;
	xen_resource.end = max_gpaddr;

	/*
	 * Mark holes between extended regions as unavailable. The rest of that
	 * address space will be available for the allocation.
	 */
	for (i = 1; i < nr_reg; i++) {
		resource_size_t start, end;

		/* There is an overlap between regions */
		if (regs[i - 1].end + 1 > regs[i].start) {
			rc = -EINVAL;
			goto err;
		}

		/* There is no hole between regions */
		if (regs[i - 1].end + 1 == regs[i].start)
			continue;

		start = regs[i - 1].end + 1;
		end = regs[i].start - 1;

		tmp_res = kzalloc(sizeof(*tmp_res), GFP_KERNEL);
		if (!tmp_res) {
			rc = -ENOMEM;
			goto err;
		}

		tmp_res->name = "Unavailable space";
		tmp_res->start = start;
		tmp_res->end = end;

		rc = insert_resource(&xen_resource, tmp_res);
		if (rc) {
			pr_err("Cannot insert resource %pR (%d)\n", tmp_res, rc);
			kfree(tmp_res);
			goto err;
		}
	}

	*res = &xen_resource;

	/* Success falls through: the node reference and regs[] are released either way. */
err:
	of_node_put(np);
	kfree(regs);
	return rc;
}
#endif
| |
| static void __init xen_dt_guest_init(void) |
| { |
| struct device_node *xen_node; |
| struct resource res; |
| |
| xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); |
| if (!xen_node) { |
| pr_err("Xen support was detected before, but it has disappeared\n"); |
| return; |
| } |
| |
| xen_events_irq = irq_of_parse_and_map(xen_node, 0); |
| |
| if (of_address_to_resource(xen_node, GRANT_TABLE_INDEX, &res)) { |
| pr_err("Xen grant table region is not found\n"); |
| of_node_put(xen_node); |
| return; |
| } |
| of_node_put(xen_node); |
| xen_grant_frames = res.start; |
| } |
| |
| static int __init xen_guest_init(void) |
| { |
| struct xen_add_to_physmap xatp; |
| struct shared_info *shared_info_page = NULL; |
| int rc, cpu; |
| |
| if (!xen_domain()) |
| return 0; |
| |
| if (IS_ENABLED(CONFIG_XEN_VIRTIO)) |
| virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc); |
| |
| if (!acpi_disabled) |
| xen_acpi_guest_init(); |
| else |
| xen_dt_guest_init(); |
| |
| if (!xen_events_irq) { |
| pr_err("Xen event channel interrupt not found\n"); |
| return -ENODEV; |
| } |
| |
| /* |
| * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI |
| * parameters are found. Force enable runtime services. |
| */ |
| if (efi_enabled(EFI_RUNTIME_SERVICES)) |
| xen_efi_runtime_setup(); |
| |
| shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); |
| |
| if (!shared_info_page) { |
| pr_err("not enough memory\n"); |
| return -ENOMEM; |
| } |
| xatp.domid = DOMID_SELF; |
| xatp.idx = 0; |
| xatp.space = XENMAPSPACE_shared_info; |
| xatp.gpfn = virt_to_gfn(shared_info_page); |
| if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) |
| BUG(); |
| |
| HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; |
| |
| /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info |
| * page, we use it in the event channel upcall and in some pvclock |
| * related functions. |
| * The shared info contains exactly 1 CPU (the boot CPU). The guest |
| * is required to use VCPUOP_register_vcpu_info to place vcpu info |
| * for secondary CPUs as they are brought up. |
| * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. |
| */ |
| xen_vcpu_info = alloc_percpu(struct vcpu_info); |
| if (xen_vcpu_info == NULL) |
| return -ENOMEM; |
| |
| /* Direct vCPU id mapping for ARM guests. */ |
| for_each_possible_cpu(cpu) |
| per_cpu(xen_vcpu_id, cpu) = cpu; |
| |
| if (!xen_grant_frames) { |
| xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); |
| rc = xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, |
| &xen_auto_xlat_grant_frames.vaddr, |
| xen_auto_xlat_grant_frames.count); |
| } else |
| rc = gnttab_setup_auto_xlat_frames(xen_grant_frames); |
| if (rc) { |
| free_percpu(xen_vcpu_info); |
| return rc; |
| } |
| gnttab_init(); |
| |
| /* |
| * Making sure board specific code will not set up ops for |
| * cpu idle and cpu freq. |
| */ |
| disable_cpuidle(); |
| disable_cpufreq(); |
| |
| xen_init_IRQ(); |
| |
| if (request_percpu_irq(xen_events_irq, xen_arm_callback, |
| "events", &xen_vcpu)) { |
| pr_err("Error request IRQ %d\n", xen_events_irq); |
| return -EINVAL; |
| } |
| |
| if (!xen_kernel_unmapped_at_usr()) |
| xen_time_setup_guest(); |
| |
| if (xen_initial_domain()) |
| pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); |
| |
| return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING, |
| "arm/xen:starting", xen_starting_cpu, |
| xen_dying_cpu); |
| } |
| early_initcall(xen_guest_init); |
| |
| static int __init xen_pm_init(void) |
| { |
| if (!xen_domain()) |
| return -ENODEV; |
| |
| pm_power_off = xen_power_off; |
| register_restart_handler(&xen_restart_nb); |
| if (!xen_initial_domain()) { |
| struct timespec64 ts; |
| xen_read_wallclock(&ts); |
| do_settimeofday64(&ts); |
| } |
| |
| return 0; |
| } |
| late_initcall(xen_pm_init); |
| |
| |
/* Suspend/resume hooks: intentionally empty stubs in this file. */
void xen_arch_pre_suspend(void)
{
}

void xen_arch_post_suspend(int suspend_cancelled)
{
}

void xen_timer_resume(void)
{
}

void xen_arch_resume(void)
{
}

void xen_arch_suspend(void)
{
}
| |
| |
/*
 * Hypercall entry points implemented in the hypercall.S file,
 * exported here for GPL modules.
 */
EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
EXPORT_SYMBOL_GPL(HYPERVISOR_console_io);
EXPORT_SYMBOL_GPL(HYPERVISOR_sched_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_hvm_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op_raw);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op);
EXPORT_SYMBOL_GPL(privcmd_call);