blob: d509a2489a5d7fb635f0c41c98389c07085d179b [file] [log] [blame]
/*
* Initialize machine setup information
*
* Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com>
* Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com>
*
* This work is licensed under the terms of the GNU LGPL, version 2.
*/
#include "libcflat.h"
#include "fwcfg.h"
#include "alloc_phys.h"
#include "argv.h"
#include "desc.h"
#include "apic.h"
#include "apic-defs.h"
#include "asm/setup.h"
#include "atomic.h"
#include "pmu.h"
#include "processor.h"
#include "smp.h"
/*
 * Symbol defined outside this file (presumably by the linker script --
 * TODO confirm). Only its address is used: find_highmem() and
 * setup_multiboot() treat &edata as the lowest address that may be handed
 * to the physical allocator.
 */
extern char edata;
/*
 * Boot information block passed to setup_multiboot(). Every member is a
 * 32-bit value; the ranges noted on the reserved arrays are byte offsets
 * from the start of the structure. Only the members commented below are
 * read by the code in this file.
 */
struct mbi_bootinfo {
/* find_highmem() requires bit 6 (value 64) to be set before it reads mmap_*. */
u32 flags;
u32 mem_lower;
/* Multiplied by 1024 to obtain a byte address in find_highmem() and setup_multiboot(). */
u32 mem_upper;
u32 boot_device;
u32 cmdline;
/* setup_multiboot() only looks at mods_addr when this is exactly 1. */
u32 mods_count;
/* Address of a struct mbi_module describing the boot module. */
u32 mods_addr;
u32 reserved[4]; /* 28-43 */
/* Total size in bytes of the struct mbi_mem list walked by find_highmem(). */
u32 mmap_length;
/* Address of the first struct mbi_mem entry. */
u32 mmap_addr;
u32 reserved0[3]; /* 52-63 */
u32 bootloader;
u32 reserved1[5]; /* 68-87 */
u32 size;
};
/*
 * Descriptor of one boot module, located via mbi_bootinfo.mods_addr.
 * setup_multiboot() uses start as the initrd address and (end - start) as
 * the initrd size; cmdline and unused are not read in this file.
 */
struct mbi_module {
u32 start, end;
u32 cmdline;
u32 unused;
};
/*
 * One entry of the memory map walked by find_highmem(). The structure is
 * packed, so the 64-bit members directly follow the 32-bit size field.
 */
struct mbi_mem {
/* find_highmem() advances to the next entry by adding (size + 4) to the entry address. */
u32 size;
/* Start address and byte length of the region. */
u64 base_addr;
u64 length;
/* find_highmem() only considers entries whose type is 1. */
u32 type;
} __attribute__((packed));
/* Capacity in bytes of the static env[] buffer below. */
#define ENV_SIZE 16384
/* setup_env() is not defined in this file; setup_libcflat() calls it. */
void setup_env(char *env, int size);
void setup_multiboot(struct mbi_bootinfo *bootinfo);
void setup_libcflat(void);
/*
 * Address and byte size of the boot module, filled in by setup_multiboot()
 * when exactly one module was supplied. Being file-scope objects without an
 * initializer, they start out as NULL / 0.
 */
char *initrd;
u32 initrd_size;
/* Private copy of (at most ENV_SIZE bytes of) the initrd, filled by setup_libcflat(). */
static char env[ENV_SIZE];
/* Boot information pointer saved by setup_multiboot() and read by find_highmem(). */
static struct mbi_bootinfo *bootinfo;
/* 2 MiB; find_highmem() aligns the region it selects to this granularity. */
#define HUGEPAGE_SIZE (1 << 21)
#ifdef __x86_64__
/*
 * Look through the boot memory map for a usable region that lies beyond the
 * memory given to the allocator by setup_multiboot() and, if one is found,
 * re-initialize the physical allocator on it.
 *
 * A map entry is a candidate when all of the following hold:
 *   - its type is 1;
 *   - it starts above &edata;
 *   - it is not contained entirely at or below mem_upper * 1024;
 *   - its length is at least the size of the best region so far (initially
 *     the [&edata, mem_upper * 1024) range);
 *   - it starts below the FW_CFG_MAX_RAM limit (no limit when that is 0).
 *
 * The last candidate that passes wins. Its end is clamped to the limit and
 * the region is shrunk inwards to HUGEPAGE_SIZE alignment before being
 * passed to phys_alloc_init().
 *
 * NOTE(review): the length comparison uses the entry's unclamped length, so
 * a region later truncated by the limit can replace a larger usable one --
 * confirm this is intended.
 */
void find_highmem(void)
{
	/* Memory above 4 GB is only supported on 64-bit systems. */
	if ((bootinfo->flags & 64) == 0)
		return;

	u64 low_end = bootinfo->mem_upper * 1024ull;
	u64 region_start = (uintptr_t) &edata;
	u64 region_end = low_end;
	u64 ram_limit = fwcfg_get_u64(FW_CFG_MAX_RAM);
	bool have_region = false;
	uintptr_t cursor = bootinfo->mmap_addr;

	if (!ram_limit)
		ram_limit = -1ull;

	while (cursor < bootinfo->mmap_addr + bootinfo->mmap_length) {
		struct mbi_mem *entry = (void *)cursor;
		u64 base, len;

		/* The size field does not count itself, hence the extra 4 bytes. */
		cursor += entry->size + 4;

		if (entry->type != 1)
			continue;

		base = entry->base_addr;
		len = entry->length;

		/* Starts at or below the end of the loaded image. */
		if (base <= (uintptr_t) &edata)
			continue;
		/* Lies completely within the low memory range. */
		if (base <= low_end && base + len <= low_end)
			continue;
		/* Smaller than what we already have. */
		if (len < region_end - region_start)
			continue;
		/* Entirely beyond the RAM limit. */
		if (base >= ram_limit)
			continue;

		region_start = base;
		region_end = base + len;
		if (region_end > ram_limit)
			region_end = ram_limit;
		have_region = true;
	}

	if (!have_region)
		return;

	/* Round the start up and the end down to a HUGEPAGE_SIZE boundary. */
	region_start = (region_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE;
	region_end = region_end & -HUGEPAGE_SIZE;
	phys_alloc_init(region_start, region_end - region_start);
}
/*
 * Set up the TSS of the current processor and return the offset of its
 * descriptor within the GDT.
 *
 * The processor is identified by pre_boot_apic_id(). @stacktop is not used
 * by this (64-bit) variant.
 */
unsigned long setup_tss(u8 *stacktop)
{
	const u32 cpu = pre_boot_apic_id();
	/* Runtime address of this CPU's TSS */
	tss64_t *cur_tss = &tss[cpu];

	/* Start from an all-zero TSS */
	memset(cur_tss, 0, sizeof(*cur_tss));

	/* Each TSS descriptor takes up 2 GDT entries, hence the stride of 16 */
	set_gdt_entry(TSS_MAIN + cpu * 16, (unsigned long)cur_tss, 0xffff, 0x89, 0);

	return TSS_MAIN + cpu * 16;
}
#else
/*
 * Set up the TSS of the current processor and return the offset of its
 * descriptor within the GDT.
 *
 * The processor is identified by pre_boot_apic_id(). Besides the TSS
 * descriptor, this (32-bit) variant also installs a per-cpu data segment
 * descriptor whose base is 4096 bytes below @stacktop.
 */
unsigned long setup_tss(u8 *stacktop)
{
	const u32 cpu = pre_boot_apic_id();
	/* Runtime address of this CPU's TSS */
	tss32_t *cur_tss = &tss[cpu];

	/* Start from an all-zero TSS; only ss0 is filled in */
	memset(cur_tss, 0, sizeof(*cur_tss));
	cur_tss->ss0 = KERNEL_DS;

	/* TSS descriptor of this CPU */
	set_gdt_entry(TSS_MAIN + cpu * 8,
		      (unsigned long)cur_tss, 0xffff, 0x89, 0);

	/* Per-cpu data segment, located after the MAX_TEST_CPUS TSS descriptors */
	set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + cpu * 8,
		      (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0);

	return TSS_MAIN + cpu * 8;
}
#endif
void setup_multiboot(struct mbi_bootinfo *bi)
{
struct mbi_module *mods;
bootinfo = bi;
u64 best_start = (uintptr_t) &edata;
u64 best_end = bootinfo->mem_upper * 1024ull;
phys_alloc_init(best_start, best_end - best_start);
if (bootinfo->mods_count != 1)
return;
mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr;
initrd = (char *)(uintptr_t) mods->start;
initrd_size = mods->end - mods->start;
}
/*
 * Set up the current CPU's TSS via setup_tss() and hand the returned GDT
 * offset to load_gdt_tss().
 *
 * NULL is passed because the 64-bit setup_tss() does not use its stacktop
 * argument.
 * NOTE(review): the 32-bit setup_tss() in this file does use stacktop
 * (it computes stacktop - 4096) -- confirm this helper is only reached on
 * 64-bit builds.
 */
static void setup_gdt_tss(void)
{
	load_gdt_tss(setup_tss(NULL));
}
#ifdef CONFIG_EFI
/*
 * Per-CPU data slots, one per possible test CPU. setup_efi() loads
 * MSR_GS_BASE with the address of the slot indexed by pre_boot_apic_id().
 */
static struct percpu_data __percpu_data[MAX_TEST_CPUS];
static void setup_segments64(void)
{
/* Update data segments */
write_ds(KERNEL_DS);
write_es(KERNEL_DS);
write_fs(KERNEL_DS);
write_gs(KERNEL_DS);
write_ss(KERNEL_DS);
/*
* Update the code segment by putting it on the stack before the return
* address, then doing a far return: this will use the new code segment
* along with the address.
*/
asm volatile("pushq %1\n\t"
"lea 1f(%%rip), %0\n\t"
"pushq %0\n\t"
"lretq\n\t"
"1:"
:: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS));
}
/*
 * Initialize the physical memory allocator from the EFI memory map.
 *
 * The map in @efi_bootinfo is a byte buffer of descriptors spaced
 * *desc_size bytes apart. The EFI_CONVENTIONAL_MEMORY descriptor with the
 * most pages is selected (the first one wins on a tie), so that the
 * allocator works in the largest free contiguous region.
 *
 * Returns EFI_OUT_OF_RESOURCES if no such region with a non-zero page count
 * exists, EFI_SUCCESS otherwise.
 */
static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo)
{
	struct efi_boot_memmap *memmap = &efi_bootinfo->mem_map;
	u8 *raw = (u8 *)*memmap->map;
	unsigned long best_pages = 0;
	unsigned long best_base = 0;
	int off;

	for (off = 0; off < *memmap->map_size; off += *memmap->desc_size) {
		efi_memory_desc_t *desc = (efi_memory_desc_t *)(raw + off);

		if (desc->type != EFI_CONVENTIONAL_MEMORY)
			continue;
		if (desc->num_pages <= best_pages)
			continue;

		best_pages = desc->num_pages;
		best_base = desc->phys_addr;
	}

	if (!best_pages)
		return EFI_OUT_OF_RESOURCES;

	phys_alloc_init(best_base, best_pages << EFI_PAGE_SHIFT);
	return EFI_SUCCESS;
}
/*
 * Look up the ACPI RSDP in the EFI system configuration tables and register
 * it with set_efi_rsdp().
 *
 * The RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used
 * by kvm-unit-tests x86's memory allocator, so the structure is used in
 * place rather than copied elsewhere to protect it from being overwritten.
 *
 * @efi_bootinfo is not used. Returns the status of the table lookup;
 * set_efi_rsdp() is only called when that status is EFI_SUCCESS.
 */
static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo)
{
	struct acpi_table_rsdp *table;
	efi_status_t rc;

	rc = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&table);
	if (rc == EFI_SUCCESS)
		set_efi_rsdp(table);

	return rc;
}
/*
 * Defined in cstart64.S or efistart64.S
 *
 * Only the addresses of these symbols are used: setup_page_table() treats
 * them as the level 4, level 3 and level 2 page tables respectively.
 */
extern u8 ptl4;
extern u8 ptl3;
extern u8 ptl2;
static void setup_page_table(void)
{
pgd_t *curr_pt;
phys_addr_t flags;
int i;
/* Set default flags */
flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
/* Set AMD SEV C-Bit for page table entries */
flags |= get_amd_sev_c_bit_mask();
/* Level 4 */
curr_pt = (pgd_t *)&ptl4;
curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
/* Level 3 */
curr_pt = (pgd_t *)&ptl3;
for (i = 0; i < 4; i++) {
curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
}
/* Level 2 */
curr_pt = (pgd_t *)&ptl2;
flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
for (i = 0; i < 4 * 512; i++) {
curr_pt[i] = ((phys_addr_t) i << 21) | flags;
}
if (amd_sev_es_enabled()) {
setup_ghcb_pte((pgd_t *)&ptl4);
}
/* Load 4-level page table */
write_cr3((ulong)&ptl4);
}
/*
 * Machine setup entry point for EFI boots.
 *
 * Runs, in order: memory allocator setup, RSDP lookup, AMD SEV (and, if
 * that succeeds, SEV-ES) setup, GDT/TSS and segment setup, IDT setup,
 * GS.base setup, APIC reset, PIC masking, page table setup, APIC enable,
 * and finally the remaining BSP initialization in bsp_rest_init().
 *
 * Returns the failing status if the memory allocator or RSDP setup fails,
 * or if SEV / SEV-ES setup fails with anything other than EFI_UNSUPPORTED
 * (a message is printed in each case); EFI_SUCCESS otherwise.
 */
efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
{
efi_status_t status;
/* Name of the SEV setup step that produced 'status', for the error message. */
const char *phase;
status = setup_memory_allocator(efi_bootinfo);
if (status != EFI_SUCCESS) {
printf("Failed to set up memory allocator: ");
switch (status) {
case EFI_OUT_OF_RESOURCES:
printf("No free memory region\n");
break;
default:
printf("Unknown error\n");
break;
}
return status;
}
status = setup_rsdp(efi_bootinfo);
if (status != EFI_SUCCESS) {
printf("Cannot find RSDP in EFI system table\n");
return status;
}
phase = "AMD SEV";
status = setup_amd_sev();
/* Continue if AMD SEV is not supported, but skip SEV-ES setup */
if (status == EFI_SUCCESS) {
phase = "AMD SEV-ES";
status = setup_amd_sev_es();
}
/* EFI_UNSUPPORTED from either step is tolerated; anything else is fatal. */
if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) {
printf("%s setup failed, error = 0x%lx\n", phase, status);
return status;
}
setup_gdt_tss();
setup_segments64();
setup_idt();
load_idt();
/*
* Load GS.base with the per-vCPU data. This must be done after
* loading the IDT as reading the APIC ID may #VC when running
* as an SEV-ES guest
*/
wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]);
/*
* Resetting the APIC sets the per-vCPU APIC ops and so must be
* done after loading GS.base with the per-vCPU data.
*/
reset_apic();
mask_pic_interrupts();
setup_page_table();
enable_apic();
save_id();
bsp_rest_init();
return EFI_SUCCESS;
}
#endif /* CONFIG_EFI */
/*
 * Import the environment from the initrd, if one was provided.
 *
 * Up to ENV_SIZE bytes of the initrd are copied into the static env[]
 * buffer and passed to setup_env(). Afterwards, if the BOOTLOADER variable
 * is set and atol() of its value is non-zero, "bootloader" is added as a
 * setup argument. Does nothing when initrd is NULL.
 */
void setup_libcflat(void)
{
	const char *bootloader;
	u32 len;

	if (!initrd)
		return;

	/* environ is currently the only file in the initrd */
	len = MIN(initrd_size, ENV_SIZE);
	memcpy(env, initrd, len);
	setup_env(env, len);

	bootloader = getenv("BOOTLOADER");
	if (bootloader && atol(bootloader) != 0)
		add_setup_arg("bootloader");
}
/*
 * Record the calling CPU in online_cpus by setting the bit whose index is
 * the value returned by apic_id().
 */
void save_id(void)
{
set_bit(apic_id(), online_cpus);
}
/*
 * Per-CPU bring-up sequence: set up this CPU's GDT/TSS, reset the APIC,
 * load the IDT, record the CPU in online_cpus, enable the APIC and x2APIC,
 * then call ap_online().
 *
 * NOTE(review): judging by the name this is the entry point for application
 * processors; the caller is not in this file -- confirm.
 */
void ap_start64(void)
{
setup_gdt_tss();
reset_apic();
load_idt();
save_id();
enable_apic();
enable_x2apic();
ap_online();
}
/*
 * Remaining initialization of the boot CPU; setup_efi() calls this as its
 * final step. Brings up the other CPUs, enables x2APIC on this CPU, then
 * initializes SMP and PMU support.
 */
void bsp_rest_init(void)
{
bringup_aps();
enable_x2apic();
smp_init();
pmu_init();
}