x86: Move ap_init() to smp.c
ap_init() copies the SIPI vector to lowmem, sends INIT/SIPI to APs
and waits on the APs to come up.
Port this routine to C from asm and move it to smp.c to allow sharing
this functionality between the EFI (-fPIC) and non-EFI builds.
Call ap_init() from the EFI setup path to reset the APs to a known
location.
Signed-off-by: Varad Gautam <varad.gautam@suse.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220615232943.1465490-4-seanjc@google.com
diff --git a/lib/alloc_page.h b/lib/alloc_page.h
index eed2ba0..060e041 100644
--- a/lib/alloc_page.h
+++ b/lib/alloc_page.h
@@ -13,6 +13,9 @@
#define AREA_ANY_NUMBER 0xff
+/* The realmode trampoline gets relocated here during bootup. */
+#define RM_TRAMPOLINE_ADDR 0
+
#define AREA_ANY 0x00000
#define AREA_MASK 0x0ffff
diff --git a/lib/x86/setup.c b/lib/x86/setup.c
index 8a4fa3c..2004e7f 100644
--- a/lib/x86/setup.c
+++ b/lib/x86/setup.c
@@ -323,6 +323,7 @@
load_idt();
mask_pic_interrupts();
enable_apic();
+ ap_init();
enable_x2apic();
smp_init();
setup_page_table();
diff --git a/lib/x86/smp.c b/lib/x86/smp.c
index 683b25d..81dacef 100644
--- a/lib/x86/smp.c
+++ b/lib/x86/smp.c
@@ -1,11 +1,16 @@
#include <libcflat.h>
+
+#include <asm/barrier.h>
+
#include "processor.h"
#include "atomic.h"
#include "smp.h"
#include "apic.h"
#include "fwcfg.h"
#include "desc.h"
+#include "alloc_page.h"
+#include "asm/page.h"
#define IPI_VECTOR 0x20
@@ -18,6 +23,13 @@
static volatile bool ipi_wait;
static int _cpu_count;
static atomic_t active_cpus;
+extern u8 rm_trampoline, rm_trampoline_end;
+#ifdef __i386__
+extern u8 ap_rm_gdt_descr;
+#endif
+
+/* The BSP is online from time zero. */
+atomic_t cpu_online_count = { .counter = 1 };
static __attribute__((used)) void ipi(void)
{
@@ -114,8 +126,6 @@
int i;
void ipi_entry(void);
- _cpu_count = fwcfg_get_nb_cpus();
-
setup_idt();
init_apic_map();
set_idt_entry(IPI_VECTOR, ipi_entry, 0);
@@ -142,3 +152,61 @@
atomic_inc(&active_cpus);
}
+
+static void setup_rm_gdt(void)
+{
+#ifdef __i386__
+ struct descriptor_table_ptr *rm_gdt =
+ (struct descriptor_table_ptr *) (&ap_rm_gdt_descr - &rm_trampoline);
+ /*
+ * On i386, place the gdt descriptor to be loaded from SIPI vector right after
+ * the vector code.
+ */
+ sgdt(rm_gdt);
+#endif
+}
+
+void ap_init(void)
+{
+ void *rm_trampoline_dst = RM_TRAMPOLINE_ADDR;
+ size_t rm_trampoline_size = (&rm_trampoline_end - &rm_trampoline) + 1;
+ assert(rm_trampoline_size < PAGE_SIZE);
+
+ asm volatile("cld");
+
+ /*
+ * Fill the trampoline page with with INT3 (0xcc) so that any AP
+ * that goes astray within the first page gets a fault.
+ */
+ memset(rm_trampoline_dst, 0xcc /* INT3 */, PAGE_SIZE);
+
+ memcpy(rm_trampoline_dst, &rm_trampoline, rm_trampoline_size);
+
+ setup_rm_gdt();
+
+#ifdef CONFIG_EFI
+ /*
+ * apic_icr_write() is unusable on CONFIG_EFI until percpu area gets set up.
+ * Use raw writes to APIC_ICR to send IPIs for time being.
+ */
+ volatile u32 *apic_icr = (volatile u32 *) (APIC_DEFAULT_PHYS_BASE + APIC_ICR);
+
+ /* INIT */
+ *apic_icr = APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT;
+
+ /* SIPI */
+ *apic_icr = APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP;
+#else
+ /* INIT */
+ apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT, 0);
+
+ /* SIPI */
+ apic_icr_write(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP, 0);
+#endif
+
+ _cpu_count = fwcfg_get_nb_cpus();
+
+ printf("smp: waiting for %d APs\n", _cpu_count - 1);
+ while (_cpu_count != atomic_read(&cpu_online_count))
+ cpu_relax();
+}
diff --git a/lib/x86/smp.h b/lib/x86/smp.h
index bd303c2..77f8a19 100644
--- a/lib/x86/smp.h
+++ b/lib/x86/smp.h
@@ -3,6 +3,8 @@
#include <stddef.h>
#include <asm/spinlock.h>
+#include "libcflat.h"
+#include "atomic.h"
/* Offsets into the per-cpu page. */
struct percpu_data {
@@ -78,5 +80,8 @@
void on_cpu_async(int cpu, void (*function)(void *data), void *data);
void on_cpus(void (*function)(void *data), void *data);
void smp_reset_apic(void);
+void ap_init(void);
+
+extern atomic_t cpu_online_count;
#endif
diff --git a/x86/cstart.S b/x86/cstart.S
index 8aa29ee..88d25fc 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -124,26 +124,7 @@
push %eax
call exit
-ap_init:
- cld
- sgdtl ap_rm_gdt_descr // must be close to rm_trampoline for real mode access to work
- lea rm_trampoline, %esi
- xor %edi, %edi
- mov $(rm_trampoline_end - rm_trampoline), %ecx
- rep movsb
- mov $APIC_DEFAULT_PHYS_BASE, %eax
- movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT), APIC_ICR(%eax)
- movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP), APIC_ICR(%eax)
- call fwcfg_get_nb_cpus
-1: pause
- cmpw %ax, cpu_online_count
- jne 1b
- ret
-
online_cpus:
.fill (max_cpus + 7) / 8, 1, 0
-.align 2
-cpu_online_count: .word 1
-
#include "trampolines.S"
diff --git a/x86/cstart64.S b/x86/cstart64.S
index 129ef0b..f0d12fb 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -2,7 +2,6 @@
#include "apic-defs.h"
.globl online_cpus
-.globl cpu_online_count
ipi_vector = 0x20
@@ -227,21 +226,3 @@
online_cpus:
.fill (max_cpus + 7) / 8, 1, 0
-
-ap_init:
- cld
- lea rm_trampoline, %rsi
- xor %rdi, %rdi
- mov $(rm_trampoline_end - rm_trampoline), %rcx
- rep movsb
- mov $APIC_DEFAULT_PHYS_BASE, %eax
- movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT), APIC_ICR(%rax)
- movl $(APIC_DEST_ALLBUT | APIC_DEST_PHYSICAL | APIC_DM_STARTUP), APIC_ICR(%rax)
- call fwcfg_get_nb_cpus
-1: pause
- cmpw %ax, cpu_online_count
- jne 1b
- ret
-
-.align 2
-cpu_online_count: .word 1
diff --git a/x86/efi/efistart64.S b/x86/efi/efistart64.S
index 017abba..7d50f96 100644
--- a/x86/efi/efistart64.S
+++ b/x86/efi/efistart64.S
@@ -57,3 +57,18 @@
pushq $0x08 /* 2nd entry in gdt64: 64-bit code segment */
pushq %rdi
lretq
+
+.code16
+
+.globl rm_trampoline
+rm_trampoline:
+
+.globl sipi_entry
+sipi_entry:
+ jmp sipi_entry
+
+.globl sipi_end
+sipi_end:
+
+.globl rm_trampoline_end
+rm_trampoline_end:
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index fac8c00..aa26fff 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -15,8 +15,6 @@
#define LATENCY_RUNS 1000000
-extern u16 cpu_online_count;
-
u64 tsc_start;
u64 tsc_end;
@@ -1778,20 +1776,20 @@
on_cpu(1, get_tss_entry, &tss_entry);
- orig_cpu_count = cpu_online_count;
+ orig_cpu_count = atomic_read(&cpu_online_count);
apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_INIT | APIC_INT_ASSERT,
id_map[1]);
delay(100000000ULL);
- --cpu_online_count;
+ atomic_dec(&cpu_online_count);
tss_entry->type &= ~DESC_BUSY;
apic_icr_write(APIC_DEST_PHYSICAL | APIC_DM_STARTUP, id_map[1]);
- for (i = 0; i < 5 && cpu_online_count < orig_cpu_count; i++)
+ for (i = 0; i < 5 && atomic_read(&cpu_online_count) < orig_cpu_count; i++)
delay(100000000ULL);
}
@@ -1802,7 +1800,7 @@
static bool init_startup_check(struct svm_test *test)
{
- return cpu_online_count == orig_cpu_count;
+ return atomic_read(&cpu_online_count) == orig_cpu_count;
}
static volatile bool init_intercept;