| /* SPDX-License-Identifier: GPL-2.0-only */ |
| /* |
| * relocate_kernel.S - put the kernel image in place to boot |
| * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> |
| */ |
| |
| #include <linux/linkage.h> |
| #include <linux/stringify.h> |
| #include <asm/alternative.h> |
| #include <asm/page_types.h> |
| #include <asm/kexec.h> |
| #include <asm/processor-flags.h> |
| #include <asm/pgtable_types.h> |
| #include <asm/nospec-branch.h> |
| #include <asm/unwind_hints.h> |
| |
| /* |
| * Must be relocatable PIC code callable as a C function, in particular |
| * there must be a plain RET and not jump to return thunk. |
| */ |
| |
| /* |
| * PTR(x): byte offset of entry number x in an array of 8-byte values |
| * (used below to index page_list). The argument is parenthesized so that |
| * a compound expression is shifted as a whole. |
| */ |
| #define PTR(x) ((x) << 3) |
| #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
| |
| /* |
| * The range from control_page + KEXEC_CONTROL_CODE_MAX_SIZE up to |
| * control_page + PAGE_SIZE is used as data storage and as the stack for |
| * jumping back. DATA(offset) is the offset of a data slot from the start |
| * of the control page. |
| */ |
| #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) |
| |
| /* Minimal CPU state, one 8-byte slot per register */ |
| #define RSP DATA(0x0) |
| #define CR0 DATA(0x8) |
| #define CR3 DATA(0x10) |
| #define CR4 DATA(0x18) |
| |
| /* Other data saved for the way back */ |
| #define CP_PA_TABLE_PAGE DATA(0x20) |
| #define CP_PA_SWAP_PAGE DATA(0x28) |
| #define CP_PA_BACKUP_PAGES_MAP DATA(0x30) |
| |
| .text |
| .align PAGE_SIZE |
| .code64 |
| SYM_CODE_START_NOALIGN(relocate_range) |
| SYM_CODE_START_NOALIGN(relocate_kernel) |
| UNWIND_HINT_END_OF_STACK |
| ANNOTATE_NOENDBR |
| /* |
| * relocate_kernel: save the state needed for jumping back, switch to the |
| * identity mapped page tables and a new stack, then continue in |
| * identity_mapped. |
| * |
| * Arguments on entry: |
| * %rdi indirection_page |
| * %rsi page_list |
| * %rdx start address |
| * %rcx preserve_context |
| * %r8 host_mem_enc_active |
| * |
| * %rdi, %rdx and %rcx are not written here, so they still hold the |
| * arguments when identity_mapped starts. Also handed over: |
| * %r9 physical address of the page table page |
| * %r10 physical address of the swap page |
| * %r12 host_mem_enc_active |
| * %r13 CR4 as read below |
| */ |
| |
| /* Save the CPU context, used for jumping back (popped again in virtual_mapped) */ |
| pushq %rbx |
| pushq %rbp |
| pushq %r12 |
| pushq %r13 |
| pushq %r14 |
| pushq %r15 |
| pushf |
| |
| movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* %r11 = page_list[VA_CONTROL_PAGE] */ |
| movq %rsp, RSP(%r11) /* stack pointer, below the registers pushed above */ |
| movq %cr0, %rax |
| movq %rax, CR0(%r11) |
| movq %cr3, %rax |
| movq %rax, CR3(%r11) |
| movq %cr4, %rax |
| movq %rax, CR4(%r11) |
| |
| /* |
| * Save CR4. Required to enable the right paging mode later. |
| * %rax still holds the CR4 value read just above. |
| */ |
| movq %rax, %r13 |
| |
| /* zero out flags, and disable interrupts */ |
| pushq $0 |
| popfq |
| |
| /* Save SME active flag: %r8 is reused below, so keep the argument in %r12 */ |
| movq %r8, %r12 |
| |
| /* |
| * get physical address of control page now |
| * this is impossible after page table switch |
| */ |
| movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 |
| |
| /* get physical address of page table now too */ |
| movq PTR(PA_TABLE_PAGE)(%rsi), %r9 |
| |
| /* get physical address of swap page now */ |
| movq PTR(PA_SWAP_PAGE)(%rsi), %r10 |
| |
| /* |
| * save some information for jumping back |
| * (read again in identity_mapped once the called image has returned) |
| */ |
| movq %r9, CP_PA_TABLE_PAGE(%r11) |
| movq %r10, CP_PA_SWAP_PAGE(%r11) |
| movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) /* the indirection_page argument */ |
| |
| /* Switch to the identity mapped page tables */ |
| movq %r9, %cr3 |
| |
| /* setup a new stack at the end of the physical control page */ |
| lea PAGE_SIZE(%r8), %rsp |
| |
| /* |
| * jump to identity mapped page: |
| * target = physical control page + offset of identity_mapped from |
| * relocate_kernel, reached by pushing it and executing a plain RET. |
| * NOTE(review): this presumes the code sits at the start of the |
| * control page; the copy itself is not visible in this file. |
| */ |
| addq $(identity_mapped - relocate_kernel), %r8 |
| pushq %r8 |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(relocate_kernel) |
| |
| SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) |
| UNWIND_HINT_END_OF_STACK |
| /* |
| * identity_mapped: entered from relocate_kernel through its push/ret, |
| * running on the stack set up at the end of the physical control page. |
| * |
| * Live registers at this point: |
| * %rdi indirection_page |
| * %rdx start address |
| * %rcx preserve_context |
| * %r9 physical address of the page table page (already loaded in CR3) |
| * %r10 physical address of the swap page |
| * %r12 host_mem_enc_active |
| * %r13 CR4 value read in relocate_kernel |
| * |
| * set return address to 0 if not preserving context |
| */ |
| pushq $0 |
| /* store the start address on the stack */ |
| pushq %rdx |
| |
| /* |
| * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP |
| * below. |
| */ |
| movq %cr4, %rax |
| andq $~(X86_CR4_CET), %rax |
| movq %rax, %cr4 |
| |
| /* |
| * Set cr0 to a known state: |
| * - Paging enabled |
| * - Alignment check disabled |
| * - Write protect disabled |
| * - No task switch |
| * - Don't do FP software emulation. |
| * - Protected mode enabled |
| */ |
| movq %cr0, %rax |
| andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax |
| orl $(X86_CR0_PG | X86_CR0_PE), %eax |
| movq %rax, %cr0 |
| |
| /* |
| * Set cr4 to a known state: |
| * - physical address extension enabled |
| * - 5-level paging, if it was enabled before |
| * - Machine check exception on TDX guest, if it was enabled before. |
| * Clearing MCE might not be allowed in TDX guests, depending on setup. |
| * |
| * Use R13 that contains the original CR4 value, read in relocate_kernel(). |
| * PAE is always set in the original CR4. |
| */ |
| andl $(X86_CR4_PAE | X86_CR4_LA57), %r13d |
| ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST |
| movq %r13, %cr4 |
| |
| /* Flush the TLB (needed?) */ |
| movq %r9, %cr3 |
| |
| /* |
| * If SME is active, there could be old encrypted cache line |
| * entries that will conflict with the now unencrypted memory |
| * used by kexec. Flush the caches before copying the kernel. |
| */ |
| testq %r12, %r12 /* %r12 = host_mem_enc_active */ |
| jz .Lsme_off |
| wbinvd |
| .Lsme_off: |
| |
| movq %rcx, %r11 /* keep preserve_context: swap_pages overwrites %rcx */ |
| call swap_pages /* %rdi = indirection_page, %r10 = swap page */ |
| |
| /* |
| * To be certain of avoiding problems with self-modifying code |
| * I need to execute a serializing instruction here. |
| * So I flush the TLB by reloading %cr3 here, it's handy, |
| * and not processor dependent. |
| */ |
| movq %cr3, %rax |
| movq %rax, %cr3 |
| |
| /* |
| * set all of the registers to known values |
| * leave %rsp alone |
| * (only when preserve_context, saved in %r11 above, is zero) |
| */ |
| |
| testq %r11, %r11 |
| jnz .Lrelocate |
| xorl %eax, %eax |
| xorl %ebx, %ebx |
| xorl %ecx, %ecx |
| xorl %edx, %edx |
| xorl %esi, %esi |
| xorl %edi, %edi |
| xorl %ebp, %ebp |
| xorl %r8d, %r8d |
| xorl %r9d, %r9d |
| xorl %r10d, %r10d |
| xorl %r11d, %r11d |
| xorl %r12d, %r12d |
| xorl %r13d, %r13d |
| xorl %r14d, %r14d |
| xorl %r15d, %r15d |
| |
| ANNOTATE_UNRET_SAFE |
| ret /* pops the start address pushed on entry and jumps to it; the 0 pushed before it stays on the stack */ |
| int3 |
| |
| .Lrelocate: |
| popq %rdx /* start address pushed on entry */ |
| leaq PAGE_SIZE(%r10), %rsp /* stack for the call: end of the swap page */ |
| ANNOTATE_RETPOLINE_SAFE |
| call *%rdx |
| |
| /* |
| * Execution continues here if the called image returns. |
| * get the re-entry point of the peer system (read from the top of |
| * the stack as left by the peer), then reload what relocate_kernel |
| * saved in the control page, run swap_pages again over the saved |
| * list and continue in virtual_mapped. |
| */ |
| movq 0(%rsp), %rbp |
| leaq relocate_kernel(%rip), %r8 /* RIP-relative: address of relocate_kernel in the copy being executed */ |
| movq CP_PA_SWAP_PAGE(%r8), %r10 |
| movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi /* indirection_page stored by relocate_kernel */ |
| movq CP_PA_TABLE_PAGE(%r8), %rax |
| movq %rax, %cr3 |
| lea PAGE_SIZE(%r8), %rsp /* stack at %r8 + PAGE_SIZE again */ |
| call swap_pages |
| movq $virtual_mapped, %rax /* absolute address of virtual_mapped, not a RIP-relative one */ |
| pushq %rax |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(identity_mapped) |
| /* |
| * virtual_mapped: last step of jumping back, reached from identity_mapped |
| * through push/ret. |
| * |
| * On entry %r8 is the base address used for the RSP/CR0/CR3/CR4 slots |
| * written by relocate_kernel, and %rbp holds the re-entry point of the |
| * peer system, which is handed back in %rax. |
| */ |
| SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) |
| UNWIND_HINT_END_OF_STACK |
| ANNOTATE_NOENDBR // RET target, above |
| movq RSP(%r8), %rsp /* back on the stack saved by relocate_kernel */ |
| movq CR4(%r8), %rax |
| movq %rax, %cr4 |
| movq CR3(%r8), %rax |
| movq CR0(%r8), %r8 /* both saved values are fetched before CR3 is written; presumably because %r8 is only usable under the current page tables - TODO confirm */ |
| movq %rax, %cr3 |
| movq %r8, %cr0 |
| movq %rbp, %rax /* value left in %rax for the caller: the peer's re-entry point */ |
| /* Undo the pushes done at the start of relocate_kernel, in reverse order */ |
| popf |
| popq %r15 |
| popq %r14 |
| popq %r13 |
| popq %r12 |
| popq %rbp |
| popq %rbx |
| ANNOTATE_UNRET_SAFE |
| ret /* with %rsp restored, this returns to whoever called relocate_kernel */ |
| int3 |
| SYM_CODE_END(virtual_mapped) |
| |
| /* |
| * swap_pages: do the copies. |
| * |
| * Walks the list of entries that starts with the value passed in %rdi. |
| * For every source entry, the source page and the current destination |
| * page exchange their contents, with the page at %r10 as the |
| * intermediate buffer. |
| * |
| * The low bits of an entry give its type; the remaining bits (mask |
| * 0xfffffffffffff000) are an address: |
| * 0x1 destination page |
| * 0x2 indirection page (further entries are read from it) |
| * 0x4 done indicator |
| * 0x8 source page |
| * |
| * In: %rdi first entry (both callers pass the indirection_page value) |
| * %r10 address of the swap page |
| * Overwrites %rax, %rbx, %rcx, %rdx, %rsi, %rdi and the flags. |
| */ |
| SYM_CODE_START_LOCAL_NOALIGN(swap_pages) |
| UNWIND_HINT_END_OF_STACK |
| movq %rdi, %rcx /* %rcx = entry being decoded */ |
| xorl %esi, %esi |
| xorl %edi, %edi |
| jmp .Lswap_decode |
| |
| .Lswap_fetch: /* read the next entry from the indirection page */ |
| movq (%rbx), %rcx |
| addq $8, %rbx |
| |
| .Lswap_decode: |
| testb $0x1, %cl /* destination page? */ |
| jz .Lswap_not_dest |
| movq %rcx, %rdi |
| andq $0xfffffffffffff000, %rdi |
| jmp .Lswap_fetch |
| |
| .Lswap_not_dest: |
| testb $0x2, %cl /* indirection page? */ |
| jz .Lswap_not_ind |
| movq %rcx, %rbx |
| andq $0xfffffffffffff000, %rbx |
| jmp .Lswap_fetch |
| |
| .Lswap_not_ind: |
| testb $0x4, %cl /* done indicator? */ |
| jnz .Lswap_done |
| |
| testb $0x8, %cl /* source page? */ |
| jz .Lswap_fetch /* no known type bit set: ignore the entry */ |
| movq %rcx, %rsi |
| andq $0xfffffffffffff000, %rsi |
| |
| /* Every source page is exchanged with the destination page */ |
| movq %rsi, %rax /* %rax = source page */ |
| movq %rdi, %rdx /* %rdx = destination page */ |
| |
| /* source page -> swap page (512 quadwords per copy) */ |
| movq %r10, %rdi |
| movl $512, %ecx |
| rep movsq |
| |
| /* destination page -> source page */ |
| movq %rdx, %rsi |
| movq %rax, %rdi |
| movl $512, %ecx |
| rep movsq |
| |
| /* swap page -> destination page */ |
| movq %r10, %rsi |
| movq %rdx, %rdi |
| movl $512, %ecx |
| rep movsq |
| |
| /* %rsi = page after the source; %rdi is left as the last copy advanced it */ |
| leaq PAGE_SIZE(%rax), %rsi |
| jmp .Lswap_fetch |
| |
| .Lswap_done: |
| ANNOTATE_UNRET_SAFE |
| ret |
| int3 |
| SYM_CODE_END(swap_pages) |
| |
| .skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc /* fill with 0xcc bytes (the INT3 opcode) until the code since relocate_kernel is KEXEC_CONTROL_CODE_MAX_SIZE bytes long */ |
| SYM_CODE_END(relocate_range); /* closes relocate_range, which was opened immediately before relocate_kernel */ |