| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Copyright (c) 2014, The Linux Foundation. All rights reserved. |
| */ |
| #include <linux/kernel.h> |
| #include <linux/mm.h> |
| #include <linux/module.h> |
| #include <linux/mem_encrypt.h> |
| #include <linux/sched.h> |
| #include <linux/vmalloc.h> |
| |
| #include <asm/cacheflush.h> |
| #include <asm/pgtable-prot.h> |
| #include <asm/set_memory.h> |
| #include <asm/tlbflush.h> |
| #include <asm/kfence.h> |
| |
| struct page_change_data { |
| pgprot_t set_mask; |
| pgprot_t clear_mask; |
| }; |
| |
| bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED); |
| |
| bool can_set_direct_map(void) |
| { |
| /* |
| * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear |
| * map to be mapped at page granularity, so that it is possible to |
| * protect/unprotect single pages. |
| * |
| * KFENCE pool requires page-granular mapping if initialized late. |
| * |
| * Realms need to make pages shared/protected at page granularity. |
| */ |
| return rodata_full || debug_pagealloc_enabled() || |
| arm64_kfence_can_set_direct_map() || is_realm_world(); |
| } |
| |
| static int change_page_range(pte_t *ptep, unsigned long addr, void *data) |
| { |
| struct page_change_data *cdata = data; |
| pte_t pte = __ptep_get(ptep); |
| |
| pte = clear_pte_bit(pte, cdata->clear_mask); |
| pte = set_pte_bit(pte, cdata->set_mask); |
| |
| __set_pte(ptep, pte); |
| return 0; |
| } |
| |
| /* |
| * This function assumes that the range is mapped with PAGE_SIZE pages. |
| */ |
| static int __change_memory_common(unsigned long start, unsigned long size, |
| pgprot_t set_mask, pgprot_t clear_mask) |
| { |
| struct page_change_data data; |
| int ret; |
| |
| data.set_mask = set_mask; |
| data.clear_mask = clear_mask; |
| |
| ret = apply_to_page_range(&init_mm, start, size, change_page_range, |
| &data); |
| |
| /* |
| * If the memory is being made valid without changing any other bits |
| * then a TLBI isn't required as a non-valid entry cannot be cached in |
| * the TLB. |
| */ |
| if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask)) |
| flush_tlb_kernel_range(start, start + size); |
| return ret; |
| } |
| |
| static int change_memory_common(unsigned long addr, int numpages, |
| pgprot_t set_mask, pgprot_t clear_mask) |
| { |
| unsigned long start = addr; |
| unsigned long size = PAGE_SIZE * numpages; |
| unsigned long end = start + size; |
| struct vm_struct *area; |
| int i; |
| |
| if (!PAGE_ALIGNED(addr)) { |
| start &= PAGE_MASK; |
| end = start + size; |
| WARN_ON_ONCE(1); |
| } |
| |
| /* |
| * Kernel VA mappings are always live, and splitting live section |
| * mappings into page mappings may cause TLB conflicts. This means |
| * we have to ensure that changing the permission bits of the range |
| * we are operating on does not result in such splitting. |
| * |
| * Let's restrict ourselves to mappings created by vmalloc (or vmap). |
| * Those are guaranteed to consist entirely of page mappings, and |
| * splitting is never needed. |
| * |
| * So check whether the [addr, addr + size) interval is entirely |
| * covered by precisely one VM area that has the VM_ALLOC flag set. |
| */ |
| area = find_vm_area((void *)addr); |
| if (!area || |
| end > (unsigned long)kasan_reset_tag(area->addr) + area->size || |
| !(area->flags & VM_ALLOC)) |
| return -EINVAL; |
| |
| if (!numpages) |
| return 0; |
| |
| /* |
| * If we are manipulating read-only permissions, apply the same |
| * change to the linear mapping of the pages that back this VM area. |
| */ |
| if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || |
| pgprot_val(clear_mask) == PTE_RDONLY)) { |
| for (i = 0; i < area->nr_pages; i++) { |
| __change_memory_common((u64)page_address(area->pages[i]), |
| PAGE_SIZE, set_mask, clear_mask); |
| } |
| } |
| |
| /* |
| * Get rid of potentially aliasing lazily unmapped vm areas that may |
| * have permissions set that deviate from the ones we are setting here. |
| */ |
| vm_unmap_aliases(); |
| |
| return __change_memory_common(start, size, set_mask, clear_mask); |
| } |
| |
| int set_memory_ro(unsigned long addr, int numpages) |
| { |
| return change_memory_common(addr, numpages, |
| __pgprot(PTE_RDONLY), |
| __pgprot(PTE_WRITE)); |
| } |
| |
| int set_memory_rw(unsigned long addr, int numpages) |
| { |
| return change_memory_common(addr, numpages, |
| __pgprot(PTE_WRITE), |
| __pgprot(PTE_RDONLY)); |
| } |
| |
| int set_memory_nx(unsigned long addr, int numpages) |
| { |
| return change_memory_common(addr, numpages, |
| __pgprot(PTE_PXN), |
| __pgprot(PTE_MAYBE_GP)); |
| } |
| |
| int set_memory_x(unsigned long addr, int numpages) |
| { |
| return change_memory_common(addr, numpages, |
| __pgprot(PTE_MAYBE_GP), |
| __pgprot(PTE_PXN)); |
| } |
| |
| int set_memory_valid(unsigned long addr, int numpages, int enable) |
| { |
| if (enable) |
| return __change_memory_common(addr, PAGE_SIZE * numpages, |
| __pgprot(PTE_VALID), |
| __pgprot(0)); |
| else |
| return __change_memory_common(addr, PAGE_SIZE * numpages, |
| __pgprot(0), |
| __pgprot(PTE_VALID)); |
| } |
| |
| int set_direct_map_invalid_noflush(struct page *page) |
| { |
| struct page_change_data data = { |
| .set_mask = __pgprot(0), |
| .clear_mask = __pgprot(PTE_VALID), |
| }; |
| |
| if (!can_set_direct_map()) |
| return 0; |
| |
| return apply_to_page_range(&init_mm, |
| (unsigned long)page_address(page), |
| PAGE_SIZE, change_page_range, &data); |
| } |
| |
| int set_direct_map_default_noflush(struct page *page) |
| { |
| struct page_change_data data = { |
| .set_mask = __pgprot(PTE_VALID | PTE_WRITE), |
| .clear_mask = __pgprot(PTE_RDONLY), |
| }; |
| |
| if (!can_set_direct_map()) |
| return 0; |
| |
| return apply_to_page_range(&init_mm, |
| (unsigned long)page_address(page), |
| PAGE_SIZE, change_page_range, &data); |
| } |
| |
| static int __set_memory_enc_dec(unsigned long addr, |
| int numpages, |
| bool encrypt) |
| { |
| unsigned long set_prot = 0, clear_prot = 0; |
| phys_addr_t start, end; |
| int ret; |
| |
| if (!is_realm_world()) |
| return 0; |
| |
| if (!__is_lm_address(addr)) |
| return -EINVAL; |
| |
| start = __virt_to_phys(addr); |
| end = start + numpages * PAGE_SIZE; |
| |
| if (encrypt) |
| clear_prot = PROT_NS_SHARED; |
| else |
| set_prot = PROT_NS_SHARED; |
| |
| /* |
| * Break the mapping before we make any changes to avoid stale TLB |
| * entries or Synchronous External Aborts caused by RIPAS_EMPTY |
| */ |
| ret = __change_memory_common(addr, PAGE_SIZE * numpages, |
| __pgprot(set_prot), |
| __pgprot(clear_prot | PTE_VALID)); |
| |
| if (ret) |
| return ret; |
| |
| if (encrypt) |
| ret = rsi_set_memory_range_protected(start, end); |
| else |
| ret = rsi_set_memory_range_shared(start, end); |
| |
| if (ret) |
| return ret; |
| |
| return __change_memory_common(addr, PAGE_SIZE * numpages, |
| __pgprot(PTE_VALID), |
| __pgprot(0)); |
| } |
| |
| static int realm_set_memory_encrypted(unsigned long addr, int numpages) |
| { |
| int ret = __set_memory_enc_dec(addr, numpages, true); |
| |
| /* |
| * If the request to change state fails, then the only sensible cause |
| * of action for the caller is to leak the memory |
| */ |
| WARN(ret, "Failed to encrypt memory, %d pages will be leaked", |
| numpages); |
| |
| return ret; |
| } |
| |
| static int realm_set_memory_decrypted(unsigned long addr, int numpages) |
| { |
| int ret = __set_memory_enc_dec(addr, numpages, false); |
| |
| WARN(ret, "Failed to decrypt memory, %d pages will be leaked", |
| numpages); |
| |
| return ret; |
| } |
| |
| static const struct arm64_mem_crypt_ops realm_crypt_ops = { |
| .encrypt = realm_set_memory_encrypted, |
| .decrypt = realm_set_memory_decrypted, |
| }; |
| |
| int realm_register_memory_enc_ops(void) |
| { |
| return arm64_mem_crypt_ops_register(&realm_crypt_ops); |
| } |
| |
| int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) |
| { |
| unsigned long addr = (unsigned long)page_address(page); |
| |
| if (!can_set_direct_map()) |
| return 0; |
| |
| return set_memory_valid(addr, nr, valid); |
| } |
| |
| #ifdef CONFIG_DEBUG_PAGEALLOC |
| /* |
| * This is - apart from the return value - doing the same |
| * thing as the new set_direct_map_valid_noflush() function. |
| * |
| * Unify? Explain the conceptual differences? |
| */ |
| void __kernel_map_pages(struct page *page, int numpages, int enable) |
| { |
| if (!can_set_direct_map()) |
| return; |
| |
| set_memory_valid((unsigned long)page_address(page), numpages, enable); |
| } |
| #endif /* CONFIG_DEBUG_PAGEALLOC */ |
| |
| /* |
| * This function is used to determine if a linear map page has been marked as |
| * not-valid. Walk the page table and check the PTE_VALID bit. |
| * |
| * Because this is only called on the kernel linear map, p?d_sect() implies |
| * p?d_present(). When debug_pagealloc is enabled, sections mappings are |
| * disabled. |
| */ |
| bool kernel_page_present(struct page *page) |
| { |
| pgd_t *pgdp; |
| p4d_t *p4dp; |
| pud_t *pudp, pud; |
| pmd_t *pmdp, pmd; |
| pte_t *ptep; |
| unsigned long addr = (unsigned long)page_address(page); |
| |
| pgdp = pgd_offset_k(addr); |
| if (pgd_none(READ_ONCE(*pgdp))) |
| return false; |
| |
| p4dp = p4d_offset(pgdp, addr); |
| if (p4d_none(READ_ONCE(*p4dp))) |
| return false; |
| |
| pudp = pud_offset(p4dp, addr); |
| pud = READ_ONCE(*pudp); |
| if (pud_none(pud)) |
| return false; |
| if (pud_sect(pud)) |
| return true; |
| |
| pmdp = pmd_offset(pudp, addr); |
| pmd = READ_ONCE(*pmdp); |
| if (pmd_none(pmd)) |
| return false; |
| if (pmd_sect(pmd)) |
| return true; |
| |
| ptep = pte_offset_kernel(pmdp, addr); |
| return pte_valid(__ptep_get(ptep)); |
| } |