riscv: Only flush the mm icache when setting an exec pte
We used to emit a flush_icache_all() whenever a dirty executable
mapping is set in the page table but we can instead call
flush_icache_mm() which will only send IPIs to cores that currently run
this mm and add a deferred icache flush to the others.
The number of calls to sbi_remote_fence_i() (tested without IPI
support):
With a simple buildroot rootfs:
* Before: ~5k
* After : 4 (!)
Tested on HW, the boot to login is ~4.5% faster.
With an ubuntu rootfs:
* Before: ~24k
* After : ~13k
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20240202124711.256146-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9c45810..b2e6965 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -513,12 +513,12 @@
WRITE_ONCE(*ptep, pteval);
}
-void flush_icache_pte(pte_t pte);
+void flush_icache_pte(struct mm_struct *mm, pte_t pte);
-static inline void __set_pte_at(pte_t *ptep, pte_t pteval)
+static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
- flush_icache_pte(pteval);
+ flush_icache_pte(mm, pteval);
set_pte(ptep, pteval);
}
@@ -529,7 +529,7 @@
page_table_check_ptes_set(mm, ptep, pteval, nr);
for (;;) {
- __set_pte_at(ptep, pteval);
+ __set_pte_at(mm, ptep, pteval);
if (--nr == 0)
break;
ptep++;
@@ -541,7 +541,7 @@
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- __set_pte_at(ptep, __pte(0));
+ __set_pte_at(mm, ptep, __pte(0));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
@@ -713,14 +713,14 @@
pmd_t *pmdp, pmd_t pmd)
{
page_table_check_pmd_set(mm, pmdp, pmd);
- return __set_pte_at((pte_t *)pmdp, pmd_pte(pmd));
+ return __set_pte_at(mm, (pte_t *)pmdp, pmd_pte(pmd));
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
page_table_check_pud_set(mm, pudp, pud);
- return __set_pte_at((pte_t *)pudp, pud_pte(pud));
+ return __set_pte_at(mm, (pte_t *)pudp, pud_pte(pud));
}
#ifdef CONFIG_PAGE_TABLE_CHECK
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 55a34f2..bc61ee5 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -82,12 +82,12 @@
#endif /* CONFIG_SMP */
#ifdef CONFIG_MMU
-void flush_icache_pte(pte_t pte)
+void flush_icache_pte(struct mm_struct *mm, pte_t pte)
{
struct folio *folio = page_folio(pte_page(pte));
if (!test_bit(PG_dcache_clean, &folio->flags)) {
- flush_icache_all();
+ flush_icache_mm(mm, false);
set_bit(PG_dcache_clean, &folio->flags);
}
}
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index ef887ef..533ec90 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -10,7 +10,7 @@
pte_t entry, int dirty)
{
if (!pte_same(ptep_get(ptep), entry))
- __set_pte_at(ptep, entry);
+ __set_pte_at(vma->vm_mm, ptep, entry);
/*
* update_mmu_cache will unconditionally execute, handling both
* the case that the PTE changed and the spurious fault case.