| /* SPDX-License-Identifier: GPL-2.0 */ |
| #ifndef _LINUX_PAGEWALK_H |
| #define _LINUX_PAGEWALK_H |
| |
| #include <linux/mm.h> |
| |
| struct mm_walk; /* forward declaration: the mm_walk_ops callbacks take a pointer to it */ |
| |
| /* |
| * Locking requirement during a page walk. A set of callbacks states its |
| * requirement in mm_walk_ops.walk_lock. |
| */ |
| enum page_walk_lock { |
| /* mmap_lock should be locked for read to stabilize the vma tree */ |
| PGWALK_RDLOCK = 0, |
| /* vma will be write-locked during the walk */ |
| PGWALK_WRLOCK = 1, |
| /* vma is expected to be already write-locked during the walk */ |
| PGWALK_WRLOCK_VERIFY = 2, |
| }; |
| |
| /** |
| * struct mm_walk_ops - callbacks for walk_page_range |
| * @pgd_entry: if set, called for each non-empty PGD (top-level) entry |
| * @p4d_entry: if set, called for each non-empty P4D entry |
| * @pud_entry: if set, called for each non-empty PUD entry |
| * @pmd_entry: if set, called for each non-empty PMD entry. |
| * This handler is required to be able to handle |
| * pmd_trans_huge() pmds. It may simply choose to |
| * split_huge_page() instead of handling them explicitly. |
| * @pte_entry: if set, called for each PTE (lowest-level) entry, |
| * including empty ones |
| * @pte_hole: if set, called for each hole at all levels, |
| * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. |
| * Any folded depths (where PTRS_PER_P?D is equal to 1) |
| * are skipped. |
| * @hugetlb_entry: if set, called for each hugetlb entry. This hook |
| * function is called with the vma lock held, in order to |
| * protect against a concurrent freeing of the pte_t* or |
| * the ptl. In some cases, the hook function needs to drop |
| * and retake the vma lock in order to avoid deadlocks |
| * while calling other functions. In such cases the hook |
| * function must either refrain from accessing the pte or |
| * ptl after dropping the vma lock, or else revalidate |
| * those items after re-acquiring the vma lock and before |
| * accessing them. |
| * @test_walk: caller specific callback function to determine whether |
| * we walk over the current vma or not. Returning 0 means |
| * "do page table walk over the current vma", returning |
| * a negative value means "abort current page table walk |
| * right now" and returning 1 means "skip the current vma". |
| * Note that this callback is not called when the caller |
| * passes in a single VMA as for walk_page_vma(). |
| * @pre_vma: if set, called before starting walk on a non-null vma. |
| * @post_vma: if set, called after a walk on a non-null vma, provided |
| * that @pre_vma and the vma walk succeeded. |
| * @walk_lock: locking requirement for walks using these callbacks |
| * (see enum page_walk_lock). |
| * |
| * p?d_entry callbacks are called even if those levels are folded on a |
| * particular architecture/configuration. |
| */ |
| struct mm_walk_ops { |
| int (*pgd_entry)(pgd_t *pgd, unsigned long addr, |
| unsigned long next, struct mm_walk *walk); |
| int (*p4d_entry)(p4d_t *p4d, unsigned long addr, |
| unsigned long next, struct mm_walk *walk); |
| int (*pud_entry)(pud_t *pud, unsigned long addr, |
| unsigned long next, struct mm_walk *walk); |
| int (*pmd_entry)(pmd_t *pmd, unsigned long addr, |
| unsigned long next, struct mm_walk *walk); |
| int (*pte_entry)(pte_t *pte, unsigned long addr, |
| unsigned long next, struct mm_walk *walk); |
| int (*pte_hole)(unsigned long addr, unsigned long next, |
| int depth, struct mm_walk *walk); |
| int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, |
| unsigned long addr, unsigned long next, |
| struct mm_walk *walk); |
| int (*test_walk)(unsigned long addr, unsigned long next, |
| struct mm_walk *walk); |
| int (*pre_vma)(unsigned long start, unsigned long end, |
| struct mm_walk *walk); |
| void (*post_vma)(struct mm_walk *walk); |
| enum page_walk_lock walk_lock; |
| }; |
| |
| /* |
| * Action for pud_entry / pmd_entry callbacks, held in mm_walk.action. |
| * ACTION_SUBTREE is the default. |
| */ |
| enum page_walk_action { |
| /* Descend to next level, splitting huge pages if needed and possible */ |
| ACTION_SUBTREE = 0, |
| /* Continue to next entry at this level (ignoring any subtree) */ |
| ACTION_CONTINUE = 1, |
| /* Call again for this entry */ |
| ACTION_AGAIN = 2 |
| }; |
| |
| /** |
| * struct mm_walk - walk_page_range data |
| * @ops: operations (callbacks) to call during the walk |
| * @mm: mm_struct representing the target process of the page table walk |
| * @pgd: pointer to PGD; only valid with no_vma (otherwise set to NULL) |
| * @vma: vma currently walked (NULL if walking outside vmas) |
| * @action: next action to perform (see enum page_walk_action) |
| * @no_vma: walk ignoring vmas (vma will always be NULL) |
| * @private: private data for the callbacks' usage |
| * |
| * (see the comment on walk_page_range() for more details) |
| */ |
| struct mm_walk { |
| const struct mm_walk_ops *ops; |
| struct mm_struct *mm; |
| pgd_t *pgd; |
| struct vm_area_struct *vma; |
| enum page_walk_action action; |
| bool no_vma; |
| void *private; |
| }; |
| |
| int walk_page_range(struct mm_struct *mm, unsigned long start, |
| unsigned long end, const struct mm_walk_ops *ops, |
| void *private); /* walk start..end of @mm using the @ops callbacks */ |
| int walk_page_range_novma(struct mm_struct *mm, unsigned long start, |
| unsigned long end, const struct mm_walk_ops *ops, |
| pgd_t *pgd, |
| void *private); /* NOTE(review): presumably sets mm_walk.pgd / mm_walk.no_vma - confirm */ |
| int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, |
| unsigned long end, const struct mm_walk_ops *ops, |
| void *private); /* start..end walk on a caller-supplied vma */ |
| int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, |
| void *private); /* single-vma walk; ops->test_walk is not called */ |
| int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, |
| pgoff_t nr, const struct mm_walk_ops *ops, |
| void *private); /* walk described by @mapping, @first_index and @nr */ |
| |
| typedef int __bitwise folio_walk_flags_t; /* FW_* flag bits, passed to folio_walk_start() */ |
| |
| /* |
| * Walk migration entries as well. Careful: a large folio might get split |
| * concurrently. |
| */ |
| #define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) |
| |
| /* Walk shared zeropages (small + huge) as well. */ |
| #define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) |
| |
| enum folio_walk_level { |
| FW_LEVEL_PTE, /* PTE entry: use folio_walk.ptep / folio_walk.pte */ |
| FW_LEVEL_PMD, /* PMD entry: use folio_walk.pmdp / folio_walk.pmd */ |
| FW_LEVEL_PUD, /* PUD entry: use folio_walk.pudp / folio_walk.pud */ |
| }; |
| |
| /** |
| * struct folio_walk - folio_walk_start() / folio_walk_end() data |
| * @page: exact folio page referenced (if applicable) |
| * @level: page table level identifying the entry type; it selects which |
| * member of each anonymous union below is meaningful |
| * @ptep: pointer to the page table entry (FW_LEVEL_PTE). |
| * @pmdp: pointer to the page table entry (FW_LEVEL_PMD). |
| * @pudp: pointer to the page table entry (FW_LEVEL_PUD). |
| * @pte: page table entry value (FW_LEVEL_PTE); presumably the value read |
| * by folio_walk_start() - TODO confirm. |
| * @pmd: page table entry value (FW_LEVEL_PMD); same caveat as @pte. |
| * @pud: page table entry value (FW_LEVEL_PUD); same caveat as @pte. |
| * @vma: private to the folio walk code. |
| * @ptl: pointer to the page table lock (private to the folio walk code; |
| * released by folio_walk_end()). |
| * |
| * (see folio_walk_start() documentation for more details) |
| */ |
| struct folio_walk { |
| /* public */ |
| struct page *page; |
| enum folio_walk_level level; |
| union { |
| pte_t *ptep; |
| pud_t *pudp; |
| pmd_t *pmdp; |
| }; |
| union { |
| pte_t pte; |
| pud_t pud; |
| pmd_t pmd; |
| }; |
| /* private */ |
| struct vm_area_struct *vma; |
| spinlock_t *ptl; |
| }; |
| |
| struct folio *folio_walk_start(struct folio_walk *fw, |
| struct vm_area_struct *vma, unsigned long addr, |
| folio_walk_flags_t flags); /* @flags: FW_* bits; counterpart of folio_walk_end() */ |
| |
| #define folio_walk_end(__fw, __vma) do { /* NOTE: __fw is evaluated more than once */ \ |
| spin_unlock((__fw)->ptl); /* release the page table lock */ \ |
| if (likely((__fw)->level == FW_LEVEL_PTE)) /* pte_unmap() only for PTE-level entries */ \ |
| pte_unmap((__fw)->ptep); \ |
| vma_pgtable_walk_end(__vma); \ |
| } while (0) |
| |
| #endif /* _LINUX_PAGEWALK_H */ |