mm: numa: Add THP migration for the NUMA working set scanning fault case.
Note: This is very heavily based on a patch from Peter Zijlstra with
fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner. That patch
put a lot of migration logic into mm/huge_memory.c where it does
not belong. This version puts tries to share some of the migration
logic with migrate_misplaced_page. However, it should be noted
that now migrate.c is doing more with the pagetable manipulation
than is preferred. The end result is barely recognisable so as
before, the signed-offs had to be removed but will be re-added if
the original authors are ok with it.
Add THP migration for the NUMA working set scanning fault case.
It uses the page lock to serialize. No migration pte dance is
necessary because the pte is already unmapped when we decide
to migrate.
[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261..711baf8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@
}
__setup("transparent_hugepage=", setup_transparent_hugepage);
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{
- struct page *page = NULL;
+ struct page *page;
unsigned long haddr = addr & HPAGE_PMD_MASK;
int target_nid;
int current_nid = -1;
+ bool migrated;
+ bool page_locked = false;
spin_lock(&mm->page_table_lock);
if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@
page = pmd_page(pmd);
get_page(page);
- spin_unlock(&mm->page_table_lock);
current_nid = page_to_nid(page);
count_vm_numa_event(NUMA_HINT_FAULTS);
if (current_nid == numa_node_id())
count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
target_nid = mpol_misplaced(page, vma, haddr);
- if (target_nid == -1)
+ if (target_nid == -1) {
+ put_page(page);
goto clear_pmdnuma;
+ }
- /*
- * Due to lacking code to migrate thp pages, we'll split
- * (which preserves the special PROT_NONE) and re-take the
- * fault on the normal pages.
- */
- split_huge_page(page);
- put_page(page);
+ /* Acquire the page lock to serialise THP migrations */
+ spin_unlock(&mm->page_table_lock);
+ lock_page(page);
+ page_locked = true;
+ /* Confirm the PTE did not while locked */
+ spin_lock(&mm->page_table_lock);
+ if (unlikely(!pmd_same(pmd, *pmdp))) {
+ unlock_page(page);
+ put_page(page);
+ goto out_unlock;
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ /* Migrate the THP to the requested node */
+ migrated = migrate_misplaced_transhuge_page(mm, vma,
+ pmdp, pmd, addr,
+ page, target_nid);
+ if (migrated)
+ current_nid = target_nid;
+ else {
+ spin_lock(&mm->page_table_lock);
+ if (unlikely(!pmd_same(pmd, *pmdp))) {
+ unlock_page(page);
+ goto out_unlock;
+ }
+ goto clear_pmdnuma;
+ }
+
+ task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
return 0;
clear_pmdnuma:
- spin_lock(&mm->page_table_lock);
- if (unlikely(!pmd_same(pmd, *pmdp)))
- goto out_unlock;
-
pmd = pmd_mknonnuma(pmd);
set_pmd_at(mm, haddr, pmdp, pmd);
VM_BUG_ON(pmd_numa(*pmdp));
update_mmu_cache_pmd(vma, addr, pmdp);
+ if (page_locked)
+ unlock_page(page);
out_unlock:
spin_unlock(&mm->page_table_lock);
- if (page) {
- put_page(page);
- task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
- }
+ if (current_nid != -1)
+ task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
return 0;
}