author     Mel Gorman <mgorman@suse.de>  2012-11-21 01:18:23 +0000
committer  Mel Gorman <mgorman@suse.de>  2012-12-11 14:42:55 +0000
commit     b8593bfda1652755136333cdd362de125b283a9c (patch)
tree       c0395d9cf775fd9225e81b055fc8f5540a14333a /mm
parent     e42c8ff2999de1239a57d434bfbd8e9f2a56e814 (diff)
mm: sched: Adapt the scanning rate if a NUMA hinting fault does not migrate
The PTE scanning rate and fault rates are two of the biggest sources of
system CPU overhead with automatic NUMA placement. Ideally a proper policy
would detect if a workload was properly placed, schedule and adjust the
PTE scanning rate accordingly. We do not track the necessary information
to do that but we at least know if we migrated or not.

This patch scans slower if a page was not migrated as the result of a
NUMA hinting fault up to sysctl_numa_balancing_scan_period_max which is
now higher than the previous default. Once every minute it will reset
the scanner in case of phase changes.

This is hilariously crude and the numbers are arbitrary. Workloads will
converge quite slowly in comparison to what a proper policy should be
able to do. On the plus side, we will chew up less CPU for workloads
that have no need for automatic balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
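The policy is simple enough to sketch outside the kernel. The fragment below
only illustrates the idea in the commit message, not the scheduler code this
patch series modifies: struct numa_scan_state, numa_scan_adapt() and the *_MS
constants are invented for the example, and the doubling back-off stands in
for whatever increment the scheduler actually uses.

    #include <stdbool.h>

    /* Illustrative bounds only; the real sysctls and their defaults differ. */
    #define SCAN_PERIOD_MIN_MS     1000
    #define SCAN_PERIOD_MAX_MS     60000
    #define SCAN_RESET_INTERVAL_MS 60000

    struct numa_scan_state {
            unsigned int scan_period_ms;    /* delay until the next PTE scan */
            unsigned long last_reset_ms;    /* time of the last periodic reset */
    };

    /* Called once per NUMA hinting fault with the migration outcome. */
    static void numa_scan_adapt(struct numa_scan_state *s,
                                unsigned long now_ms, bool migrated)
    {
            /* Once a minute, forget the learned rate in case of a phase change. */
            if (now_ms - s->last_reset_ms >= SCAN_RESET_INTERVAL_MS) {
                    s->scan_period_ms = SCAN_PERIOD_MIN_MS;
                    s->last_reset_ms = now_ms;
                    return;
            }

            /*
             * A fault that did not migrate suggests the page was already on
             * the right node, so back off; a fault that did migrate leaves
             * the current (fast) scan rate alone.
             */
            if (!migrated) {
                    s->scan_period_ms *= 2;
                    if (s->scan_period_ms > SCAN_PERIOD_MAX_MS)
                            s->scan_period_ms = SCAN_PERIOD_MAX_MS;
            }
    }

In the actual patch the outcome of migrate_misplaced_page() is threaded into
task_numa_fault() as the new third argument, as the diff below shows.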
Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c   2
-rw-r--r--  mm/memory.c       12
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 79b9606..199b261 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1068,7 +1068,7 @@ out_unlock:
spin_unlock(&mm->page_table_lock);
if (page) {
put_page(page);
- task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+ task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
}
return 0;
}
diff --git a/mm/memory.c b/mm/memory.c
index 84c6d9e..39edb11 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3468,6 +3468,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl;
int current_nid = -1;
int target_nid;
+ bool migrated = false;
/*
* The "pte" at this point cannot be used safely without
@@ -3509,12 +3510,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
/* Migrate to the requested node */
- if (migrate_misplaced_page(page, target_nid))
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
current_nid = target_nid;
out:
if (current_nid != -1)
- task_numa_fault(current_nid, 1);
+ task_numa_fault(current_nid, 1, migrated);
return 0;
}
@@ -3554,6 +3556,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page;
int curr_nid = local_nid;
int target_nid;
+ bool migrated;
if (!pte_present(pteval))
continue;
if (!pte_numa(pteval))
@@ -3590,9 +3593,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Migrate to the requested node */
pte_unmap_unlock(pte, ptl);
- if (migrate_misplaced_page(page, target_nid))
+ migrated = migrate_misplaced_page(page, target_nid);
+ if (migrated)
curr_nid = target_nid;
- task_numa_fault(curr_nid, 1);
+ task_numa_fault(curr_nid, 1, migrated);
pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
}