aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig38
-rw-r--r--mm/Makefile1
-rw-r--r--mm/bootmem.c9
-rw-r--r--mm/memory.c2
-rw-r--r--mm/page_alloc.c39
-rw-r--r--mm/sparse.c85
6 files changed, 159 insertions, 15 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 5127441..cd37993 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -6,6 +6,7 @@ choice
prompt "Memory model"
depends on SELECT_MEMORY_MODEL
default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
+ default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
default FLATMEM_MANUAL
config FLATMEM_MANUAL
@@ -17,7 +18,15 @@ config FLATMEM_MANUAL
only have one option here: FLATMEM. This is normal
and a correct option.
- If unsure, choose this option over any other.
+ Some users of more advanced features like NUMA and
+ memory hotplug may have different options here.
+ DISCONTIGMEM is a more mature, better tested system,
+ but is incompatible with memory hotplug and may suffer
+ decreased performance over SPARSEMEM. If unsure between
+ "Sparse Memory" and "Discontiguous Memory", choose
+ "Discontiguous Memory".
+
+ If unsure, choose this option (Flat Memory) over any other.
config DISCONTIGMEM_MANUAL
bool "Discontigious Memory"
@@ -35,15 +44,38 @@ config DISCONTIGMEM_MANUAL
If unsure, choose "Flat Memory" over this option.
+config SPARSEMEM_MANUAL
+ bool "Sparse Memory"
+ depends on ARCH_SPARSEMEM_ENABLE
+ help
+ This will be the only option for some systems, including
+ memory hotplug systems. This is normal.
+
+ For many other systems, this will be an alternative to
+ "Discontiguous Memory". This option provides some potential
+ performance benefits, along with decreased code complexity,
+ but it is newer, and more experimental.
+
+ If unsure, choose "Discontiguous Memory" or "Flat Memory"
+ over this option.
+
endchoice
config DISCONTIGMEM
def_bool y
depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
+config SPARSEMEM
+ def_bool y
+ depends on SPARSEMEM_MANUAL
+
config FLATMEM
def_bool y
- depends on !DISCONTIGMEM || FLATMEM_MANUAL
+ depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+
+config FLAT_NODE_MEM_MAP
+ def_bool y
+ depends on !SPARSEMEM
#
# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
@@ -56,4 +88,4 @@ config NEED_MULTIPLE_NODES
config HAVE_MEMORY_PRESENT
def_bool y
- depends on ARCH_HAVE_MEMORY_PRESENT
+ depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
diff --git a/mm/Makefile b/mm/Makefile
index 0974080..8f70ffd 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
+obj-$(CONFIG_SPARSEMEM) += sparse.o
obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 260e703..f82f7ae 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -256,6 +256,7 @@ found:
static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
{
struct page *page;
+ unsigned long pfn;
bootmem_data_t *bdata = pgdat->bdata;
unsigned long i, count, total = 0;
unsigned long idx;
@@ -266,7 +267,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
count = 0;
/* first extant page of the node */
- page = virt_to_page(phys_to_virt(bdata->node_boot_start));
+ pfn = bdata->node_boot_start >> PAGE_SHIFT;
idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
map = bdata->node_bootmem_map;
/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
@@ -275,9 +276,11 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
gofast = 1;
for (i = 0; i < idx; ) {
unsigned long v = ~map[i / BITS_PER_LONG];
+
if (gofast && v == ~0UL) {
int j, order;
+ page = pfn_to_page(pfn);
count += BITS_PER_LONG;
__ClearPageReserved(page);
order = ffs(BITS_PER_LONG) - 1;
@@ -292,6 +295,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
page += BITS_PER_LONG;
} else if (v) {
unsigned long m;
+
+ page = pfn_to_page(pfn);
for (m = 1; m && i < idx; m<<=1, page++, i++) {
if (v & m) {
count++;
@@ -302,8 +307,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
}
} else {
i+=BITS_PER_LONG;
- page += BITS_PER_LONG;
}
+ pfn += BITS_PER_LONG;
}
total += count;
diff --git a/mm/memory.c b/mm/memory.c
index da91b7b..30975ef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -58,7 +58,7 @@
#include <linux/swapops.h>
#include <linux/elf.h>
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
struct page *mem_map;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 20e2395..5c1b898 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages);
* Used by page_zone() to look up the address of the struct zone whose
* id is encoded in the upper bits of page->flags
*/
-struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
+struct zone *zone_table[1 << ZONETABLE_SHIFT];
EXPORT_SYMBOL(zone_table);
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -1649,11 +1649,15 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
- struct page *start = pfn_to_page(start_pfn);
struct page *page;
+ int end_pfn = start_pfn + size;
+ int pfn;
- for (page = start; page < (start + size); page++) {
- set_page_links(page, zone, nid);
+ for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
+ if (!early_pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ set_page_links(page, zone, nid, pfn);
set_page_count(page, 0);
reset_page_mapcount(page);
SetPageReserved(page);
@@ -1677,6 +1681,20 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
}
}
+#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) /* NOTE(review): macro arguments are not parenthesized */
+void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
+ unsigned long size)
+{ /* Record zone in zone_table[], indexed either by (nid, zid) or by (section, zid) for each section from pfn to pfn + size. */
+ unsigned long snum = pfn_to_section_nr(pfn);
+ unsigned long end = pfn_to_section_nr(pfn + size); /* NOTE(review): pfn + size is one past the last pfn and is used as an inclusive bound below - confirm intended */
+
+ if (FLAGS_HAS_NODE) /* presumably true when page->flags carries the node id - TODO confirm */
+ zone_table[ZONETABLE_INDEX(nid, zid)] = zone; /* a single entry for this node/zone pair */
+ else
+ for (; snum <= end; snum++) /* inclusive of the section containing pfn + size */
+ zone_table[ZONETABLE_INDEX(snum, zid)] = zone; /* one entry per section number */
+}
+
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(size, nid, zone, start_pfn) \
memmap_init_zone((size), (nid), (zone), (start_pfn))
@@ -1861,7 +1879,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
unsigned long size, realsize;
unsigned long batch;
- zone_table[NODEZONE(nid, j)] = zone;
realsize = size = zones_size[j];
if (zholes_size)
realsize -= zholes_size[j];
@@ -1927,6 +1944,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
memmap_init(size, nid, j, zone_start_pfn);
+ zonetable_add(zone, nid, j, zone_start_pfn, size);
+
zone_start_pfn += size;
zone_init_free_lists(pgdat, zone, zone->spanned_pages);
@@ -1935,28 +1954,30 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
static void __init alloc_node_mem_map(struct pglist_data *pgdat)
{
- unsigned long size;
- struct page *map;
-
/* Skip empty nodes */
if (!pgdat->node_spanned_pages)
return;
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
+ unsigned long size;
+ struct page *map;
+
size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
pgdat->node_mem_map = map;
}
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
/*
* With no DISCONTIG, the global mem_map is just set as node 0's
*/
if (pgdat == NODE_DATA(0))
mem_map = NODE_DATA(0)->node_mem_map;
#endif
+#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
void __init free_area_init_node(int nid, struct pglist_data *pgdat,
diff --git a/mm/sparse.c b/mm/sparse.c
new file mode 100644
index 0000000..f888385
--- /dev/null
+++ b/mm/sparse.c
@@ -0,0 +1,85 @@
+/*
+ * sparse memory mappings.
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/dma.h>
+
+/*
+ * Permanent SPARSEMEM data:
+ *
+ * 1) mem_section - memory sections, mem_map's for valid memory
+ */
+struct mem_section mem_section[NR_MEM_SECTIONS];
+EXPORT_SYMBOL(mem_section);
+
+/* Mark each section touched by the pfn range [start, end) as present in mem_section[]; nid is not used by this body. */
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+ unsigned long pfn;
+
+ start &= PAGE_SECTION_MASK; /* presumably rounds start down to a section boundary - confirm against the mask definition */
+ for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { /* one iteration per section-sized step */
+ unsigned long section = pfn_to_section_nr(pfn);
+ if (!mem_section[section].section_mem_map) /* entries that are already non-zero are left untouched */
+ mem_section[section].section_mem_map = (void *) -1; /* non-zero placeholder; sparse_init() overwrites non-zero entries */
+ }
+}
+
+/*
+ * Only used by the i386 NUMA architectures, but relatively generic code.
+ * Returns the struct page bytes for nid's valid section steps in [start_pfn, end_pfn).
+ */
+unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long pfn;
+ unsigned long nr_pages = 0;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { /* only one pfn per step is examined */
+ if (nid != early_pfn_to_nid(pfn)) /* step belongs to a different node */
+ continue;
+
+ if (pfn_valid(pfn)) /* a valid sampled pfn counts the whole step */
+ nr_pages += PAGES_PER_SECTION;
+ }
+
+ return nr_pages * sizeof(struct page); /* page count converted to bytes of struct page */
+}
+
+/*
+ * For every mem_section[] entry with a non-zero section_mem_map, allocate a
+ * mem_map of PAGES_PER_SECTION struct pages and record it in that entry.
+ */
+void sparse_init(void)
+{
+ unsigned long pnum;
+ struct page *map;
+ int nid;
+
+ for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+ if (!mem_section[pnum].section_mem_map) /* entry is zero: nothing to allocate for this section */
+ continue;
+
+ nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); /* node looked up from the section's first pfn */
+ map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
+ if (!map) /* alloc_remap() gave nothing: fall back to the bootmem allocator */
+ map = alloc_bootmem_node(NODE_DATA(nid),
+ sizeof(struct page) * PAGES_PER_SECTION);
+ if (!map) { /* NOTE(review): confirm whether alloc_bootmem_node() can actually return NULL here */
+ mem_section[pnum].section_mem_map = 0; /* clear the entry so it no longer tests as non-zero */
+ continue;
+ }
+
+ /*
+ * Subtle: the map is stored biased by the section's first pfn, so
+ * that section_mem_map + pfn is the struct page for pfn and
+ * page - section_mem_map gives back the page frame number.
+ */
+ mem_section[pnum].section_mem_map = map -
+ section_nr_to_pfn(pnum);
+ }
+}