From 441c7e0a2ed38827b48b907bd1fa29faba2017a3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 20:05:53 +0300 Subject: bootmem: use slab if bootmem is no longer available As a preparation for initializing the slab allocator early, make sure the bootmem allocator does not crash and burn if someone calls it after slab is up; otherwise we'd need a flag day for switching to early slab. Acked-by: Johannes Weiner Acked-by: Linus Torvalds Cc: Christoph Lameter Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/bootmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/bootmem.c b/mm/bootmem.c index daf9271..457269c 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -532,6 +532,9 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc(size, GFP_NOWAIT); + #ifdef CONFIG_HAVE_ARCH_BOOTMEM bootmem_data_t *p_bdata; -- cgit v1.1 From c91c4773b334d4d3a6d44626dc2a558ad97b86f3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 08:10:28 +0300 Subject: bootmem: fix slab fallback on numa If the user requested bootmem allocation on a specific node, we should use kzalloc_node() for the fallback allocation. 
Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/bootmem.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/bootmem.c b/mm/bootmem.c index 457269c..282df0a 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -665,6 +665,9 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); } @@ -696,6 +699,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; @@ -748,6 +754,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } -- cgit v1.1 From 83b519e8b9572c319c8e0c615ee5dd7272856090 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 19:40:04 +0300 Subject: slab: setup allocators earlier in the boot sequence This patch makes kmalloc() available earlier in the boot sequence so we can get rid of some bootmem allocations. The bulk of the changes are due to kmem_cache_init() being called with interrupts disabled which requires some changes to allocator bootstrap code. 
Note: 32-bit x86 does WP protect test in mem_init() so we must setup traps before we call mem_init() during boot as reported by Ingo Molnar: We have a hard crash in the WP-protect code: [ 0.000000] Checking if this processor honours the WP bit even in supervisor mode...BUG: Int 14: CR2 ffcff000 [ 0.000000] EDI 00000188 ESI 00000ac7 EBP c17eaf9c ESP c17eaf8c [ 0.000000] EBX 000014e0 EDX 0000000e ECX 01856067 EAX 00000001 [ 0.000000] err 00000003 EIP c10135b1 CS 00000060 flg 00010002 [ 0.000000] Stack: c17eafa8 c17fd410 c16747bc c17eafc4 c17fd7e5 000011fd f8616000 c18237cc [ 0.000000] 00099800 c17bb000 c17eafec c17f1668 000001c5 c17f1322 c166e039 c1822bf0 [ 0.000000] c166e033 c153a014 c18237cc 00020800 c17eaff8 c17f106a 00020800 01ba5003 [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #52203 [ 0.000000] Call Trace: [ 0.000000] [] ? printk+0x14/0x16 [ 0.000000] [] ? do_test_wp_bit+0x19/0x23 [ 0.000000] [] ? test_wp_bit+0x26/0x64 [ 0.000000] [] ? mem_init+0x1ba/0x1d8 [ 0.000000] [] ? start_kernel+0x164/0x2f7 [ 0.000000] [] ? unknown_bootoption+0x0/0x19c [ 0.000000] [] ? 
__init_begin+0x6a/0x6f Acked-by: Johannes Weiner Acked-by: Linus Torvalds Cc: Christoph Lameter Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- init/main.c | 36 ++++++++++++++++---------- mm/slab.c | 85 ++++++++++++++++++++++++++++++++----------------------------- mm/slub.c | 17 ++++++++----- 3 files changed, 77 insertions(+), 61 deletions(-) diff --git a/init/main.c b/init/main.c index bb7dc57..0ab82a4 100644 --- a/init/main.c +++ b/init/main.c @@ -574,6 +574,28 @@ asmlinkage void __init start_kernel(void) setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + build_all_zonelists(); + page_alloc_init(); + + printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + parse_early_param(); + parse_args("Booting kernel", static_command_line, __start___param, + __stop___param - __start___param, + &unknown_bootoption); + /* + * These use large bootmem allocations and must precede + * kmem_cache_init() + */ + pidhash_init(); + vmalloc_init(); + vfs_caches_init_early(); + sort_main_extable(); + trap_init(); + /* + * Set up kernel memory allocators + */ + mem_init(); + kmem_cache_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() @@ -585,25 +607,15 @@ asmlinkage void __init start_kernel(void) * fragile until we cpu_idle() for the first time. 
*/ preempt_disable(); - build_all_zonelists(); - page_alloc_init(); - printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); - parse_early_param(); - parse_args("Booting kernel", static_command_line, __start___param, - __stop___param - __start___param, - &unknown_bootoption); if (!irqs_disabled()) { printk(KERN_WARNING "start_kernel(): bug: interrupts were " "enabled *very* early, fixing it\n"); local_irq_disable(); } - sort_main_extable(); - trap_init(); rcu_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); - pidhash_init(); init_timers(); hrtimers_init(); softirq_init(); @@ -645,14 +657,10 @@ asmlinkage void __init start_kernel(void) initrd_start = 0; } #endif - vmalloc_init(); - vfs_caches_init_early(); cpuset_init_early(); page_cgroup_init(); - mem_init(); enable_debug_pagealloc(); cpu_hotplug_init(); - kmem_cache_init(); kmemtrace_init(); debug_objects_mem_init(); idr_init_cache(); diff --git a/mm/slab.c b/mm/slab.c index f85831d..2bd611f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -315,7 +315,7 @@ static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static int enable_cpucache(struct kmem_cache *cachep); +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); /* @@ -958,12 +958,12 @@ static void __cpuinit start_cpu_timer(int cpu) } static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount) + int batchcount, gfp_t gfp) { int memsize = sizeof(void *) * entries + sizeof(struct array_cache); struct array_cache *nc = NULL; - nc = kmalloc_node(memsize, GFP_KERNEL, node); + nc = kmalloc_node(memsize, gfp, node); if (nc) { nc->avail = 0; nc->limit = entries; @@ -1003,7 +1003,7 @@ static int transfer_objects(struct array_cache *to, #define drain_alien_cache(cachep, alien) do { } while (0) #define reap_alien(cachep, l3) 
do { } while (0) -static inline struct array_cache **alloc_alien_cache(int node, int limit) +static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { return (struct array_cache **)BAD_ALIEN_MAGIC; } @@ -1034,7 +1034,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); -static struct array_cache **alloc_alien_cache(int node, int limit) +static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { struct array_cache **ac_ptr; int memsize = sizeof(void *) * nr_node_ids; @@ -1042,14 +1042,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit) if (limit > 1) limit = 12; - ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); + ac_ptr = kmalloc_node(memsize, gfp, node); if (ac_ptr) { for_each_node(i) { if (i == node || !node_online(i)) { ac_ptr[i] = NULL; continue; } - ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); + ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); if (!ac_ptr[i]) { for (i--; i >= 0; i--) kfree(ac_ptr[i]); @@ -1282,20 +1282,20 @@ static int __cpuinit cpuup_prepare(long cpu) struct array_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); + cachep->batchcount, GFP_KERNEL); if (!nc) goto bad; if (cachep->shared) { shared = alloc_arraycache(node, cachep->shared * cachep->batchcount, - 0xbaadf00d); + 0xbaadf00d, GFP_KERNEL); if (!shared) { kfree(nc); goto bad; } } if (use_alien_caches) { - alien = alloc_alien_cache(node, cachep->limit); + alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); if (!alien) { kfree(shared); kfree(nc); @@ -1399,10 +1399,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); 
BUG_ON(!ptr); - local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); /* * Do not assume that spinlocks can be initialized via memcpy: @@ -1411,7 +1410,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; - local_irq_enable(); } /* @@ -1575,9 +1573,8 @@ void __init kmem_cache_init(void) { struct array_cache *ptr; - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); @@ -1587,11 +1584,9 @@ void __init kmem_cache_init(void) spin_lock_init(&ptr->lock); cache_cache.array[smp_processor_id()] = ptr; - local_irq_enable(); - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), @@ -1603,7 +1598,6 @@ void __init kmem_cache_init(void) malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; - local_irq_enable(); } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1627,7 +1621,7 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - if (enable_cpucache(cachep)) + if (enable_cpucache(cachep, GFP_NOWAIT)) BUG(); mutex_unlock(&cache_chain_mutex); } @@ -2064,10 +2058,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) +static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { if (g_cpucache_up == FULL) - return enable_cpucache(cachep); + return enable_cpucache(cachep, gfp); if (g_cpucache_up == 
NONE) { /* @@ -2089,7 +2083,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) g_cpucache_up = PARTIAL_AC; } else { cachep->array[smp_processor_id()] = - kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + kmalloc(sizeof(struct arraycache_init), gfp); if (g_cpucache_up == PARTIAL_AC) { set_up_list3s(cachep, SIZE_L3); @@ -2153,6 +2147,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; + gfp_t gfp; /* * Sanity checks... these are all serious usage bugs. @@ -2168,8 +2163,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, * We use cache_chain_mutex to ensure a consistent view of * cpu_online_mask as well. Please see cpuup_callback */ - get_online_cpus(); - mutex_lock(&cache_chain_mutex); + if (slab_is_available()) { + get_online_cpus(); + mutex_lock(&cache_chain_mutex); + } list_for_each_entry(pc, &cache_chain, next) { char tmp; @@ -2278,8 +2275,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ align = ralign; + if (slab_is_available()) + gfp = GFP_KERNEL; + else + gfp = GFP_NOWAIT; + /* Get cache's description obj. 
*/ - cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); + cachep = kmem_cache_zalloc(&cache_cache, gfp); if (!cachep) goto oops; @@ -2382,7 +2384,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->ctor = ctor; cachep->name = name; - if (setup_cpu_cache(cachep)) { + if (setup_cpu_cache(cachep, gfp)) { __kmem_cache_destroy(cachep); cachep = NULL; goto oops; @@ -2394,8 +2396,10 @@ oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); + if (slab_is_available()) { + mutex_unlock(&cache_chain_mutex); + put_online_cpus(); + } return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -3802,7 +3806,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); /* * This initializes kmem_list3 or resizes various caches for all nodes. */ -static int alloc_kmemlist(struct kmem_cache *cachep) +static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; struct kmem_list3 *l3; @@ -3812,7 +3816,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) for_each_online_node(node) { if (use_alien_caches) { - new_alien = alloc_alien_cache(node, cachep->limit); + new_alien = alloc_alien_cache(node, cachep->limit, gfp); if (!new_alien) goto fail; } @@ -3821,7 +3825,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) if (cachep->shared) { new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, - 0xbaadf00d); + 0xbaadf00d, gfp); if (!new_shared) { free_alien_cache(new_alien); goto fail; @@ -3850,7 +3854,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); + l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -3906,18 +3910,18 @@ static void do_ccupdate_local(void *info) /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct 
kmem_cache *cachep, int limit, - int batchcount, int shared) + int batchcount, int shared, gfp_t gfp) { struct ccupdate_struct *new; int i; - new = kzalloc(sizeof(*new), GFP_KERNEL); + new = kzalloc(sizeof(*new), gfp); if (!new) return -ENOMEM; for_each_online_cpu(i) { new->new[i] = alloc_arraycache(cpu_to_node(i), limit, - batchcount); + batchcount, gfp); if (!new->new[i]) { for (i--; i >= 0; i--) kfree(new->new[i]); @@ -3944,11 +3948,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, kfree(ccold); } kfree(new); - return alloc_kmemlist(cachep); + return alloc_kmemlist(cachep, gfp); } /* Called with cache_chain_mutex held always */ -static int enable_cpucache(struct kmem_cache *cachep) +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) { int err; int limit, shared; @@ -3994,7 +3998,7 @@ static int enable_cpucache(struct kmem_cache *cachep) if (limit > 32) limit = 32; #endif - err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); + err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); @@ -4300,7 +4304,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, res = 0; } else { res = do_tune_cpucache(cachep, limit, - batchcount, shared); + batchcount, shared, + GFP_KERNEL); } break; } diff --git a/mm/slub.c b/mm/slub.c index 5e805a6..c1815a6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2557,13 +2557,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, if (gfp_flags & SLUB_DMA) flags = SLAB_CACHE_DMA; - down_write(&slub_lock); + /* + * This function is called with IRQs disabled during early-boot on + * single CPU so there's no need to take slub_lock here. 
+ */ if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, flags, NULL)) goto panic; list_add(&s->list, &slab_caches); - up_write(&slub_lock); + if (sysfs_slab_add(s)) goto panic; return s; @@ -3021,7 +3024,7 @@ void __init kmem_cache_init(void) * kmem_cache_open for slab_state == DOWN. */ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), GFP_KERNEL); + sizeof(struct kmem_cache_node), GFP_NOWAIT); kmalloc_caches[0].refcount = -1; caches++; @@ -3034,16 +3037,16 @@ void __init kmem_cache_init(void) /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, GFP_KERNEL); + "kmalloc-96", 96, GFP_NOWAIT); caches++; create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, GFP_KERNEL); + "kmalloc-192", 192, GFP_NOWAIT); caches++; } for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, GFP_KERNEL); + "kmalloc", 1 << i, GFP_NOWAIT); caches++; } @@ -3080,7 +3083,7 @@ void __init kmem_cache_init(void) /* Provide the correct kmalloc names now that the caches are up */ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = - kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); + kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); -- cgit v1.1 From 43ebdac42f16037263b52a5aeedcd1bfa4a9bb29 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 25 May 2009 15:01:35 +0300 Subject: vmalloc: use kzalloc() instead of alloc_bootmem() We can call vmalloc_init() after kmem_cache_init() and use kzalloc() instead of the bootmem allocator when initializing vmalloc data structures. 
Acked-by: Johannes Weiner Acked-by: Linus Torvalds Acked-by: Nick Piggin Cc: Ingo Molnar Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- init/main.c | 2 +- mm/vmalloc.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 0ab82a4..6d38f96 100644 --- a/init/main.c +++ b/init/main.c @@ -587,7 +587,6 @@ asmlinkage void __init start_kernel(void) * kmem_cache_init() */ pidhash_init(); - vmalloc_init(); vfs_caches_init_early(); sort_main_extable(); trap_init(); @@ -596,6 +595,7 @@ asmlinkage void __init start_kernel(void) */ mem_init(); kmem_cache_init(); + vmalloc_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 083716e..3235138 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -1032,7 +1031,7 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { - va = alloc_bootmem(sizeof(struct vmap_area)); + va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); va->flags = tmp->flags | VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; -- cgit v1.1 From 444f478f65c7ca4606f9965b31feed13fe2bc9fa Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 18:29:06 +0300 Subject: init: introduce mm_init() As suggested by Christoph Lameter, introduce mm_init() now that we initialize all the kernel memory allocations together. 
Cc: Christoph Lameter Signed-off-by: Pekka Enberg --- init/main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/init/main.c b/init/main.c index 6d38f96..7917695 100644 --- a/init/main.c +++ b/init/main.c @@ -533,6 +533,16 @@ void __init __weak thread_info_cache_init(void) { } +/* + * Set up kernel memory allocators + */ +static void __init mm_init(void) +{ + mem_init(); + kmem_cache_init(); + vmalloc_init(); +} + asmlinkage void __init start_kernel(void) { char * command_line; @@ -590,12 +600,7 @@ asmlinkage void __init start_kernel(void) vfs_caches_init_early(); sort_main_extable(); trap_init(); - /* - * Set up kernel memory allocators - */ - mem_init(); - kmem_cache_init(); - vmalloc_init(); + mm_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() -- cgit v1.1 From 36b7b6d465489c4754c4fd66fcec6086eba87896 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 23:42:36 +0300 Subject: sched: use kzalloc() instead of the bootmem allocator Now that kmem_cache_init() happens before sched_init(), we should use kzalloc() and not the bootmem allocator. 
Signed-off-by: Pekka Enberg --- kernel/sched.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 14c447a..a9ff953 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #include #include @@ -7782,21 +7781,18 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) { + gfp_t gfp = GFP_KERNEL; + memset(rd, 0, sizeof(*rd)); - if (bootmem) { - alloc_bootmem_cpumask_var(&def_root_domain.span); - alloc_bootmem_cpumask_var(&def_root_domain.online); - alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); - cpupri_init(&rd->cpupri, true); - return 0; - } + if (bootmem) + gfp = GFP_NOWAIT; - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) + if (!alloc_cpumask_var(&rd->span, gfp)) goto out; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) + if (!alloc_cpumask_var(&rd->online, gfp)) goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) + if (!alloc_cpumask_var(&rd->rto_mask, gfp)) goto free_online; if (cpupri_init(&rd->cpupri, false) != 0) @@ -9123,7 +9119,7 @@ void __init sched_init(void) * we use alloc_bootmem(). */ if (alloc_size) { - ptr = (unsigned long)alloc_bootmem(alloc_size); + ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); #ifdef CONFIG_FAIR_GROUP_SCHED init_task_group.se = (struct sched_entity **)ptr; -- cgit v1.1 From a5f4f52e82114e85aa1a066bd1a450acc19a464d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 23:53:37 +0300 Subject: vt: use kzalloc() instead of the bootmem allocator Now that kmem_cache_init() happens before console_init(), we should use kzalloc() and not the bootmem allocator. 
Signed-off-by: Pekka Enberg --- drivers/char/vt.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 08151d4..c796a86 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -95,7 +95,6 @@ #include #include #include -#include #include #include #include @@ -2875,14 +2874,11 @@ static int __init con_init(void) mod_timer(&console_timer, jiffies + blankinterval); } - /* - * kmalloc is not running yet - we use the bootmem allocator. - */ for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { - vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data)); + vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); - vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size); + vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); vc->vc_kmalloced = 0; vc_init(vc, vc->vc_rows, vc->vc_cols, currcons || !vc->vc_sw->con_save_screen); -- cgit v1.1 From 38c7fed2f5ffee17e1fa3e0f78b0e1bf43d52d13 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 May 2009 15:10:58 +0300 Subject: x86: remove some alloc_bootmem_cpumask_var calling Now that we set up the slab allocator earlier, we can get rid of some alloc_bootmem_cpumask_var() calls in boot code. 
Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- arch/x86/kernel/apic/io_apic.c | 4 ++-- include/linux/irq.h | 18 +++++++----------- kernel/cpuset.c | 2 +- kernel/profile.c | 6 ------ lib/cpumask.c | 11 ++--------- 5 files changed, 12 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1946fac..139201a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -185,8 +185,8 @@ int __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_bootmem_cpumask_var(&cfg[i].domain); - alloc_bootmem_cpumask_var(&cfg[i].old_domain); + alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT); + alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT); if (i < NR_IRQS_LEGACY) cpumask_setall(cfg[i].domain); } diff --git a/include/linux/irq.h b/include/linux/irq.h index eedbb8e..1e50c34 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Returns true if successful (or not required). 
*/ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) + bool boot) { -#ifdef CONFIG_CPUMASK_OFFSTACK - if (boot) { - alloc_bootmem_cpumask_var(&desc->affinity); + gfp_t gfp = GFP_ATOMIC; -#ifdef CONFIG_GENERIC_PENDING_IRQ - alloc_bootmem_cpumask_var(&desc->pending_mask); -#endif - return true; - } + if (boot) + gfp = GFP_NOWAIT; - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { free_cpumask_var(desc->affinity); return false; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 026facc..d5a7e17 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = { int __init cpuset_init_early(void) { - alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); + alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT); top_cpuset.mems_generation = cpuset_mems_generation++; return 0; diff --git a/kernel/profile.c b/kernel/profile.c index 7724e04..28cf26a 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -111,12 +111,6 @@ int __ref profile_init(void) /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; buffer_bytes = prof_len*sizeof(atomic_t); - if (!slab_is_available()) { - prof_buffer = alloc_bootmem(buffer_bytes); - alloc_bootmem_cpumask_var(&prof_cpu_mask); - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - return 0; - } if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; diff --git a/lib/cpumask.c b/lib/cpumask.c index eb23aaa..7bb4142 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -92,15 +92,8 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { - if 
(likely(slab_is_available())) - *mask = kmalloc_node(cpumask_size(), flags, node); - else { -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - printk(KERN_ERR - "=> alloc_cpumask_var: kmalloc not available!\n"); -#endif - *mask = NULL; - } + *mask = kmalloc_node(cpumask_size(), flags, node); + #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (!*mask) { printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); -- cgit v1.1 From dad213aeb59718623fc59defeff95fe8c3feb8a0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 May 2009 18:14:40 -0700 Subject: irq/cpumask: make memoryless node zero happy Don't hardcode to node zero for early boot IRQ setup memory allocations. [ penberg@cs.helsinki.fi: minor cleanups ] Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- arch/x86/kernel/apic/io_apic.c | 6 ++++-- kernel/irq/handle.c | 9 +++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 139201a..94605e7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -177,16 +177,18 @@ int __init arch_early_irq_init(void) struct irq_cfg *cfg; struct irq_desc *desc; int count; + int node; int i; cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); + node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_cpumask_var(&cfg[i].domain, GFP_NOWAIT); - alloc_cpumask_var(&cfg[i].old_domain, GFP_NOWAIT); + alloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); + alloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); if (i < NR_IRQS_LEGACY) cpumask_setall(cfg[i].domain); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a600184..e161999 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -150,6 +150,7 @@ int __init early_irq_init(void) { struct irq_desc *desc; int legacy_count; + int node; int i; init_irq_default_affinity(); @@ -160,20 +161,20 @@ int __init 
early_irq_init(void) desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); + node = first_online_node; /* allocate irq_desc_ptrs array based on nr_irqs */ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); /* allocate based on nr_cpu_ids */ - /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ - kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * - sizeof(int)); + kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * + sizeof(int), GFP_NOWAIT, node); for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], node, true); init_desc_masks(&desc[i]); irq_desc_ptrs[i] = desc + i; } -- cgit v1.1 From 959982fee4e635c61780e989c3e34267143fcc02 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 May 2009 18:15:16 -0700 Subject: memcg: don't use bootmem allocator in setup code The bootmem allocator is no longer available for page_cgroup_init() because we set up the kernel slab allocator much earlier now. 
Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/page_cgroup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 791905c..3dd4a90 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -47,6 +47,8 @@ static int __init alloc_node_page_cgroup(int nid) struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; + struct page *page; + unsigned int order; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; @@ -55,11 +57,13 @@ static int __init alloc_node_page_cgroup(int nid) return 0; table_size = sizeof(struct page_cgroup) * nr_pages; - - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), - table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); - if (!base) + order = get_order(table_size); + page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); + if (!page) + page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); + if (!page) return -ENOMEM; + base = page_address(page); for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); -- cgit v1.1 From 4bdddf8ff9bbb8aa7b4d7847586202bd25842c90 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 08:35:27 +0300 Subject: sched: use alloc_cpumask_var() instead of alloc_bootmem_cpumask_var() Slab is initialized when sched_init() runs now so let's use alloc_cpumask_var(). 
Cc: Ingo Molnar Cc: Linus Torvalds Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- kernel/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index a9ff953..12cc09c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9310,13 +9310,13 @@ void __init sched_init(void) current->sched_class = &fair_sched_class; /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_bootmem_cpumask_var(&nohz_cpu_mask); + alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ - alloc_bootmem_cpumask_var(&nohz.cpu_mask); - alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); + alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); + alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); #endif - alloc_bootmem_cpumask_var(&cpu_isolated_map); + alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ scheduler_running = 1; -- cgit v1.1 From 0fb530291621c8b8a1b16abeeab05d9262489f71 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 08:41:22 +0300 Subject: sched: use slab in cpupri_init() Let's not use the bootmem allocator in cpupri_init() as slab is already up when it is run.
Cc: Ingo Molnar Cc: Linus Torvalds Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- kernel/sched.c | 2 +- kernel/sched_cpupri.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 12cc09c..dcf2dc2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7795,7 +7795,7 @@ static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) if (!alloc_cpumask_var(&rd->rto_mask, gfp)) goto free_online; - if (cpupri_init(&rd->cpupri, false) != 0) + if (cpupri_init(&rd->cpupri, bootmem) != 0) goto free_rto_mask; return 0; diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 344712a..7deffc9 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -154,8 +154,12 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) */ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) { + gfp_t gfp = GFP_KERNEL; int i; + if (bootmem) + gfp = GFP_NOWAIT; + memset(cp, 0, sizeof(*cp)); for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { @@ -163,9 +167,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (bootmem) - alloc_bootmem_cpumask_var(&vec->mask); - else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&vec->mask, gfp)) goto cleanup; } -- cgit v1.1 From 22fb4e71e646695c7e0f379ada66b372c2d1aa1a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 14:46:49 +0300 Subject: irq: use kcalloc() instead of the bootmem allocator Fixes the following problem: [ 0.000000] Experimental hierarchical RCU init done. [ 0.000000] NR_IRQS:4352 nr_irqs:256 [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: at mm/bootmem.c:537 alloc_arch_preferred_bootmem+0x40/0x7e() [ 0.000000] Hardware name: To Be Filled By O.E.M. [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #59709 [ 0.000000] Call Trace: [ 0.000000] [] ? 
alloc_arch_preferred_bootmem+0x40/0x7e [ 0.000000] [] warn_slowpath_common+0x88/0xcb [ 0.000000] [] warn_slowpath_null+0x27/0x3d [ 0.000000] [] alloc_arch_preferred_bootmem+0x40/0x7e [ 0.000000] [] ___alloc_bootmem_nopanic+0x4e/0xec [ 0.000000] [] ___alloc_bootmem+0x20/0x61 [ 0.000000] [] __alloc_bootmem+0x1e/0x34 [ 0.000000] [] early_irq_init+0x6d/0x118 [ 0.000000] [] ? early_idt_handler+0x0/0x71 [ 0.000000] [] start_kernel+0x192/0x394 [ 0.000000] [] ? early_idt_handler+0x0/0x71 [ 0.000000] [] x86_64_start_reservations+0xb4/0xcf [ 0.000000] [] ? __init_begin+0x0/0x140 [ 0.000000] [] x86_64_start_kernel+0x158/0x17b [ 0.000000] ---[ end trace a7919e7f17c0a725 ]--- [ 0.000000] Fast TSC calibration using PIT [ 0.000000] Detected 2002.510 MHz processor. [ 0.004000] Console: colour VGA+ 80x25 Reported-by: Ingo Molnar Signed-off-by: Pekka Enberg --- kernel/irq/handle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e161999..1045785 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -164,7 +164,7 @@ int __init early_irq_init(void) node = first_online_node; /* allocate irq_desc_ptrs array based on nr_irqs */ - irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT); /* allocate based on nr_cpu_ids */ kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * -- cgit v1.1 From b8ec757390282e21d349bf6b602a8cb182da0429 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 19:25:37 +0300 Subject: vgacon: use slab allocator instead of the bootmem allocator Slab is initialized before the console subsystem so use the slab allocator in vgacon_scrollback_startup(). 
Signed-off-by: Pekka Enberg --- drivers/video/console/vgacon.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 38e86b8..59d7d5e 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -180,7 +180,7 @@ static inline void vga_set_mem_top(struct vc_data *c) } #ifdef CONFIG_VGACON_SOFT_SCROLLBACK -#include <linux/bootmem.h> +#include <linux/slab.h> /* software scrollback */ static void *vgacon_scrollback; static int vgacon_scrollback_tail; @@ -210,8 +210,7 @@ static void vgacon_scrollback_init(int pitch) */ static void __init_refok vgacon_scrollback_startup(void) { - vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE - * 1024); + vgacon_scrollback = kcalloc(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, GFP_NOWAIT); vgacon_scrollback_init(vga_video_num_columns * 2); } -- cgit v1.1