From 773e3eb7b81e5ba13b5155dfb3bb75b8ce37f8f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:33 -0800 Subject: init: Move radix_tree_init() early Prepare for using radix trees in early_irq_init(). Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-30-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 4cb47a1..8451878 100644 --- a/init/main.c +++ b/init/main.c @@ -584,6 +584,7 @@ asmlinkage void __init start_kernel(void) local_irq_disable(); } rcu_init(); + radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); @@ -657,7 +658,6 @@ asmlinkage void __init start_kernel(void) proc_caches_init(); buffer_init(); key_init(); - radix_tree_init(); security_init(); vfs_caches_init(totalram_pages); signals_init(); -- cgit v1.1 From 2b633e3fac5efada088b57d31e65401f22bcc18f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:37 -0800 Subject: smp: Use nr_cpus= to set nr_cpu_ids early On x86, before prefill_possible_map(), nr_cpu_ids will be NR_CPUS aka CONFIG_NR_CPUS. Add nr_cpus= to set nr_cpu_ids. so we can simulate cpus <=8 are installed on normal config. -v2: accordging to Christoph, acpi_numa_init should use nr_cpu_ids in stead of NR_CPUS. -v3: add doc in kernel-parameters.txt according to Andrew. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-34-git-send-email-yinghai@kernel.org> Acked-by: Linus Torvalds Signed-off-by: H. Peter Anvin Cc: Tony Luck --- init/main.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'init') diff --git a/init/main.c b/init/main.c index 8451878..05b5283 100644 --- a/init/main.c +++ b/init/main.c @@ -149,6 +149,20 @@ static int __init nosmp(char *str) early_param("nosmp", nosmp); +/* this is hard limit */ +static int __init nrcpus(char *str) +{ + int nr_cpus; + + get_option(&str, &nr_cpus); + if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) + nr_cpu_ids = nr_cpus; + + return 0; +} + +early_param("nr_cpus", nrcpus); + static int __init maxcpus(char *str) { get_option(&str, &setup_max_cpus); -- cgit v1.1 From 2bd3a997befc226ab4b504f05c5cbba305f3e0e6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 2 Mar 2010 23:53:19 -0800 Subject: init: Open /dev/console from rootfs To avoid potential problems with an empty /dev open /dev/console from rootfs instead of waiting to mount our root filesystem and mounting it there. This effectively guarantees that there will be a device node, and it won't be on a filesystem that we will ever unmount, so there are no issues with leaving /dev/console open and pinning the filesystem. This is actually more effective than automatically mounting devtmpfs on /dev because it removes removes the occasionally problematic assumption that /dev/console exists from the boot code. With this patch I was able to throw busybox on my /boot partition (which has no /dev directory) and boot into userspace without problems. The only possible negative consequence I can think of is that someone out there deliberately used did not use a character device that is major 5 minor 2 for /dev/console. Does anyone know of a situation in which that could make sense? Signed-off-by: Eric W. Biederman Signed-off-by: Al Viro --- init/do_mounts_initrd.c | 4 ---- init/main.c | 11 ++++++----- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'init') diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 614241b..2b10853 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -30,11 +30,7 @@ static int __init do_linuxrc(void * shell) extern char * envp_init[]; sys_close(old_fd);sys_close(root_fd); - sys_close(0);sys_close(1);sys_close(2); sys_setsid(); - (void) sys_open("/dev/console",O_RDWR,0); - (void) sys_dup(0); - (void) sys_dup(0); return kernel_execve(shell, argv, envp_init); } diff --git a/init/main.c b/init/main.c index 4cb47a1..106e02d 100644 --- a/init/main.c +++ b/init/main.c @@ -806,11 +806,6 @@ static noinline int init_post(void) system_state = SYSTEM_RUNNING; numa_default_policy(); - if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk(KERN_WARNING "Warning: unable to open an initial console.\n"); - - (void) sys_dup(0); - (void) sys_dup(0); current->signal->flags |= SIGNAL_UNKILLABLE; @@ -873,6 +868,12 @@ static int __init kernel_init(void * unused) do_basic_setup(); + /* Open the /dev/console on the rootfs, this should never fail */ + if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + printk(KERN_WARNING "Warning: unable to open an initial console.\n"); + + (void) sys_dup(0); + (void) sys_dup(0); /* * check if there is an early userspace init. If yes, let it do all * the work -- cgit v1.1 From 452aa6999e6703ffbddd7f6ea124d3968915f3e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Mar 2010 13:42:13 -0800 Subject: mm/pm: force GFP_NOIO during suspend/hibernation and resume There are quite a few GFP_KERNEL memory allocations made during suspend/hibernation and resume that may cause the system to hang, because the I/O operations they depend on cannot be completed due to the underlying devices being suspended. Avoid this problem by clearing the __GFP_IO and __GFP_FS bits in gfp_allowed_mask before suspend/hibernation and restoring the original values of these bits in gfp_allowed_mask durig the subsequent resume. [akpm@linux-foundation.org: fix CONFIG_PM=n linkage] Signed-off-by: Rafael J. Wysocki Reported-by: Maxim Levitsky Cc: Sebastian Ott Cc: Benjamin Herrenschmidt Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 40aaa02..41d0f10 100644 --- a/init/main.c +++ b/init/main.c @@ -618,7 +618,7 @@ asmlinkage void __init start_kernel(void) local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. */ - set_gfp_allowed_mask(__GFP_BITS_MASK); + gfp_allowed_mask = __GFP_BITS_MASK; kmem_cache_init_late(); -- cgit v1.1 From 9a85b8d6049cbb0e7961df2069322fbc4192026a Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Fri, 5 Mar 2010 13:42:39 -0800 Subject: init/main.c: improve usability in case of init binary failure - new Documentation/init.txt file describing various forms of failure trying to load the init binary after kernel bootup - extend the init/main.c init failure message to direct to Documentation/init.txt Signed-off-by: Andreas Mohr Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 41d0f10..b09a828 100644 --- a/init/main.c +++ b/init/main.c @@ -847,7 +847,8 @@ static noinline int init_post(void) run_init_process("/bin/init"); run_init_process("/bin/sh"); - panic("No init found. Try passing init= option to kernel."); + panic("No init found. Try passing init= option to kernel. " + "See Linux Documentation/init.txt for guidance."); } static int __init kernel_init(void * unused) -- cgit v1.1 From 8aaed5bec2b9177eab1796c8c4f7a4c90804eef6 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 5 Mar 2010 13:42:39 -0800 Subject: init/initramfs.c: fix "symbol shadows an earlier one" noise The symbol 'count' is a local global variable in this file. The function clean_rootfs() should use a different symbol name to prevent "symbol shadows an earlier one" noise. Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/initramfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'init') diff --git a/init/initramfs.c b/init/initramfs.c index b37d34b..37d3859 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -525,7 +525,7 @@ static void __init clean_rootfs(void) int fd; void *buf; struct linux_dirent64 *dirp; - int count; + int num; fd = sys_open("/", O_RDONLY, 0); WARN_ON(fd < 0); @@ -539,9 +539,9 @@ static void __init clean_rootfs(void) } dirp = buf; - count = sys_getdents64(fd, dirp, BUF_SIZE); - while (count > 0) { - while (count > 0) { + num = sys_getdents64(fd, dirp, BUF_SIZE); + while (num > 0) { + while (num > 0) { struct stat st; int ret; @@ -554,12 +554,12 @@ static void __init clean_rootfs(void) sys_unlink(dirp->d_name); } - count -= dirp->d_reclen; + num -= dirp->d_reclen; dirp = (void *)dirp + dirp->d_reclen; } dirp = buf; memset(buf, 0, BUF_SIZE); - count = sys_getdents64(fd, dirp, BUF_SIZE); + num = sys_getdents64(fd, dirp, BUF_SIZE); } sys_close(fd); -- cgit v1.1 From 7463e633c5f94792dcff1afefb0d2961318a9d09 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 5 Mar 2010 13:42:46 -0800 Subject: init/main.c: make setup_max_cpus static for !SMP The only in tree external users of the symbol setup_max_cpus are in arch/x86/. The files ./kernel/alternative.c, ./kernel/visws_quirks.c, and ./mm/kmemcheck/kmemcheck.c are all guarded by CONFIG_SMP being defined. For this case the symbol is an unsigned int and declared as an extern in include/linux/smp.h. When CONFIG_SMP is not defined the symbol setup_max_cpus is a constant value that is only used in init/main.c. Make the symbol static for this case. Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index b09a828..a1ab78c 100644 --- a/init/main.c +++ b/init/main.c @@ -174,7 +174,7 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); #else -const unsigned int setup_max_cpus = NR_CPUS; +static const unsigned int setup_max_cpus = NR_CPUS; #endif /* -- cgit v1.1 From 0dea116876eefc9c7ca9c5d74fe665481e499fa3 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 10 Mar 2010 15:22:20 -0800 Subject: cgroup: implement eventfd-based generic API for notifications This patchset introduces eventfd-based API for notifications in cgroups and implements memory notifications on top of it. It uses statistics in memory controler to track memory usage. Output of time(1) on building kernel on tmpfs: Root cgroup before changes: make -j2 506.37 user 60.93s system 193% cpu 4:52.77 total Non-root cgroup before changes: make -j2 507.14 user 62.66s system 193% cpu 4:54.74 total Root cgroup after changes (0 thresholds): make -j2 507.13 user 62.20s system 193% cpu 4:53.55 total Non-root cgroup after changes (0 thresholds): make -j2 507.70 user 64.20s system 193% cpu 4:55.70 total Root cgroup after changes (1 thresholds, never crossed): make -j2 506.97 user 62.20s system 193% cpu 4:53.90 total Non-root cgroup after changes (1 thresholds, never crossed): make -j2 507.55 user 64.08s system 193% cpu 4:55.63 total This patch: Introduce the write-only file "cgroup.event_control" in every cgroup. To register new notification handler you need: - create an eventfd; - open a control file to be monitored. Callbacks register_event() and unregister_event() must be defined for the control file; - write " " to cgroup.event_control. Interpretation of args is defined by control file implementation; eventfd will be woken up by control file implementation or when the cgroup is removed. To unregister notification handler just close eventfd. If you need notification functionality for a control file you have to implement callbacks register_event() and unregister_event() in the struct cftype. [kamezawa.hiroyu@jp.fujitsu.com: Kconfig fix] Signed-off-by: Kirill A. Shutemov Reviewed-by: KAMEZAWA Hiroyuki Paul Menage Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Dan Malek Cc: Vladislav Buzov Cc: Daisuke Nishimura Cc: Alexander Shishkin Cc: Davide Libenzi Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 089a230..eb77e8c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -463,6 +463,7 @@ config HAVE_UNSTABLE_SCHED_CLOCK menuconfig CGROUPS boolean "Control Group support" + depends on EVENTFD help This option adds support for grouping sets of processes together, for use with process control subsystems such as Cpusets, CFS, memory -- cgit v1.1 From 5ab116c9349ef52d6fbd2e2917a53f13194b048e Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 23 Mar 2010 13:35:34 -0700 Subject: cpuset: fix the problem that cpuset_mem_spread_node() returns an offline node cpuset_mem_spread_node() returns an offline node, and causes an oops. This patch fixes it by initializing task->mems_allowed to node_states[N_HIGH_MEMORY], and updating task->mems_allowed when doing memory hotplug. Signed-off-by: Miao Xie Acked-by: David Rientjes Reported-by: Nick Piggin Tested-by: Nick Piggin Cc: Paul Menage Cc: Li Zefan Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index a1ab78c..cbead27 100644 --- a/init/main.c +++ b/init/main.c @@ -858,7 +858,7 @@ static int __init kernel_init(void * unused) /* * init can allocate pages on any node */ - set_mems_allowed(node_possible_map); + set_mems_allowed(node_states[N_HIGH_MEMORY]); /* * init can run on any cpu. */ -- cgit v1.1