From db5e7ecc4abc91b9f26f0c0d79ef88a51e987d90 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 9 Jun 2011 08:40:59 -0400 Subject: tracing: Fix regression in printk_formats file The fix to fix the printk_formats of modules broke the printk_formats of trace_printks in the kernel. The update of what to show via the seq_file was only updated if the passed in fmt was NULL, which happens only on the first iteration. The result was showing the first format every time instead of iterating through the available formats. Signed-off-by: Steven Rostedt --- kernel/trace/trace_printk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index dff763b..1f06468 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -240,13 +240,10 @@ static const char **find_next(void *v, loff_t *pos) const char **fmt = v; int start_index; - if (!fmt) - fmt = __start___trace_bprintk_fmt + *pos; - start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt; if (*pos < start_index) - return fmt; + return __start___trace_bprintk_fmt + *pos; return find_next_mod_format(start_index, v, fmt, pos); } -- cgit v1.1 From 9a432736904d386cda28b987b38ba14dae960ecc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 May 2011 20:38:55 -0700 Subject: rcu: Simplify curing of load woes Make the functions creating the kthreads wake them up. Leverage the fact that the per-node and boost kthreads can run anywhere, thus dispensing with the need to wake them up once the incoming CPU has gone fully online. Signed-off-by: Paul E. McKenney Tested-by: Daniel J Blueman --- kernel/rcutree.c | 65 ++++++++++++++++--------------------------------- kernel/rcutree_plugin.h | 11 +-------- 2 files changed, 22 insertions(+), 54 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 89419ff..0a8ec5b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1635,6 +1635,20 @@ static int rcu_cpu_kthread(void *arg) * to manipulate rcu_cpu_kthread_task. There might be another CPU * attempting to access it during boot, but the locking in kthread_bind() * will enforce sufficient ordering. + * + * Please note that we cannot simply refuse to wake up the per-CPU + * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, + * which can result in softlockup complaints if the task ends up being + * idle for more than a couple of minutes. + * + * However, please note also that we cannot bind the per-CPU kthread to its + * CPU until that CPU is fully online. We also cannot wait until the + * CPU is fully online before we create its per-CPU kthread, as this would + * deadlock the system when CPU notifiers tried waiting for grace + * periods. So we bind the per-CPU kthread to its CPU only if the CPU + * is online. If its CPU is not yet fully online, then the code in + * rcu_cpu_kthread() will wait until it is fully online, and then do + * the binding. */ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) { @@ -1647,12 +1661,14 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); if (IS_ERR(t)) return PTR_ERR(t); - kthread_bind(t, cpu); + if (cpu_online(cpu)) + kthread_bind(t, cpu); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - per_cpu(rcu_cpu_kthread_task, cpu) = t; sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ return 0; } @@ -1759,12 +1775,11 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ } return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } -static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); - /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,30 +1787,18 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; - struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) { + if (cpu_online(cpu)) (void)rcu_spawn_one_cpu_kthread(cpu); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (rnp->node_kthread_task) - wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) { + rcu_for_each_leaf_node(rcu_state, rnp) (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - rcu_wake_one_boost_kthread(rnp); - } } return 0; } @@ -2221,31 +2224,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } /* - * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, - * but the RCU threads are woken on demand, and if demand is low this - * could be a while triggering the hung task watchdog. - * - * In order to avoid this, poke all tasks once the CPU is fully - * up and running. - */ -static void __cpuinit rcu_online_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t) - wake_up_process(t); - - t = rnp->node_kthread_task; - if (t) - wake_up_process(t); - - rcu_wake_one_boost_kthread(rnp); -} - -/* * Handle CPU online/offline notification events. */ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, @@ -2262,7 +2240,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: - rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c8bff30..ea2e2fb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1299,15 +1299,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ return 0; } -static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) -{ - if (rnp->boost_kthread_task) - wake_up_process(rnp->boost_kthread_task); -} - #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1331,10 +1326,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } -static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) -{ -} - #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP -- cgit v1.1 From 09223371deac67d08ca0b70bd18787920284c967 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 14 Jun 2011 13:26:25 +0800 Subject: rcu: Use softirq to address performance regression Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread) introduced performance regression. In an AIM7 test, this commit degraded performance by about 40%. The commit runs rcu callbacks in a kthread instead of softirq. We observed high rate of context switch which is caused by this. Out test system has 64 CPUs and HZ is 1000, so we saw more than 64k context switch per second which is caused by RCU's per-CPU kthread. A trace showed that most of the time the RCU per-CPU kthread doesn't actually handle any callbacks, but instead just does a very small amount of work handling grace periods. This means that RCU's per-CPU kthreads are making the scheduler do quite a bit of work in order to allow a very small amount of RCU-related processing to be done. Alex Shi's analysis determined that this slowdown is due to lock contention within the scheduler. Unfortunately, as Peter Zijlstra points out, the scheduler's real-time semantics require global action, which means that this contention is inherent in real-time scheduling. (Yes, perhaps someone will come up with a workaround -- otherwise, -rt is not going to do well on large SMP systems -- but this patch will work around this issue in the meantime. And "the meantime" might well be forever.) This patch therefore re-introduces softirq processing to RCU, but only for core RCU work. RCU callbacks are still executed in kthread context, so that only a small amount of RCU work runs in softirq context in the common case. This should minimize ksoftirqd execution, allowing us to skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels. Signed-off-by: Shaohua Li Tested-by: "Alex,Shi" Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 23 +++++++++++++++++++---- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 9 +++++++++ kernel/softirq.c | 2 +- 4 files changed, 30 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a8ec5b..ae5c9ea 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,6 +100,7 @@ static char rcu_kthreads_spawnable; static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_cpu_kthread(void); +static void __invoke_rcu_cpu_kthread(void); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1442,13 +1443,21 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rsp, rdp); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + __invoke_rcu_cpu_kthread(); +} + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); } /* * Do softirq processing for the current CPU. */ -static void rcu_process_callbacks(void) +static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1465,7 +1474,7 @@ static void rcu_process_callbacks(void) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void invoke_rcu_cpu_kthread(void) +static void __invoke_rcu_cpu_kthread(void) { unsigned long flags; @@ -1479,6 +1488,11 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); } +static void invoke_rcu_cpu_kthread(void) +{ + raise_softirq(RCU_SOFTIRQ); +} + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1613,7 +1627,7 @@ static int rcu_cpu_kthread(void *arg) *workp = 0; local_irq_restore(flags); if (work) - rcu_process_callbacks(); + rcu_kthread_do_work(); local_bh_enable(); if (*workp != 0) spincnt++; @@ -2387,6 +2401,7 @@ void __init rcu_init(void) rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7b9a08b..0fed6b9 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -439,6 +439,7 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); +static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ea2e2fb..38d09c5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,6 +602,11 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +static void rcu_preempt_do_callbacks(void) +{ + rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); +} + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -997,6 +1002,10 @@ static void rcu_preempt_process_callbacks(void) { } +static void rcu_preempt_do_callbacks(void) +{ +} + /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. diff --git a/kernel/softirq.c b/kernel/softirq.c index 1396017..40cf63d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ DEFINE_PER_CPU(struct task_struct *, ksoftirqd); char *softirq_to_name[NR_SOFTIRQS] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", - "TASKLET", "SCHED", "HRTIMER" + "TASKLET", "SCHED", "HRTIMER", "RCU" }; /* -- cgit v1.1 From ada9c93312f7ec49514c68c211595ce2601cebae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Jun 2011 15:50:11 -0700 Subject: signal.c: fix kernel-doc notation Fix kernel-doc warnings in signal.c: Warning(kernel/signal.c:2374): No description found for parameter 'nset' Warning(kernel/signal.c:2374): Excess function parameter 'set' description in 'sys_rt_sigprocmask' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 86c32b8..ff76786 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2365,7 +2365,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals - * @set: stores pending signals + * @nset: stores pending signals * @oset: previous value of signal mask if non-null * @sigsetsize: size of sigset_t type */ -- cgit v1.1 From 8dd0de8be31b4b966d17750a0b10df2f575c91ac Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 14 Jun 2011 18:36:24 -0400 Subject: sched: Fix need_resched() when checking peempt The RT preempt check tests the wrong task if NEED_RESCHED is set. It currently checks the local CPU task. It is supposed to check the task that is running on the runqueue we are about to wake another task on. Signed-off-by: Hillf Danton Reviewed-by: Yong Zhang Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110614223657.450239027@goodmis.org Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 88725c9..9b8d5dc 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1096,7 +1096,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag * to move current somewhere else, making room for our non-migratable * task. */ - if (p->prio == rq->curr->prio && !need_resched()) + if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) check_preempt_equal_prio(rq, p); #endif } -- cgit v1.1 From 0da938c44921cfb690283d3b0c9c48a10375db2c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Jun 2011 18:36:25 -0400 Subject: sched: Check if lowest_mask is initialized in find_lowest_rq() On system boot up, the lowest_mask is initialized with an early_initcall(). But RT tasks may wake up on other early_initcall() callers before the lowest_mask is initialized, causing a system crash. Commit "d72bce0e67 rcu: Cure load woes" was the first commit to wake up RT tasks in early init. Before this commit this bug should not happen. Reported-by: Andrew Theurer Tested-by: Andrew Theurer Tested-by: Paul E. McKenney Signed-off-by: Steven Rostedt Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110614223657.824872966@goodmis.org Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 9b8d5dc..10d0182 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1239,6 +1239,10 @@ static int find_lowest_rq(struct task_struct *task) int this_cpu = smp_processor_id(); int cpu = task_cpu(task); + /* Make sure the mask is initialized first */ + if (unlikely(!lowest_mask)) + return -1; + if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ -- cgit v1.1 From 733eda7ac316cd4e550fa096e4ed42356dc546e7 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 15 Jun 2011 15:08:43 -0700 Subject: memcg: clear mm->owner when last possible owner leaves The following crash was reported: > Call Trace: > [] mem_cgroup_from_task+0x15/0x17 > [] __mem_cgroup_try_charge+0x148/0x4b4 > [] ? need_resched+0x23/0x2d > [] ? preempt_schedule+0x46/0x4f > [] mem_cgroup_charge_common+0x9a/0xce > [] mem_cgroup_newpage_charge+0x5d/0x5f > [] khugepaged+0x5da/0xfaf > [] ? __init_waitqueue_head+0x4b/0x4b > [] ? add_mm_counter.constprop.5+0x13/0x13 > [] kthread+0xa8/0xb0 > [] ? sub_preempt_count+0xa1/0xb4 > [] kernel_thread_helper+0x4/0x10 > [] ? retint_restore_args+0x13/0x13 > [] ? __init_kthread_worker+0x5a/0x5a What happens is that khugepaged tries to charge a huge page against an mm whose last possible owner has already exited, and the memory controller crashes when the stale mm->owner is used to look up the cgroup to charge. mm->owner has never been set to NULL with the last owner going away, but nobody cared until khugepaged came along. Even then it wasn't a problem because the final mmput() on an mm was forced to acquire and release mmap_sem in write-mode, preventing an exiting owner to go away while the mmap_sem was held, and until "692e0b3 mm: thp: optimize memcg charge in khugepaged", the memory cgroup charge was protected by mmap_sem in read-mode. Instead of going back to relying on the mmap_sem to enforce lifetime of a task, this patch ensures that mm->owner is properly set to NULL when the last possible owner is exiting, which the memory controller can handle just fine. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Hugh Dickins Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Johannes Weiner Reported-by: Hugh Dickins Reported-by: Dave Jones Reviewed-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 20a4064..f2b321b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -561,29 +561,28 @@ void exit_files(struct task_struct *tsk) #ifdef CONFIG_MM_OWNER /* - * Task p is exiting and it owned mm, lets find a new owner for it + * A task is exiting. If it owned this mm, find a new owner for the mm. */ -static inline int -mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) -{ - /* - * If there are other users of the mm and the owner (us) is exiting - * we need to find a new owner to take on the responsibility. - */ - if (atomic_read(&mm->mm_users) <= 1) - return 0; - if (mm->owner != p) - return 0; - return 1; -} - void mm_update_next_owner(struct mm_struct *mm) { struct task_struct *c, *g, *p = current; retry: - if (!mm_need_new_owner(mm, p)) + /* + * If the exiting or execing task is not the owner, it's + * someone else's problem. + */ + if (mm->owner != p) return; + /* + * The current owner is exiting/execing and there are no other + * candidates. Do not leave the mm pointing to a possibly + * freed task structure. + */ + if (atomic_read(&mm->mm_users) <= 1) { + mm->owner = NULL; + return; + } read_lock(&tasklist_lock); /* -- cgit v1.1 From d2c32258798f813dc2be6cbc32f78aa5ac5cb205 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 15 Jun 2011 15:08:47 -0700 Subject: gcov: disable CONFIG_CONSTRUCTORS when not needed by CONFIG_GCOV_KERNEL CONFIG_CONSTRUCTORS controls support for running constructor functions at kernel init time. According to commit b99b87f70c7785ab ("kernel: constructor support"), gcov (CONFIG_GCOV_KERNEL) needs this. However, CONFIG_CONSTRUCTORS currently defaults to y, with no option to disable it, and CONFIG_GCOV_KERNEL depends on it. Instead, default it to n and have CONFIG_GCOV_KERNEL select it, so that the normal case of CONFIG_GCOV_KERNEL=n will result in CONFIG_CONSTRUCTORS=n. Observed in the short list of =y values in a minimal kernel configuration. Signed-off-by: Josh Triplett Acked-by: WANG Cong Acked-by: Peter Oberparleiter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index b8cadf7..5bf924d 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -2,7 +2,8 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" - depends on DEBUG_FS && CONSTRUCTORS + depends on DEBUG_FS + select CONSTRUCTORS default n ---help--- This option enables gcov-based code profiling (e.g. for code coverage -- cgit v1.1 From a46e0899eec7a3069bcadd45dfba7bf67c6ed016 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Jun 2011 15:47:09 -0700 Subject: rcu: use softirq instead of kthreads except when RCU_BOOST=y This patch #ifdefs RCU kthreads out of the kernel unless RCU_BOOST=y, thus eliminating context-switch overhead if RCU priority boosting has not been configured. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 59 ++++++++++++++++++++++++++++++++++++------------- kernel/rcutree.h | 8 +++++-- kernel/rcutree_plugin.h | 41 +++++++++++++++++++++------------- kernel/rcutree_trace.c | 32 +++++++++++++++++++-------- 4 files changed, 99 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ae5c9ea..429d494 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -87,6 +87,8 @@ static struct rcu_state *rcu_state; int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_RCU_BOOST + /* * Control variables for per-CPU and per-rcu_node kthreads. These * handle all flavors of RCU. @@ -98,9 +100,11 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; +#endif /* #ifdef CONFIG_RCU_BOOST */ + static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); -static void invoke_rcu_cpu_kthread(void); -static void __invoke_rcu_cpu_kthread(void); +static void invoke_rcu_core(void); +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ @@ -1089,6 +1093,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; +#ifdef CONFIG_RCU_BOOST struct task_struct *t; /* Stop the CPU's kthread. */ @@ -1097,6 +1102,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) per_cpu(rcu_cpu_kthread_task, cpu) = NULL; kthread_stop(t); } +#endif /* #ifdef CONFIG_RCU_BOOST */ /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1232,7 +1238,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* Re-raise the RCU softirq if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } /* @@ -1278,7 +1284,7 @@ void rcu_check_callbacks(int cpu, int user) } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); } #ifdef CONFIG_SMP @@ -1444,9 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) /* If there are callbacks ready, invoke them. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) - __invoke_rcu_cpu_kthread(); + invoke_rcu_callbacks(rsp, rdp); } +#ifdef CONFIG_RCU_BOOST + static void rcu_kthread_do_work(void) { rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); @@ -1454,6 +1462,8 @@ static void rcu_kthread_do_work(void) rcu_preempt_do_callbacks(); } +#endif /* #ifdef CONFIG_RCU_BOOST */ + /* * Do softirq processing for the current CPU. */ @@ -1474,25 +1484,22 @@ static void rcu_process_callbacks(struct softirq_action *unused) * the current CPU with interrupts disabled, the rcu_cpu_kthread_task * cannot disappear out from under us. */ -static void __invoke_rcu_cpu_kthread(void) +static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { - unsigned long flags; - - local_irq_save(flags); - __this_cpu_write(rcu_cpu_has_work, 1); - if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { - local_irq_restore(flags); + if (likely(!rsp->boost)) { + rcu_do_batch(rsp, rdp); return; } - wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); - local_irq_restore(flags); + invoke_rcu_callbacks_kthread(); } -static void invoke_rcu_cpu_kthread(void) +static void invoke_rcu_core(void) { raise_softirq(RCU_SOFTIRQ); } +#ifdef CONFIG_RCU_BOOST + /* * Wake up the specified per-rcu_node-structure kthread. * Because the per-rcu_node kthreads are immortal, we don't need @@ -1818,6 +1825,18 @@ static int __init rcu_spawn_kthreads(void) } early_initcall(rcu_spawn_kthreads); +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ +} + +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2224,6 +2243,8 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } +#ifdef CONFIG_RCU_BOOST + static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); @@ -2237,6 +2258,14 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) } } +#else /* #ifdef CONFIG_RCU_BOOST */ + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ + /* * Handle CPU online/offline notification events. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 0fed6b9..434288c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -369,6 +369,7 @@ struct rcu_state { /* period because */ /* force_quiescent_state() */ /* was running. */ + u8 boost; /* Subject to priority boost. */ unsigned long gpnum; /* Current gp number. */ unsigned long completed; /* # of last completed gp. */ @@ -439,7 +440,6 @@ static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); -static void rcu_preempt_do_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); @@ -451,11 +451,15 @@ static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); +static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); +static void invoke_rcu_callbacks_kthread(void); +#ifdef CONFIG_RCU_BOOST +static void rcu_preempt_do_callbacks(void); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, cpumask_var_t cm); -static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index); +#endif /* #ifdef CONFIG_RCU_BOOST */ #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 38d09c5..2772386 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -602,11 +602,15 @@ static void rcu_preempt_process_callbacks(void) &__get_cpu_var(rcu_preempt_data)); } +#ifdef CONFIG_RCU_BOOST + static void rcu_preempt_do_callbacks(void) { rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); } +#endif /* #ifdef CONFIG_RCU_BOOST */ + /* * Queue a preemptible-RCU callback for invocation after a grace period. */ @@ -1002,10 +1006,6 @@ static void rcu_preempt_process_callbacks(void) { } -static void rcu_preempt_do_callbacks(void) -{ -} - /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. @@ -1258,6 +1258,23 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) } /* + * Wake up the per-CPU kthread to invoke RCU callbacks. + */ +static void invoke_rcu_callbacks_kthread(void) +{ + unsigned long flags; + + local_irq_save(flags); + __this_cpu_write(rcu_cpu_has_work, 1); + if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { + local_irq_restore(flags); + return; + } + wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); + local_irq_restore(flags); +} + +/* * Set the affinity of the boost kthread. The CPU-hotplug locks are * held, so no one should be messing with the existence of the boost * kthread. @@ -1297,6 +1314,7 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (&rcu_preempt_state != rsp) return 0; + rsp->boost = 1; if (rnp->boost_kthread_task != NULL) return 0; t = kthread_create(rcu_boost_kthread, (void *)rnp, @@ -1319,22 +1337,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) raw_spin_unlock_irqrestore(&rnp->lock, flags); } -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, - cpumask_var_t cm) +static void invoke_rcu_callbacks_kthread(void) { + WARN_ON_ONCE(1); } static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, - struct rcu_node *rnp, - int rnp_index) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP @@ -1509,7 +1520,7 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); * * Because it is not legal to invoke rcu_process_callbacks() with irqs * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked + * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. */ int rcu_needs_cpu(int cpu) @@ -1560,7 +1571,7 @@ int rcu_needs_cpu(int cpu) /* If RCU callbacks are still pending, RCU still needs this CPU. */ if (c) - invoke_rcu_cpu_kthread(); + invoke_rcu_core(); return c; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 9678cc3..4e14487 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,6 +46,8 @@ #define RCU_TREE_NONCORE #include "rcutree.h" +#ifdef CONFIG_RCU_BOOST + DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu); DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); @@ -58,6 +60,8 @@ static char convert_kthread_status(unsigned int kthread_status) return "SRWOY"[kthread_status]; } +#endif /* #ifdef CONFIG_RCU_BOOST */ + static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) @@ -76,7 +80,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, " ql=%ld qs=%c%c%c%c kt=%d/%c/%d ktl=%x b=%ld", + seq_printf(m, " ql=%ld qs=%c%c%c%c", rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], @@ -84,13 +88,16 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->nxttail[RCU_NEXT_READY_TAIL]], ".W"[rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_WAIT_TAIL]], - ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], + ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, " kt=%d/%c/%d ktl=%x", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, rdp->cpu)), per_cpu(rcu_cpu_kthread_cpu, rdp->cpu), - per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff, - rdp->blimit); + per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_printf(m, " b=%ld", rdp->blimit); seq_printf(m, " ci=%lu co=%lu ca=%lu\n", rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } @@ -147,18 +154,21 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, ",%ld,\"%c%c%c%c\",%d,\"%c\",%ld", rdp->qlen, + seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], ".R"[rdp->nxttail[RCU_WAIT_TAIL] != rdp->nxttail[RCU_NEXT_READY_TAIL]], ".W"[rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_WAIT_TAIL]], - ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]], + ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); +#ifdef CONFIG_RCU_BOOST + seq_printf(m, ",%d,\"%c\"", per_cpu(rcu_cpu_has_work, rdp->cpu), convert_kthread_status(per_cpu(rcu_cpu_kthread_status, - rdp->cpu)), - rdp->blimit); + rdp->cpu))); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_printf(m, ",%ld", rdp->blimit); seq_printf(m, ",%lu,%lu,%lu\n", rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted); } @@ -169,7 +179,11 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) #ifdef CONFIG_NO_HZ seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ - seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); + seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); +#ifdef CONFIG_RCU_BOOST + seq_puts(m, "\"kt\",\"ktl\""); +#endif /* #ifdef CONFIG_RCU_BOOST */ + seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n"); #ifdef CONFIG_TREE_PREEMPT_RCU seq_puts(m, "\"rcu_preempt:\"\n"); PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); -- cgit v1.1 From b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jun 2011 16:22:08 +0200 Subject: clocksource: Make watchdog robust vs. interruption The clocksource watchdog code is interruptible and it has been observed that this can trigger false positives which disable the TSC. The reason is that an interrupt storm or a long running interrupt handler between the read of the watchdog source and the read of the TSC brings the two far enough apart that the delta is larger than the unstable treshold. Move both reads into a short interrupt disabled region to avoid that. Reported-and-tested-by: Vernon Mauery Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/time/clocksource.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1c95fd6..e0980f0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -185,7 +185,6 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); static DEFINE_SPINLOCK(watchdog_lock); -static cycle_t watchdog_last; static int watchdog_running; static int clocksource_watchdog_kthread(void *data); @@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data) if (!watchdog_running) goto out; - wdnow = watchdog->read(watchdog); - wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, - watchdog->mult, watchdog->shift); - watchdog_last = wdnow; - list_for_each_entry(cs, &watchdog_list, wd_list) { /* Clocksource already marked unstable? */ @@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data) continue; } + local_irq_disable(); csnow = cs->read(cs); + wdnow = watchdog->read(watchdog); + local_irq_enable(); /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { cs->flags |= CLOCK_SOURCE_WATCHDOG; - cs->wd_last = csnow; + cs->wd_last = wdnow; + cs->cs_last = csnow; continue; } - /* Check the deviation from the watchdog clocksource. */ - cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask, + watchdog->mult, watchdog->shift); + + cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) & cs->mask, cs->mult, cs->shift); - cs->wd_last = csnow; + cs->cs_last = csnow; + cs->wd_last = wdnow; + + /* Check the deviation from the watchdog clocksource. */ if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); continue; @@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void) return; init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; - watchdog_last = watchdog->read(watchdog); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; -- cgit v1.1 From f8b7fc6b514f34a51875dd48dff70d4d17a54f38 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Jun 2011 08:26:32 -0700 Subject: rcu: Move RCU_BOOST #ifdefs to header file The commit "use softirq instead of kthreads except when RCU_BOOST=y" just applied #ifdef in place. This commit is a cleanup that moves the newly #ifdef'ed code to the header file kernel/rcutree_plugin.h. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 383 +---------------------------------------------- kernel/rcutree.h | 5 + kernel/rcutree_plugin.h | 384 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 382 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 429d494..7e59ffb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1093,16 +1093,8 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; -#ifdef CONFIG_RCU_BOOST - struct task_struct *t; - /* Stop the CPU's kthread. */ - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t != NULL) { - per_cpu(rcu_cpu_kthread_task, cpu) = NULL; - kthread_stop(t); - } -#endif /* #ifdef CONFIG_RCU_BOOST */ + rcu_stop_cpu_kthread(cpu); /* Exclude any attempts to start a new grace period. */ raw_spin_lock_irqsave(&rsp->onofflock, flags); @@ -1453,17 +1445,6 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) invoke_rcu_callbacks(rsp, rdp); } -#ifdef CONFIG_RCU_BOOST - -static void rcu_kthread_do_work(void) -{ - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); - rcu_preempt_do_callbacks(); -} - -#endif /* #ifdef CONFIG_RCU_BOOST */ - /* * Do softirq processing for the current CPU. */ @@ -1498,345 +1479,6 @@ static void invoke_rcu_core(void) raise_softirq(RCU_SOFTIRQ); } -#ifdef CONFIG_RCU_BOOST - -/* - * Wake up the specified per-rcu_node-structure kthread. - * Because the per-rcu_node kthreads are immortal, we don't need - * to do anything to keep them alive. - */ -static void invoke_rcu_node_kthread(struct rcu_node *rnp) -{ - struct task_struct *t; - - t = rnp->node_kthread_task; - if (t != NULL) - wake_up_process(t); -} - -/* - * Set the specified CPU's kthread to run RT or not, as specified by - * the to_rt argument. The CPU-hotplug locks are held, so the task - * is not going away. - */ -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ - int policy; - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (t == NULL) - return; - if (to_rt) { - policy = SCHED_FIFO; - sp.sched_priority = RCU_KTHREAD_PRIO; - } else { - policy = SCHED_NORMAL; - sp.sched_priority = 0; - } - sched_setscheduler_nocheck(t, policy, &sp); -} - -/* - * Timer handler to initiate the waking up of per-CPU kthreads that - * have yielded the CPU due to excess numbers of RCU callbacks. - * We wake up the per-rcu_node kthread, which in turn will wake up - * the booster kthread. - */ -static void rcu_cpu_kthread_timer(unsigned long arg) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); - struct rcu_node *rnp = rdp->mynode; - - atomic_or(rdp->grpmask, &rnp->wakemask); - invoke_rcu_node_kthread(rnp); -} - -/* - * Drop to non-real-time priority and yield, but only after posting a - * timer that will cause us to regain our real-time priority if we - * remain preempted. Either way, we restore our real-time priority - * before returning. - */ -static void rcu_yield(void (*f)(unsigned long), unsigned long arg) -{ - struct sched_param sp; - struct timer_list yield_timer; - - setup_timer_on_stack(&yield_timer, f, arg); - mod_timer(&yield_timer, jiffies + 2); - sp.sched_priority = 0; - sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); - set_user_nice(current, 19); - schedule(); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); - del_timer(&yield_timer); -} - -/* - * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. - * This can happen while the corresponding CPU is either coming online - * or going offline. We cannot wait until the CPU is fully online - * before starting the kthread, because the various notifier functions - * can wait for RCU grace periods. So we park rcu_cpu_kthread() until - * the corresponding CPU is online. - * - * Return 1 if the kthread needs to stop, 0 otherwise. - * - * Caller must disable bh. This function can momentarily enable it. - */ -static int rcu_cpu_kthread_should_stop(int cpu) -{ - while (cpu_is_offline(cpu) || - !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || - smp_processor_id() != cpu) { - if (kthread_should_stop()) - return 1; - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; - per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); - local_bh_enable(); - schedule_timeout_uninterruptible(1); - if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) - set_cpus_allowed_ptr(current, cpumask_of(cpu)); - local_bh_disable(); - } - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - return 0; -} - -/* - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the - * earlier RCU softirq. - */ -static int rcu_cpu_kthread(void *arg) -{ - int cpu = (int)(long)arg; - unsigned long flags; - int spincnt = 0; - unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - char work; - char *workp = &per_cpu(rcu_cpu_has_work, cpu); - - for (;;) { - *statusp = RCU_KTHREAD_WAITING; - rcu_wait(*workp != 0 || kthread_should_stop()); - local_bh_disable(); - if (rcu_cpu_kthread_should_stop(cpu)) { - local_bh_enable(); - break; - } - *statusp = RCU_KTHREAD_RUNNING; - per_cpu(rcu_cpu_kthread_loops, cpu)++; - local_irq_save(flags); - work = *workp; - *workp = 0; - local_irq_restore(flags); - if (work) - rcu_kthread_do_work(); - local_bh_enable(); - if (*workp != 0) - spincnt++; - else - spincnt = 0; - if (spincnt > 10) { - *statusp = RCU_KTHREAD_YIELDING; - rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); - spincnt = 0; - } - } - *statusp = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Spawn a per-CPU kthread, setting up affinity and priority. - * Because the CPU hotplug lock is held, no other CPU will be attempting - * to manipulate rcu_cpu_kthread_task. There might be another CPU - * attempting to access it during boot, but the locking in kthread_bind() - * will enforce sufficient ordering. - * - * Please note that we cannot simply refuse to wake up the per-CPU - * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, - * which can result in softlockup complaints if the task ends up being - * idle for more than a couple of minutes. - * - * However, please note also that we cannot bind the per-CPU kthread to its - * CPU until that CPU is fully online. We also cannot wait until the - * CPU is fully online before we create its per-CPU kthread, as this would - * deadlock the system when CPU notifiers tried waiting for grace - * periods. So we bind the per-CPU kthread to its CPU only if the CPU - * is online. If its CPU is not yet fully online, then the code in - * rcu_cpu_kthread() will wait until it is fully online, and then do - * the binding. - */ -static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) -{ - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - per_cpu(rcu_cpu_kthread_task, cpu) != NULL) - return 0; - t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); - if (IS_ERR(t)) - return PTR_ERR(t); - if (cpu_online(cpu)) - kthread_bind(t, cpu); - per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; - WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ - return 0; -} - -/* - * Per-rcu_node kthread, which is in charge of waking up the per-CPU - * kthreads when needed. We ignore requests to wake up kthreads - * for offline CPUs, which is OK because force_quiescent_state() - * takes care of this case. - */ -static int rcu_node_kthread(void *arg) -{ - int cpu; - unsigned long flags; - unsigned long mask; - struct rcu_node *rnp = (struct rcu_node *)arg; - struct sched_param sp; - struct task_struct *t; - - for (;;) { - rnp->node_kthread_status = RCU_KTHREAD_WAITING; - rcu_wait(atomic_read(&rnp->wakemask) != 0); - rnp->node_kthread_status = RCU_KTHREAD_RUNNING; - raw_spin_lock_irqsave(&rnp->lock, flags); - mask = atomic_xchg(&rnp->wakemask, 0); - rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { - if ((mask & 0x1) == 0) - continue; - preempt_disable(); - t = per_cpu(rcu_cpu_kthread_task, cpu); - if (!cpu_online(cpu) || t == NULL) { - preempt_enable(); - continue; - } - per_cpu(rcu_cpu_has_work, cpu) = 1; - sp.sched_priority = RCU_KTHREAD_PRIO; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - preempt_enable(); - } - } - /* NOTREACHED */ - rnp->node_kthread_status = RCU_KTHREAD_STOPPED; - return 0; -} - -/* - * Set the per-rcu_node kthread's affinity to cover all CPUs that are - * served by the rcu_node in question. The CPU hotplug lock is still - * held, so the value of rnp->qsmaskinit will be stable. - * - * We don't include outgoingcpu in the affinity set, use -1 if there is - * no outgoing CPU. If there are no CPUs left in the affinity set, - * this function allows the kthread to execute on any CPU. - */ -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ - cpumask_var_t cm; - int cpu; - unsigned long mask = rnp->qsmaskinit; - - if (rnp->node_kthread_task == NULL) - return; - if (!alloc_cpumask_var(&cm, GFP_KERNEL)) - return; - cpumask_clear(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) - if ((mask & 0x1) && cpu != outgoingcpu) - cpumask_set_cpu(cpu, cm); - if (cpumask_weight(cm) == 0) { - cpumask_setall(cm); - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) - cpumask_clear_cpu(cpu, cm); - WARN_ON_ONCE(cpumask_weight(cm) == 0); - } - set_cpus_allowed_ptr(rnp->node_kthread_task, cm); - rcu_boost_kthread_setaffinity(rnp, cm); - free_cpumask_var(cm); -} - -/* - * Spawn a per-rcu_node kthread, setting priority and affinity. - * Called during boot before online/offline can happen, or, if - * during runtime, with the main CPU-hotplug locks held. So only - * one of these can be executing at a time. - */ -static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, - struct rcu_node *rnp) -{ - unsigned long flags; - int rnp_index = rnp - &rsp->node[0]; - struct sched_param sp; - struct task_struct *t; - - if (!rcu_kthreads_spawnable || - rnp->qsmaskinit == 0) - return 0; - if (rnp->node_kthread_task == NULL) { - t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun%d", rnp_index); - if (IS_ERR(t)) - return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->node_kthread_task = t; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = 99; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ - } - return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); -} - -/* - * Spawn all kthreads -- called as soon as the scheduler is running. - */ -static int __init rcu_spawn_kthreads(void) -{ - int cpu; - struct rcu_node *rnp; - - rcu_kthreads_spawnable = 1; - for_each_possible_cpu(cpu) { - per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) - (void)rcu_spawn_one_cpu_kthread(cpu); - } - rnp = rcu_get_root(rcu_state); - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } - return 0; -} -early_initcall(rcu_spawn_kthreads); - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) -{ -} - -static void rcu_cpu_kthread_setrt(int cpu, int to_rt) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_state *rsp) @@ -2243,29 +1885,6 @@ static void __cpuinit rcu_prepare_cpu(int cpu) rcu_preempt_init_percpu_data(cpu); } -#ifdef CONFIG_RCU_BOOST - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - - /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ - if (rcu_kthreads_spawnable) { - (void)rcu_spawn_one_cpu_kthread(cpu); - if (rnp->node_kthread_task == NULL) - (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - } -} - -#else /* #ifdef CONFIG_RCU_BOOST */ - -static void __cpuinit rcu_prepare_kthreads(int cpu) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_BOOST */ - /* * Handle CPU online/offline notification events. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 434288c..01b2ccd 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -427,6 +427,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags); +static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); @@ -460,6 +461,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index); +static void invoke_rcu_node_kthread(struct rcu_node *rnp); +static void rcu_yield(void (*f)(unsigned long), unsigned long arg); #endif /* #ifdef CONFIG_RCU_BOOST */ +static void rcu_cpu_kthread_setrt(int cpu, int to_rt); +static void __cpuinit rcu_prepare_kthreads(int cpu); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 2772386..14dc7dd 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1330,6 +1330,370 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Stop the RCU's per-CPU kthread when its CPU goes offline,. + */ +static void rcu_stop_cpu_kthread(int cpu) +{ + struct task_struct *t; + + /* Stop the CPU's kthread. */ + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t != NULL) { + per_cpu(rcu_cpu_kthread_task, cpu) = NULL; + kthread_stop(t); + } +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_kthread_do_work(void) +{ + rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_do_callbacks(); +} + +/* + * Wake up the specified per-rcu_node-structure kthread. + * Because the per-rcu_node kthreads are immortal, we don't need + * to do anything to keep them alive. + */ +static void invoke_rcu_node_kthread(struct rcu_node *rnp) +{ + struct task_struct *t; + + t = rnp->node_kthread_task; + if (t != NULL) + wake_up_process(t); +} + +/* + * Set the specified CPU's kthread to run RT or not, as specified by + * the to_rt argument. The CPU-hotplug locks are held, so the task + * is not going away. + */ +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ + int policy; + struct sched_param sp; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t == NULL) + return; + if (to_rt) { + policy = SCHED_FIFO; + sp.sched_priority = RCU_KTHREAD_PRIO; + } else { + policy = SCHED_NORMAL; + sp.sched_priority = 0; + } + sched_setscheduler_nocheck(t, policy, &sp); +} + +/* + * Timer handler to initiate the waking up of per-CPU kthreads that + * have yielded the CPU due to excess numbers of RCU callbacks. + * We wake up the per-rcu_node kthread, which in turn will wake up + * the booster kthread. + */ +static void rcu_cpu_kthread_timer(unsigned long arg) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); + struct rcu_node *rnp = rdp->mynode; + + atomic_or(rdp->grpmask, &rnp->wakemask); + invoke_rcu_node_kthread(rnp); +} + +/* + * Drop to non-real-time priority and yield, but only after posting a + * timer that will cause us to regain our real-time priority if we + * remain preempted. Either way, we restore our real-time priority + * before returning. + */ +static void rcu_yield(void (*f)(unsigned long), unsigned long arg) +{ + struct sched_param sp; + struct timer_list yield_timer; + + setup_timer_on_stack(&yield_timer, f, arg); + mod_timer(&yield_timer, jiffies + 2); + sp.sched_priority = 0; + sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); + set_user_nice(current, 19); + schedule(); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); + del_timer(&yield_timer); +} + +/* + * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. + * This can happen while the corresponding CPU is either coming online + * or going offline. We cannot wait until the CPU is fully online + * before starting the kthread, because the various notifier functions + * can wait for RCU grace periods. So we park rcu_cpu_kthread() until + * the corresponding CPU is online. + * + * Return 1 if the kthread needs to stop, 0 otherwise. + * + * Caller must disable bh. This function can momentarily enable it. + */ +static int rcu_cpu_kthread_should_stop(int cpu) +{ + while (cpu_is_offline(cpu) || + !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || + smp_processor_id() != cpu) { + if (kthread_should_stop()) + return 1; + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; + per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); + local_bh_enable(); + schedule_timeout_uninterruptible(1); + if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) + set_cpus_allowed_ptr(current, cpumask_of(cpu)); + local_bh_disable(); + } + per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; + return 0; +} + +/* + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the + * earlier RCU softirq. + */ +static int rcu_cpu_kthread(void *arg) +{ + int cpu = (int)(long)arg; + unsigned long flags; + int spincnt = 0; + unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); + char work; + char *workp = &per_cpu(rcu_cpu_has_work, cpu); + + for (;;) { + *statusp = RCU_KTHREAD_WAITING; + rcu_wait(*workp != 0 || kthread_should_stop()); + local_bh_disable(); + if (rcu_cpu_kthread_should_stop(cpu)) { + local_bh_enable(); + break; + } + *statusp = RCU_KTHREAD_RUNNING; + per_cpu(rcu_cpu_kthread_loops, cpu)++; + local_irq_save(flags); + work = *workp; + *workp = 0; + local_irq_restore(flags); + if (work) + rcu_kthread_do_work(); + local_bh_enable(); + if (*workp != 0) + spincnt++; + else + spincnt = 0; + if (spincnt > 10) { + *statusp = RCU_KTHREAD_YIELDING; + rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); + spincnt = 0; + } + } + *statusp = RCU_KTHREAD_STOPPED; + return 0; +} + +/* + * Spawn a per-CPU kthread, setting up affinity and priority. + * Because the CPU hotplug lock is held, no other CPU will be attempting + * to manipulate rcu_cpu_kthread_task. There might be another CPU + * attempting to access it during boot, but the locking in kthread_bind() + * will enforce sufficient ordering. + * + * Please note that we cannot simply refuse to wake up the per-CPU + * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, + * which can result in softlockup complaints if the task ends up being + * idle for more than a couple of minutes. + * + * However, please note also that we cannot bind the per-CPU kthread to its + * CPU until that CPU is fully online. We also cannot wait until the + * CPU is fully online before we create its per-CPU kthread, as this would + * deadlock the system when CPU notifiers tried waiting for grace + * periods. So we bind the per-CPU kthread to its CPU only if the CPU + * is online. If its CPU is not yet fully online, then the code in + * rcu_cpu_kthread() will wait until it is fully online, and then do + * the binding. + */ +static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) +{ + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + per_cpu(rcu_cpu_kthread_task, cpu) != NULL) + return 0; + t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); + if (IS_ERR(t)) + return PTR_ERR(t); + if (cpu_online(cpu)) + kthread_bind(t, cpu); + per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; + WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + per_cpu(rcu_cpu_kthread_task, cpu) = t; + wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ + return 0; +} + +/* + * Per-rcu_node kthread, which is in charge of waking up the per-CPU + * kthreads when needed. We ignore requests to wake up kthreads + * for offline CPUs, which is OK because force_quiescent_state() + * takes care of this case. + */ +static int rcu_node_kthread(void *arg) +{ + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp = (struct rcu_node *)arg; + struct sched_param sp; + struct task_struct *t; + + for (;;) { + rnp->node_kthread_status = RCU_KTHREAD_WAITING; + rcu_wait(atomic_read(&rnp->wakemask) != 0); + rnp->node_kthread_status = RCU_KTHREAD_RUNNING; + raw_spin_lock_irqsave(&rnp->lock, flags); + mask = atomic_xchg(&rnp->wakemask, 0); + rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { + if ((mask & 0x1) == 0) + continue; + preempt_disable(); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (!cpu_online(cpu) || t == NULL) { + preempt_enable(); + continue; + } + per_cpu(rcu_cpu_has_work, cpu) = 1; + sp.sched_priority = RCU_KTHREAD_PRIO; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + preempt_enable(); + } + } + /* NOTREACHED */ + rnp->node_kthread_status = RCU_KTHREAD_STOPPED; + return 0; +} + +/* + * Set the per-rcu_node kthread's affinity to cover all CPUs that are + * served by the rcu_node in question. The CPU hotplug lock is still + * held, so the value of rnp->qsmaskinit will be stable. + * + * We don't include outgoingcpu in the affinity set, use -1 if there is + * no outgoing CPU. If there are no CPUs left in the affinity set, + * this function allows the kthread to execute on any CPU. + */ +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ + cpumask_var_t cm; + int cpu; + unsigned long mask = rnp->qsmaskinit; + + if (rnp->node_kthread_task == NULL) + return; + if (!alloc_cpumask_var(&cm, GFP_KERNEL)) + return; + cpumask_clear(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) + if ((mask & 0x1) && cpu != outgoingcpu) + cpumask_set_cpu(cpu, cm); + if (cpumask_weight(cm) == 0) { + cpumask_setall(cm); + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) + cpumask_clear_cpu(cpu, cm); + WARN_ON_ONCE(cpumask_weight(cm) == 0); + } + set_cpus_allowed_ptr(rnp->node_kthread_task, cm); + rcu_boost_kthread_setaffinity(rnp, cm); + free_cpumask_var(cm); +} + +/* + * Spawn a per-rcu_node kthread, setting priority and affinity. + * Called during boot before online/offline can happen, or, if + * during runtime, with the main CPU-hotplug locks held. So only + * one of these can be executing at a time. + */ +static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + unsigned long flags; + int rnp_index = rnp - &rsp->node[0]; + struct sched_param sp; + struct task_struct *t; + + if (!rcu_kthreads_spawnable || + rnp->qsmaskinit == 0) + return 0; + if (rnp->node_kthread_task == NULL) { + t = kthread_create(rcu_node_kthread, (void *)rnp, + "rcun%d", rnp_index); + if (IS_ERR(t)) + return PTR_ERR(t); + raw_spin_lock_irqsave(&rnp->lock, flags); + rnp->node_kthread_task = t; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + sp.sched_priority = 99; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ + } + return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); +} + +/* + * Spawn all kthreads -- called as soon as the scheduler is running. + */ +static int __init rcu_spawn_kthreads(void) +{ + int cpu; + struct rcu_node *rnp; + + rcu_kthreads_spawnable = 1; + for_each_possible_cpu(cpu) { + per_cpu(rcu_cpu_has_work, cpu) = 0; + if (cpu_online(cpu)) + (void)rcu_spawn_one_cpu_kthread(cpu); + } + rnp = rcu_get_root(rcu_state); + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + if (NUM_RCU_NODES > 1) { + rcu_for_each_leaf_node(rcu_state, rnp) + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + } + return 0; +} +early_initcall(rcu_spawn_kthreads); + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + + /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ + if (rcu_kthreads_spawnable) { + (void)rcu_spawn_one_cpu_kthread(cpu); + if (rnp->node_kthread_task == NULL) + (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + } +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1346,6 +1710,26 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } +#ifdef CONFIG_HOTPLUG_CPU + +static void rcu_stop_cpu_kthread(int cpu) +{ +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +{ +} + +static void rcu_cpu_kthread_setrt(int cpu, int to_rt) +{ +} + +static void __cpuinit rcu_prepare_kthreads(int cpu) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP -- cgit v1.1 From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001 From: Takao Indoh Date: Tue, 29 Mar 2011 12:35:04 -0400 Subject: generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts There is a problem that kdump(2nd kernel) sometimes hangs up due to a pending IPI from 1st kernel. Kernel panic occurs because IPI comes before call_single_queue is initialized. To fix the crash, rename init_call_single_data() to call_function_init() and call it in start_kernel() so that call_single_queue can be initialized before enabling interrupts. The details of the crash are: (1) 2nd kernel boots up (2) A pending IPI from 1st kernel comes when irqs are first enabled in start_kernel(). (3) Kernel tries to handle the interrupt, but call_single_queue is not initialized yet at this point. As a result, in the generic_smp_call_function_single_interrupt(), NULL pointer dereference occurs when list_replace_init() tries to access &q->list.next. Therefore this patch changes the name of init_call_single_data() to call_function_init() and calls it before local_irq_enable() in start_kernel(). Signed-off-by: Takao Indoh Reviewed-by: WANG Cong Acked-by: Neil Horman Acked-by: Vivek Goyal Acked-by: Peter Zijlstra Cc: Milton Miller Cc: Jens Axboe Cc: Paul E. McKenney Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com Signed-off-by: Ingo Molnar --- kernel/smp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 73a1951..fb67dfa 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { .notifier_call = hotplug_cfd, }; -static int __cpuinit init_call_single_data(void) +void __init call_function_init(void) { void *cpu = (void *)(long)smp_processor_id(); int i; @@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void) hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); register_cpu_notifier(&hotplug_cfd_notifier); - - return 0; } -early_initcall(init_call_single_data); /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources -- cgit v1.1 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index ad6a81c..47613df 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -156,12 +156,6 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - if (sub_info->init) { - retval = sub_info->init(sub_info); - if (retval) - goto fail; - } - retval = -ENOMEM; new = prepare_kernel_cred(current); if (!new) @@ -173,6 +167,14 @@ static int ____call_usermodehelper(void *data) new->cap_inheritable); spin_unlock(&umh_sysctl_lock); + if (sub_info->init) { + retval = sub_info->init(sub_info, new); + if (retval) { + abort_creds(new); + goto fail; + } + } + commit_creds(new); retval = kernel_execve(sub_info->path, @@ -388,7 +390,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); * context in which call_usermodehelper_exec is called. */ void call_usermodehelper_setfns(struct subprocess_info *info, - int (*init)(struct subprocess_info *info), + int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *info), void *data) { -- cgit v1.1