From f42aa66c19796fd453f434be29a039707aec435f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 13 Mar 2013 20:43:57 -0400
Subject: tracing: Use stack of calling function for stack tracer

commit 87889501d0adfae10e3b0f0e6f2d7536eed9ae84 upstream.

Use the stack of stack_trace_call() instead of check_stack() as
the test pointer for max stack size. It makes it a bit cleaner
and a little more accurate.

Adding stable, as a later fix depends on this patch.

Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/trace_stack.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b0b53b8..d21f844 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -37,20 +37,21 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
 int stack_tracer_enabled;
 static int last_stack_tracer_enabled;
 
-static inline void check_stack(void)
+static inline void
+check_stack(unsigned long *stack)
 {
         unsigned long this_size, flags;
         unsigned long *p, *top, *start;
         int i;
 
-        this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+        this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
         this_size = THREAD_SIZE - this_size;
 
         if (this_size <= max_stack_size)
                 return;
 
         /* we do not handle interrupt stacks yet */
-        if (!object_is_on_stack(&this_size))
+        if (!object_is_on_stack(stack))
                 return;
 
         local_irq_save(flags);
@@ -71,7 +72,7 @@ static inline void check_stack(void)
          * Now find where in the stack these are.
          */
         i = 0;
-        start = &this_size;
+        start = stack;
         top = (unsigned long *)
                 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
 
@@ -110,6 +111,7 @@ static inline void check_stack(void)
 static void
 stack_trace_call(unsigned long ip, unsigned long parent_ip)
 {
+        unsigned long stack;
         int cpu;
 
         if (unlikely(!ftrace_enabled || stack_trace_disabled))
@@ -122,7 +124,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
         if (per_cpu(trace_active, cpu)++ != 0)
                 goto out;
 
-        check_stack();
+        check_stack(&stack);
 
  out:
         per_cpu(trace_active, cpu)--;
--
cgit v1.1

From 53318264d21af2445edd1eb47b4189717d53f288 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 13 Mar 2013 21:25:35 -0400
Subject: tracing: Fix stack tracer with fentry use

commit d4ecbfc49b4b1d4b597fb5ba9e4fa25d62f105c5 upstream.

When gcc 4.6 on x86 is used, the function tracer will use the new
option -mfentry which does a call to "fentry" at every function
instead of "mcount". The significance of this is that fentry is
called as the first operation of the function instead of the mcount
usage of being called after the stack has been set up.

This causes the stack tracer to show some bogus results for the size
of the last function traced, as well as showing "ftrace_call" instead
of the function. This is due to the stack frame not being set up by
the function that is about to be traced.

 # cat stack_trace
        Depth    Size   Location    (48 entries)
        -----    ----   --------
  0)     4824     216   ftrace_call+0x5/0x2f
  1)     4608     112   ____cache_alloc+0xb7/0x22d
  2)     4496      80   kmem_cache_alloc+0x63/0x12f

The 216 size for ftrace_call includes both the ftrace_call stack
(which includes the saving of registers it does), as well as the
stack size of the parent.

To fix this, if CC_USING_FENTRY is defined, then the stack_tracer
will reserve the first item in stack_dump_trace[] array when calling
save_stack_trace(), and it will fill it in with the parent ip. Then
the code will look for the parent pointer on the stack and give the
real size of the parent's stack pointer:

 # cat stack_trace
        Depth    Size   Location    (14 entries)
        -----    ----   --------
  0)     2640      48   update_group_power+0x26/0x187
  1)     2592     224   update_sd_lb_stats+0x2a5/0x4ac
  2)     2368     160   find_busiest_group+0x31/0x1f1
  3)     2208     256   load_balance+0xd9/0x662

I'm Cc'ing stable, although it's not urgent, as it only shows bogus
size for item #0, the rest of the trace is legit. It should still be
corrected in previous stable releases.

Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/trace_stack.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index d21f844..f8bf3df 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -17,13 +17,27 @@
 
 #define STACK_TRACE_ENTRIES 500
 
+/*
+ * If fentry is used, then the function being traced will
+ * jump to fentry directly before it sets up its stack frame.
+ * We need to ignore that one and record the parent. Since
+ * the stack frame for the traced function wasn't set up yet,
+ * the stack_trace wont see the parent. That needs to be added
+ * manually to stack_dump_trace[] as the first element.
+ */
+#ifdef CC_USING_FENTRY
+# define add_func      1
+#else
+# define add_func      0
+#endif
+
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
          { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
 
 static struct stack_trace max_stack_trace = {
-        .max_entries            = STACK_TRACE_ENTRIES,
-        .entries                = stack_dump_trace,
+        .max_entries            = STACK_TRACE_ENTRIES - add_func,
+        .entries                = &stack_dump_trace[add_func],
 };
 
 static unsigned long max_stack_size;
@@ -38,7 +52,7 @@ int stack_tracer_enabled;
 static int last_stack_tracer_enabled;
 
 static inline void
-check_stack(unsigned long *stack)
+check_stack(unsigned long ip, unsigned long *stack)
 {
         unsigned long this_size, flags;
         unsigned long *p, *top, *start;
@@ -69,6 +83,17 @@ check_stack(unsigned long *stack)
         save_stack_trace(&max_stack_trace);
 
         /*
+         * When fentry is used, the traced function does not get
+         * its stack frame set up, and we lose the parent.
+         * Add that one in manally. We set up save_stack_trace()
+         * to not touch the first element in this case.
+         */
+        if (add_func) {
+                stack_dump_trace[0] = ip;
+                max_stack_trace.nr_entries++;
+        }
+
+        /*
          * Now find where in the stack these are.
          */
         i = 0;
@@ -124,7 +149,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
         if (per_cpu(trace_active, cpu)++ != 0)
                 goto out;
 
-        check_stack(&stack);
+        check_stack(parent_ip, &stack);
 
  out:
         per_cpu(trace_active, cpu)--;
--
cgit v1.1

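The reservation trick above is easiest to see outside the kernel: hand the
stack walker a buffer whose entries pointer starts one element in, then fill
the skipped slot by hand. The user-space sketch below mimics that arrangement
with made-up names and values; it is not the kernel's stack_trace code.

  #include <stdio.h>

  #define ENTRIES 8

  struct trace {
          unsigned int max_entries;
          unsigned int nr_entries;
          unsigned long *entries;
  };

  static unsigned long buf[ENTRIES + 1];

  int main(void)
  {
          /* hand the stack walker everything except slot 0 */
          struct trace t = {
                  .max_entries = ENTRIES - 1,
                  .entries     = &buf[1],
          };

          /* pretend the walker filled two entries, i.e. buf[1] and buf[2] */
          t.entries[0] = 0x1111;
          t.entries[1] = 0x2222;
          t.nr_entries = 2;

          /* fill the reserved slot with the "parent ip" by hand */
          buf[0] = 0xbeef;
          t.nr_entries++;

          for (unsigned int i = 0; i < t.nr_entries; i++)
                  printf("%u: %#lx\n", i, buf[i]);
          return 0;
  }

Built with any C compiler, it prints the hand-filled parent entry first,
followed by the walker-filled ones.
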
From 13f475a567de775169fd7e69a5d84fc41c168c3e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Wed, 13 Mar 2013 23:34:22 -0400
Subject: tracing: Remove most or all of stack tracer stack size from stack_max_size

commit 4df297129f622bdc18935c856f42b9ddd18f9f28 upstream.

Currently, the depth reported in the stack tracer stack_trace file
does not match the stack_max_size file. This is because the
stack_max_size includes the overhead of stack tracer itself while
the depth does not.

The first time a max is triggered, a calculation is now performed
that figures out the overhead of the stack tracer and subtracts it
from the stack_max_size variable. The overhead is stored and is
subtracted from the reported stack size for comparing for a new max.

Now the stack_max_size corresponds to the reported depth:

 # cat stack_max_size
 4640

 # cat stack_trace
        Depth    Size   Location    (48 entries)
        -----    ----   --------
  0)     4640      32   _raw_spin_lock+0x18/0x24
  1)     4608     112   ____cache_alloc+0xb7/0x22d
  2)     4496      80   kmem_cache_alloc+0x63/0x12f
  3)     4416      16   mempool_alloc_slab+0x15/0x17
 [...]

While testing against an older gcc on x86 that uses mcount instead of
fentry, I found that passing in ip + MCOUNT_INSN_SIZE let the stack
trace show one more function deep which was missing before.

Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/trace_stack.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 21 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index f8bf3df..ff6473d 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -17,27 +17,24 @@
 
 #define STACK_TRACE_ENTRIES 500
 
-/*
- * If fentry is used, then the function being traced will
- * jump to fentry directly before it sets up its stack frame.
- * We need to ignore that one and record the parent. Since
- * the stack frame for the traced function wasn't set up yet,
- * the stack_trace wont see the parent. That needs to be added
- * manually to stack_dump_trace[] as the first element.
- */
 #ifdef CC_USING_FENTRY
-# define add_func      1
+# define fentry        1
 #else
-# define add_func      0
+# define fentry        0
 #endif
 
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
          { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
 
+/*
+ * Reserve one entry for the passed in ip. This will allow
+ * us to remove most or all of the stack size overhead
+ * added by the stack tracer itself.
+ */
 static struct stack_trace max_stack_trace = {
-        .max_entries            = STACK_TRACE_ENTRIES - add_func,
-        .entries                = &stack_dump_trace[add_func],
+        .max_entries            = STACK_TRACE_ENTRIES - 1,
+        .entries                = &stack_dump_trace[1],
 };
 
 static unsigned long max_stack_size;
@@ -56,10 +53,14 @@ check_stack(unsigned long ip, unsigned long *stack)
 {
         unsigned long this_size, flags;
         unsigned long *p, *top, *start;
+        static int tracer_frame;
+        int frame_size = ACCESS_ONCE(tracer_frame);
         int i;
 
         this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
         this_size = THREAD_SIZE - this_size;
+        /* Remove the frame of the tracer */
+        this_size -= frame_size;
 
         if (this_size <= max_stack_size)
                 return;
@@ -71,6 +72,10 @@ check_stack(unsigned long ip, unsigned long *stack)
         local_irq_save(flags);
         arch_spin_lock(&max_stack_lock);
 
+        /* In case another CPU set the tracer_frame on us */
+        if (unlikely(!frame_size))
+                this_size -= tracer_frame;
+
         /* a race could have already updated it */
         if (this_size <= max_stack_size)
                 goto out;
@@ -83,15 +88,12 @@ check_stack(unsigned long ip, unsigned long *stack)
         save_stack_trace(&max_stack_trace);
 
         /*
-         * When fentry is used, the traced function does not get
-         * its stack frame set up, and we lose the parent.
-         * Add that one in manally. We set up save_stack_trace()
-         * to not touch the first element in this case.
+         * Add the passed in ip from the function tracer.
+         * Searching for this on the stack will skip over
+         * most of the overhead from the stack tracer itself.
          */
-        if (add_func) {
-                stack_dump_trace[0] = ip;
-                max_stack_trace.nr_entries++;
-        }
+        stack_dump_trace[0] = ip;
+        max_stack_trace.nr_entries++;
 
         /*
          * Now find where in the stack these are.
@@ -121,6 +123,18 @@ check_stack(unsigned long ip, unsigned long *stack)
                                 found = 1;
                                 /* Start the search from here */
                                 start = p + 1;
+                                /*
+                                 * We do not want to show the overhead
+                                 * of the stack tracer stack in the
+                                 * max stack. If we haven't figured
+                                 * out what that is, then figure it out
+                                 * now.
+                                 */
+                                if (unlikely(!tracer_frame) && i == 1) {
+                                        tracer_frame = (p - stack) *
+                                                sizeof(unsigned long);
+                                        max_stack_size -= tracer_frame;
+                                }
                         }
                 }
 
@@ -149,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
         if (per_cpu(trace_active, cpu)++ != 0)
                 goto out;
 
-        check_stack(parent_ip, &stack);
+        /*
+         * When fentry is used, the traced function does not get
+         * its stack frame set up, and we lose the parent.
+         * The ip is pretty useless because the function tracer
+         * was called before that function set up its stack frame.
+         * In this case, we use the parent ip.
+         *
+         * By adding the return address of either the parent ip
+         * or the current ip we can disregard most of the stack usage
+         * caused by the stack tracer itself.
+         *
+         * The function tracer always reports the address of where the
+         * mcount call was, but the stack will hold the return address.
+         */
+        if (fentry)
+                ip = parent_ip;
+        else
+                ip += MCOUNT_INSN_SIZE;
+
+        check_stack(ip, &stack);
 
  out:
         per_cpu(trace_active, cpu)--;
--
cgit v1.1

From 61857764da0a6fa75f3407e06fbaf05f7cac3d84 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Mon, 1 Apr 2013 21:46:24 +0900
Subject: tracing: Fix off-by-one on allocating stat->pages

commit 39e30cd1537937d3c00ef87e865324e981434e5b upstream.

The first page was allocated separately, so no need to start from 0.

Link: http://lkml.kernel.org/r/1364820385-32027-2-git-send-email-namhyung@kernel.org

Signed-off-by: Namhyung Kim
Cc: Frederic Weisbecker
Cc: Namhyung Kim
Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5312d96..abf4d3d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -548,7 +548,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 
         pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
 
-        for (i = 0; i < pages; i++) {
+        for (i = 1; i < pages; i++) {
                 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
                 if (!pg->next)
                         goto out_free;
--
cgit v1.1

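The off-by-one fixed above is the usual "first element allocated up front"
pattern: the loop that fills out the rest of the list has to start at 1, or
one page too many is allocated. A stand-alone C sketch of the idea (plain
user-space code with invented names, not the ftrace profiling structures):

  #include <stdio.h>
  #include <stdlib.h>

  struct page { struct page *next; };

  int main(void)
  {
          int pages = 4;
          int allocated = 1;                 /* the first page already exists */
          struct page *head = calloc(1, sizeof(*head));
          struct page *pg = head;

          if (!head)
                  return 1;

          for (int i = 1; i < pages; i++) {  /* start from 1, not 0 */
                  pg->next = calloc(1, sizeof(*pg->next));
                  if (!pg->next)
                          break;
                  pg = pg->next;
                  allocated++;
          }
          printf("allocated %d of %d pages\n", allocated, pages);
          return 0;
  }

Starting the loop at 0 would allocate pages + 1 nodes in total, one more than
the computed page count.
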
From e81e6f4a69ddf39b89e29a4191a23372d4b1007a Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Wed, 10 Apr 2013 09:18:12 +0900
Subject: tracing: Check return value of tracing_init_dentry()

commit ed6f1c996bfe4b6e520cf7a74b51cd6988d84420 upstream.

Check return value and bail out if it's NULL.

Link: http://lkml.kernel.org/r/1365553093-10180-2-git-send-email-namhyung@kernel.org

Signed-off-by: Namhyung Kim
Cc: Frederic Weisbecker
Cc: Namhyung Kim
Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/trace.c       | 2 ++
 kernel/trace/trace_stack.c | 2 ++
 kernel/trace/trace_stat.c  | 2 ++
 3 files changed, 6 insertions(+)

(limited to 'kernel')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b3ae845..d197611 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4360,6 +4360,8 @@ static __init int tracer_init_debugfs(void)
         trace_access_lock_init();
 
         d_tracer = tracing_init_dentry();
+        if (!d_tracer)
+                return 0;
 
         trace_create_file("tracing_enabled", 0644, d_tracer,
                         &global_trace, &tracing_ctrl_fops);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index ff6473d..2d43977 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -420,6 +420,8 @@ static __init int stack_trace_init(void)
         struct dentry *d_tracer;
 
         d_tracer = tracing_init_dentry();
+        if (!d_tracer)
+                return 0;
 
         trace_create_file("stack_max_size", 0644, d_tracer,
                         &max_stack_size, &stack_max_size_fops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 96cffb2..847f88a 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -307,6 +307,8 @@ static int tracing_stat_init(void)
         struct dentry *d_tracing;
 
         d_tracing = tracing_init_dentry();
+        if (!d_tracing)
+                return 0;
 
         stat_dir = debugfs_create_dir("trace_stat", d_tracing);
         if (!stat_dir)
--
cgit v1.1

From 08181f491cd016e610d072dd42e8d0e7bda4a789 Mon Sep 17 00:00:00 2001
From: Namhyung Kim
Date: Thu, 11 Apr 2013 16:01:38 +0900
Subject: tracing: Reset ftrace_graph_filter_enabled if count is zero

commit 9f50afccfdc15d95d7331acddcb0f7703df089ae upstream.

The ftrace_graph_count can be decreased with a "!" pattern, so that
the enabled flag should be updated too.

Link: http://lkml.kernel.org/r/1365663698-2413-1-git-send-email-namhyung@kernel.org

Signed-off-by: Namhyung Kim
Cc: Frederic Weisbecker
Cc: Ingo Molnar
Cc: Namhyung Kim
Signed-off-by: Steven Rostedt
Signed-off-by: Greg Kroah-Hartman
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index abf4d3d..8e4361f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3287,7 +3287,8 @@ out:
         if (fail)
                 return -EINVAL;
 
-        ftrace_graph_filter_enabled = 1;
+        ftrace_graph_filter_enabled = !!(*idx);
+
         return 0;
 }
 
--
cgit v1.1

From b0f97a448749144ed26634ed47323ce2217a7a4c Mon Sep 17 00:00:00 2001
From: David Engraf
Date: Tue, 19 Mar 2013 13:29:55 +0100
Subject: hrtimer: Fix ktime_add_ns() overflow on 32bit architectures

commit 51fd36f3fad8447c487137ae26b9d0b3ce77bb25 upstream.

One can trigger an overflow when using ktime_add_ns() on a 32bit
architecture not supporting CONFIG_KTIME_SCALAR.

When passing a very high value for u64 nsec, e.g. 7881299347898368000
the do_div() function converts this value to seconds (7881299347)
which is still too high to pass to the ktime_set() function as long.
The result is a negative value.

The problem on my system occurs in the tick-sched.c,
tick_nohz_stop_sched_tick() when time_delta is set to
timekeeping_max_deferment(). The check for time_delta < KTIME_MAX is
valid, thus ktime_add_ns() is called with a too large value resulting
in a negative expire value. This leads to an endless loop in the
ticker code:

  time_delta: 7881299347898368000
  expires = ktime_add_ns(last_update, time_delta)
  expires: negative value

This fix caps the value to KTIME_MAX.

This error doesn't occur on 64bit or architectures supporting
CONFIG_KTIME_SCALAR (e.g. ARM, x86-32).

Signed-off-by: David Engraf
[jstultz: Minor tweaks to commit message & header]
Signed-off-by: John Stultz
Signed-off-by: Greg Kroah-Hartman
---
 kernel/hrtimer.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 3c5a22f..d8e856e 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -298,6 +298,10 @@ ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
         } else {
                 unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 
+                /* Make sure nsec fits into long */
+                if (unlikely(nsec > KTIME_SEC_MAX))
+                        return (ktime_t){ .tv64 = KTIME_MAX };
+
                 tmp = ktime_set((long)nsec, rem);
         }
 
--
cgit v1.1

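The arithmetic behind the overflow is easy to reproduce in user space: after
do_div() strips the nanoseconds, 7881299347898368000 ns leaves 7881299347
seconds, which does not fit in a 32-bit long, so the cast in ktime_set() goes
negative. A small sketch assuming a 32-bit long, as on the affected
configurations (the wrap on the narrowing cast is implementation-defined, but
this is what those machines do):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          uint64_t nsec  = 7881299347898368000ULL;
          uint64_t sec   = nsec / 1000000000ULL;  /* what do_div() leaves: 7881299347 */
          int32_t  sec32 = (int32_t)sec;          /* a 32-bit long cannot hold it */

          printf("seconds          = %llu\n", (unsigned long long)sec);
          printf("as a 32-bit long = %ld\n", (long)sec32);   /* prints a negative value */
          return 0;
  }

Capping the result at KTIME_MAX, as the patch does, avoids ever handing
ktime_set() a seconds value it cannot represent.
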
From a0f25ff9b9e74174def19cdad1f1d2e7f4894683 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava
Date: Mon, 8 Apr 2013 08:47:15 -0400
Subject: hrtimer: Add expiry time overflow check in hrtimer_interrupt

commit 8f294b5a139ee4b75e890ad5b443c93d1e558a8b upstream.

The settimeofday01 test in the LTP testsuite effectively does

  gettimeofday(current time);
  settimeofday(Jan 1, 1970 + 100 seconds);
  settimeofday(current time);

This test causes a stack trace to be displayed on the console during the
setting of timeofday to Jan 1, 1970 + 100 seconds:

[ 131.066751] ------------[ cut here ]------------
[ 131.096448] WARNING: at kernel/time/clockevents.c:209 clockevents_program_event+0x135/0x140()
[ 131.104935] Hardware name: Dinar
[ 131.108150] Modules linked in: sg nfsv3 nfs_acl nfsv4 auth_rpcgss nfs dns_resolver fscache lockd sunrpc nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables kvm_amd kvm sp5100_tco bnx2 i2c_piix4 crc32c_intel k10temp fam15h_power ghash_clmulni_intel amd64_edac_mod pcspkr serio_raw edac_mce_amd edac_core microcode xfs libcrc32c sr_mod sd_mod cdrom ata_generic crc_t10dif pata_acpi radeon i2c_algo_bit drm_kms_helper ttm drm ahci pata_atiixp libahci libata usb_storage i2c_core dm_mirror dm_region_hash dm_log dm_mod
[ 131.176784] Pid: 0, comm: swapper/28 Not tainted 3.8.0+ #6
[ 131.182248] Call Trace:
[ 131.184684] [] warn_slowpath_common+0x7f/0xc0
[ 131.191312] [] warn_slowpath_null+0x1a/0x20
[ 131.197131] [] clockevents_program_event+0x135/0x140
[ 131.203721] [] tick_program_event+0x24/0x30
[ 131.209534] [] hrtimer_interrupt+0x131/0x230
[ 131.215437] [] ? cpufreq_p4_target+0x130/0x130
[ 131.221509] [] smp_apic_timer_interrupt+0x69/0x99
[ 131.227839] [] apic_timer_interrupt+0x6d/0x80
[ 131.233816] [] ? sched_clock_cpu+0xc5/0x120
[ 131.240267] [] ? cpuidle_wrap_enter+0x50/0xa0
[ 131.246252] [] ? cpuidle_wrap_enter+0x49/0xa0
[ 131.252238] [] cpuidle_enter_tk+0x10/0x20
[ 131.257877] [] cpuidle_idle_call+0xa9/0x260
[ 131.263692] [] cpu_idle+0xaf/0x120
[ 131.268727] [] start_secondary+0x255/0x257
[ 131.274449] ---[ end trace 1151a50552231615 ]---

When we change the system time to a low value like this, the value of
timekeeper->offs_real will be a negative value.

It seems that the WARN occurs because an hrtimer has been started in the
time between the releasing of the timekeeper lock and the IPI call (via a
call to on_each_cpu) in clock_was_set() in the do_settimeofday() code.
The end result is that a REALTIME_CLOCK timer has been added with
softexpires = expires = KTIME_MAX.

The hrtimer_interrupt() fires/is called and the loop at
kernel/hrtimer.c:1289 is executed. In this loop the code subtracts the
clock base's offset (which was set to timekeeper->offs_real in
do_settimeofday()) from the current hrtimer_cpu_base->expiry value (which
was KTIME_MAX):

  KTIME_MAX - (a negative value) = overflow

A simple check for an overflow can resolve this problem. Using KTIME_MAX
instead of the overflow value will result in the hrtimer function being
run, and the reprogramming of the timer after that.

Reviewed-by: Rik van Riel
Cc: Thomas Gleixner
Signed-off-by: Prarit Bhargava
[jstultz: Tweaked commit subject]
Signed-off-by: John Stultz
Signed-off-by: Greg Kroah-Hartman
---
 kernel/hrtimer.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d8e856e..ca3bd3c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1312,6 +1312,8 @@ retry:
 
                         expires = ktime_sub(hrtimer_get_expires(timer),
                                             base->offset);
+                        if (expires.tv64 < 0)
+                                expires.tv64 = KTIME_MAX;
                         if (expires.tv64 < expires_next.tv64)
                                 expires_next = expires;
                         break;
--
cgit v1.1

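The overflow itself is ordinary signed 64-bit arithmetic: subtracting a
negative offset from KTIME_MAX wraps negative, which is exactly what the
added expires.tv64 < 0 check catches. A user-space sketch with a made-up
offset value (the subtraction goes through unsigned types only so that the
demo itself avoids signed-overflow undefined behaviour; the truncation back
to int64_t wraps two's-complement on the usual ABIs):

  #include <stdio.h>
  #include <stdint.h>

  #define KTIME_MAX INT64_MAX

  int main(void)
  {
          int64_t expires = KTIME_MAX;      /* softexpires = expires = KTIME_MAX */
          int64_t offset  = -1000000000;    /* offs_real after setting the time backwards */

          /* KTIME_MAX - (a negative value) wraps around to a negative number */
          int64_t adjusted = (int64_t)((uint64_t)expires - (uint64_t)offset);

          if (adjusted < 0)                 /* the added check: cap at KTIME_MAX */
                  adjusted = KTIME_MAX;

          printf("adjusted expiry = %lld\n", (long long)adjusted);
          return 0;
  }

Without the cap, the wrapped negative expiry always compares as the earliest
deadline, which is how the timer ends up being reprogrammed over and over.
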
From ed0a169166af3fc21e3b8ee9f3020298a93f9bd7 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Tue, 12 Mar 2013 15:36:00 -0700
Subject: cgroup: fix an off-by-one bug which may trigger BUG_ON()

commit 3ac1707a13a3da9cfc8f242a15b2fae6df2c5f88 upstream.

The 3rd parameter of flex_array_prealloc() is the number of elements,
not the index of the last element.

The effect of the bug is, when opening cgroup.procs, a flex array will
be allocated and all elements of the array are allocated with
GFP_KERNEL flag, but the last one is GFP_ATOMIC, and if we fail to
allocate memory for it, it'll trigger a BUG_ON().

Signed-off-by: Li Zefan
Signed-off-by: Tejun Heo
Signed-off-by: Greg Kroah-Hartman
---
 kernel/cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b964f9e..5d40afe 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2026,7 +2026,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
         if (!group)
                 return -ENOMEM;
         /* pre-allocate to guarantee space while iterating in rcu read-side. */
-        retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+        retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
         if (retval)
                 goto out_free_group_list;
 
--
cgit v1.1

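The bug comes down to passing a last index where a count is expected. A plain
C stand-in, not the kernel flex_array API, makes the one-element gap visible:

  #include <stdio.h>

  /* stand-in for prealloc: marks 'count' elements starting at 'start' as backed */
  static void prealloc(int *backed, int start, int count)
  {
          for (int i = start; i < start + count; i++)
                  backed[i] = 1;
  }

  int main(void)
  {
          int group_size = 4;
          int backed[4] = { 0 };

          prealloc(backed, 0, group_size - 1);   /* the buggy call: one element short */
          printf("last element backed? %s\n", backed[group_size - 1] ? "yes" : "no");

          prealloc(backed, 0, group_size);       /* the fixed call covers all of them */
          printf("last element backed? %s\n", backed[group_size - 1] ? "yes" : "no");
          return 0;
  }

With group_size - 1 the last element is never preallocated, so it later has to
be allocated GFP_ATOMIC, and a failure there is what hits the BUG_ON().
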
From a35089a9cc44f621e58af899b3483d206bb89284 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 25 Apr 2013 11:45:53 +0200
Subject: clockevents: Set dummy handler on CPU_DEAD shutdown

commit 6f7a05d7018de222e40ca003721037a530979974 upstream.

Vitaliy reported that a per cpu HPET timer interrupt crashes the system
during hibernation. What happens is that the per cpu HPET timer gets shut
down when the nonboot cpus are stopped. When the nonboot cpus are onlined
again the HPET code sets up the MSI interrupt which fires before the
clock event device is registered. The event handler is still set to
hrtimer_interrupt, which then crashes the machine due to highres mode not
being active.

See http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=700333

There is no real good way to avoid that in the HPET code. The HPET code
already has a mechanism to detect spurious interrupts when event handler
== NULL for a similar reason.

We can handle that in the clockevent/tick layer and replace the previous
functional handler with a dummy handler like we do in
tick_setup_new_device().

The original clockevents code did this in clockevents_exchange_device(),
but that got removed by commit 7c1e76897 (clockevents: prevent clockevent
event_handler ending up handler_noop) which forgot to fix it up in
tick_shutdown(). Same issue with the broadcast device.

Reported-by: Vitaliy Fillipov
Cc: Ben Hutchings
Cc: 700333@bugs.debian.org
Signed-off-by: Thomas Gleixner
Signed-off-by: Greg Kroah-Hartman
---
 kernel/time/tick-broadcast.c | 4 ++++
 kernel/time/tick-common.c    | 1 +
 2 files changed, 5 insertions(+)

(limited to 'kernel')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9e40370..e9df75d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -66,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
  */
 int tick_check_broadcast_device(struct clock_event_device *dev)
 {
+        struct clock_event_device *cur = tick_broadcast_device.evtdev;
+
         if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
             (tick_broadcast_device.evtdev &&
              tick_broadcast_device.evtdev->rating >= dev->rating) ||
@@ -73,6 +75,8 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
                 return 0;
 
         clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+        if (cur)
+                cur->event_handler = clockevents_handle_noop;
         tick_broadcast_device.evtdev = dev;
         if (!cpumask_empty(tick_get_broadcast_mask()))
                 tick_broadcast_start_periodic(dev);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 119528d..c43b479 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -323,6 +323,7 @@ static void tick_shutdown(unsigned int *cpup)
                  */
                 dev->mode = CLOCK_EVT_MODE_UNUSED;
                 clockevents_exchange_device(dev, NULL);
+                dev->event_handler = clockevents_handle_noop;
                 td->evtdev = NULL;
         }
         raw_spin_unlock_irqrestore(&tick_device_lock, flags);
--
cgit v1.1