diff options
author | Ziyann <jaraidaniel@gmail.com> | 2014-11-26 11:37:03 +0100 |
---|---|---|
committer | Ziyann <jaraidaniel@gmail.com> | 2014-11-26 11:37:03 +0100 |
commit | e90611725bc4dcc8946e74c38bb2c58fdf6bf98f (patch) | |
tree | a6a2441a53406a47f3a4b4a5103c0a067aaab445 /kernel | |
parent | da82942bbd6a40cf7ddd3edc52352610905137ce (diff) | |
parent | 5dba9ddd98cbc7ad319d687887981a0ea0062c75 (diff) | |
download | kernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.zip kernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.tar.gz kernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.tar.bz2 |
Merge remote-tracking branch 'linux/linux-3.0.y' into stable-newpvr
Conflicts:
arch/arm/include/asm/hardware/cache-l2x0.h
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit_tree.c | 2 | ||||
-rw-r--r-- | kernel/cgroup.c | 13 | ||||
-rw-r--r-- | kernel/events/core.c | 79 | ||||
-rw-r--r-- | kernel/events/hw_breakpoint.c | 4 | ||||
-rw-r--r-- | kernel/futex.c | 3 | ||||
-rw-r--r-- | kernel/hrtimer.c | 37 | ||||
-rw-r--r-- | kernel/irq/manage.c | 6 | ||||
-rw-r--r-- | kernel/kmod.c | 5 | ||||
-rw-r--r-- | kernel/relay.c | 5 | ||||
-rw-r--r-- | kernel/sched.c | 6 | ||||
-rw-r--r-- | kernel/sched_clock.c | 26 | ||||
-rw-r--r-- | kernel/sched_fair.c | 2 | ||||
-rw-r--r-- | kernel/signal.c | 2 | ||||
-rw-r--r-- | kernel/sys.c | 3 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 14 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 1 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 | ||||
-rw-r--r-- | kernel/timer.c | 10 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 48 | ||||
-rw-r--r-- | kernel/trace/trace.c | 70 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 9 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 76 | ||||
-rw-r--r-- | kernel/trace/trace_stat.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 21 |
24 files changed, 317 insertions, 129 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f6b4ac7..7d9731d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -614,9 +614,9 @@ void audit_trim_trees(void) } spin_unlock(&hash_lock); trim_marked(tree); - put_tree(tree); drop_collected_mounts(root_mnt); skip_it: + put_tree(tree); mutex_lock(&audit_filter_mutex); } list_del(&cursor); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d0913a6..2005981 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) if (!group) return -ENOMEM; /* pre-allocate to guarantee space while iterating in rcu read-side. */ - retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL); + retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL); if (retval) goto out_free_group_list; @@ -3533,6 +3533,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { struct cgroup_event *event = NULL; + struct cgroup *cgrp_cfile; unsigned int efd, cfd; struct file *efile = NULL; struct file *cfile = NULL; @@ -3587,6 +3588,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, goto fail; } + /* + * The file to be monitored must be in the same cgroup as + * cgroup.event_control is. + */ + cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); + if (cgrp_cfile != cgrp) { + ret = -EINVAL; + goto fail; + } + if (!event->cft->register_event || !event->cft->unregister_event) { ret = -EINVAL; goto fail; diff --git a/kernel/events/core.c b/kernel/events/core.c index b582650..acdc087 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -223,9 +223,9 @@ perf_cgroup_match(struct perf_event *event) return !event->cgrp || event->cgrp == cpuctx->cgrp; } -static inline void perf_get_cgroup(struct perf_event *event) +static inline bool perf_tryget_cgroup(struct perf_event *event) { - css_get(&event->cgrp->css); + return css_tryget(&event->cgrp->css); } static inline void perf_put_cgroup(struct perf_event *event) @@ -342,6 +342,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + if (cpuctx->unique_pmu != pmu) + continue; /* ensure we process each cpuctx once */ perf_pmu_disable(cpuctx->ctx.pmu); @@ -365,9 +367,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode) if (mode & PERF_CGROUP_SWIN) { WARN_ON_ONCE(cpuctx->cgrp); - /* set cgrp before ctxsw in to - * allow event_filter_match() to not - * have to pass task around + /* + * set cgrp before ctxsw in to allow + * event_filter_match() to not have to pass + * task around */ cpuctx->cgrp = perf_cgroup_from_task(task); cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); @@ -415,7 +418,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, event->cgrp = cgrp; /* must be done before we fput() the file */ - perf_get_cgroup(event); + if (!perf_tryget_cgroup(event)) { + event->cgrp = NULL; + ret = -ENOENT; + goto out; + } /* * all events in a group must monitor @@ -651,8 +658,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; - rcu_read_lock(); retry: + /* + * One of the few rules of preemptible RCU is that one cannot do + * rcu_read_unlock() while holding a scheduler (or nested) lock when + * part of the read side critical section was preemptible -- see + * rcu_read_unlock_special(). + * + * Since ctx->lock nests under rq->lock we must ensure the entire read + * side critical section is non-preemptible. + */ + preempt_disable(); + rcu_read_lock(); ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* @@ -668,6 +685,8 @@ retry: raw_spin_lock_irqsave(&ctx->lock, *flags); if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); + rcu_read_unlock(); + preempt_enable(); goto retry; } @@ -677,6 +696,7 @@ retry: } } rcu_read_unlock(); + preempt_enable(); return ctx; } @@ -826,6 +846,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } /* + * Initialize event state based on the perf_event_attr::disabled. + */ +static inline void perf_event__state_init(struct perf_event *event) +{ + event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF : + PERF_EVENT_STATE_INACTIVE; +} + +/* * Called at perf_event creation and when events are attached/detached from a * group. */ @@ -1616,7 +1645,16 @@ static int __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - if (WARN_ON_ONCE(!ctx->is_active)) + /* + * There's a time window between 'ctx->is_active' check + * in perf_event_enable function and this place having: + * - IRQs on + * - ctx->lock unlocked + * + * where the task could be killed and 'ctx' deactivated + * by perf_event_exit_task. + */ + if (!ctx->is_active) return -EINVAL; raw_spin_lock(&ctx->lock); @@ -4544,7 +4582,7 @@ static void perf_event_task_event(struct perf_task_event *task_event) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->active_pmu != pmu) + if (cpuctx->unique_pmu != pmu) goto next; perf_event_task_ctx(&cpuctx->ctx, task_event); @@ -4690,7 +4728,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->active_pmu != pmu) + if (cpuctx->unique_pmu != pmu) goto next; perf_event_comm_ctx(&cpuctx->ctx, comm_event); @@ -4886,7 +4924,7 @@ got_name: rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->active_pmu != pmu) + if (cpuctx->unique_pmu != pmu) goto next; perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); @@ -5912,8 +5950,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); - if (cpuctx->active_pmu == old_pmu) - cpuctx->active_pmu = pmu; + if (cpuctx->unique_pmu == old_pmu) + cpuctx->unique_pmu = pmu; } } @@ -6045,7 +6083,7 @@ skip_type: cpuctx->ctx.pmu = pmu; cpuctx->jiffies_interval = 1; INIT_LIST_HEAD(&cpuctx->rotation_list); - cpuctx->active_pmu = pmu; + cpuctx->unique_pmu = pmu; } got_cpu_context: @@ -6219,8 +6257,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, event->overflow_handler = overflow_handler; - if (attr->disabled) - event->state = PERF_EVENT_STATE_OFF; + perf_event__state_init(event); pmu = NULL; @@ -6594,9 +6631,17 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&gctx->mutex); perf_remove_from_context(group_leader); + + /* + * Removing from the context ends up with disabled + * event. What we want here is event in the initial + * startup state, ready to be add into new context. + */ + perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling); + perf_event__state_init(sibling); put_ctx(gctx); } mutex_unlock(&gctx->mutex); @@ -7064,7 +7109,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. */ - child_ctx = alloc_perf_context(event->pmu, child); + child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 086adf2..d99cb4b 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -147,7 +147,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, return; } - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned int nr; nr = per_cpu(nr_cpu_bp_pinned[type], cpu); @@ -233,7 +233,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, if (cpu >= 0) { toggle_bp_task_slot(bp, cpu, enable, type, weight); } else { - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) toggle_bp_task_slot(bp, cpu, enable, type, weight); } diff --git a/kernel/futex.c b/kernel/futex.c index 61e554e..d38153d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -60,6 +60,7 @@ #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/ptrace.h> +#include <linux/hugetlb.h> #include <asm/futex.h> @@ -363,7 +364,7 @@ again: } else { key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.inode = page_head->mapping->host; - key->shared.pgoff = page_head->index; + key->shared.pgoff = basepage_index(page); } get_futex_key_refs(key); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e079c3e..80ec91d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { + .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), .clock_base = { { @@ -297,6 +298,10 @@ ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) } else { unsigned long rem = do_div(nsec, NSEC_PER_SEC); + /* Make sure nsec fits into long */ + if (unlikely(nsec > KTIME_SEC_MAX)) + return (ktime_t){ .tv64 = KTIME_MAX }; + tmp = ktime_set((long)nsec, rem); } @@ -702,17 +707,20 @@ static int hrtimer_switch_to_hres(void) return 1; } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + /* - * Called from timekeeping code to reprogramm the hrtimer interrupt - * device. If called from the timer interrupt context we defer it to - * softirq context. + * Called from timekeeping and resume code to reprogramm the hrtimer + * interrupt device on all cpus. */ void clock_was_set_delayed(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + schedule_work(&hrtimer_work); } #else @@ -761,8 +769,10 @@ void hrtimers_resume(void) WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + /* Retrigger on the local CPU */ retrigger_next_event(NULL); - timerfd_clock_was_set(); + /* And schedule a retrigger for all others */ + clock_was_set_delayed(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1307,6 +1317,8 @@ retry: expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + if (expires.tv64 < 0) + expires.tv64 = KTIME_MAX; if (expires.tv64 < expires_next.tv64) expires_next = expires; break; @@ -1409,13 +1421,6 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - if (cpu_base->clock_was_set) { - cpu_base->clock_was_set = 0; - clock_was_set(); - } - hrtimer_peek_ahead_timers(); } @@ -1640,8 +1645,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - raw_spin_lock_init(&cpu_base->lock); - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { cpu_base->clock_base[i].cpu_base = cpu_base; timerqueue_init_head(&cpu_base->clock_base[i].active); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3e1bdf9..2f61278 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -536,9 +536,9 @@ int can_request_irq(unsigned int irq, unsigned long irqflags) return 0; if (irq_settings_can_request(desc)) { - if (desc->action) - if (irqflags & desc->action->flags & IRQF_SHARED) - canrequest =1; + if (!desc->action || + irqflags & desc->action->flags & IRQF_SHARED) + canrequest = 1; } irq_put_desc_unlock(desc, flags); return canrequest; diff --git a/kernel/kmod.c b/kernel/kmod.c index fabfe54..f625b4f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -421,6 +421,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int retval = 0; helper_lock(); + if (!sub_info->path) { + retval = -EINVAL; + goto out; + } + if (sub_info->path[0] == '\0') goto out; diff --git a/kernel/relay.c b/kernel/relay.c index 2c242fb..a5be9af 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in, struct splice_pipe_desc spd = { .pages = pages, .nr_pages = 0, + .nr_pages_max = PIPE_DEF_BUFFERS, .partial = partial, .flags = flags, .ops = &relay_pipe_buf_ops, @@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in, ret += padding; out: - splice_shrink_spd(pipe, &spd); - return ret; + splice_shrink_spd(&spd); + return ret; } static ssize_t relay_file_splice_read(struct file *in, diff --git a/kernel/sched.c b/kernel/sched.c index 93d8b46..efb62f0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2774,8 +2774,10 @@ static void try_to_wake_up_local(struct task_struct *p) { struct rq *rq = task_rq(p); - BUG_ON(rq != this_rq()); - BUG_ON(p == current); + if (WARN_ON_ONCE(rq != this_rq()) || + WARN_ON_ONCE(p == current)) + return; + lockdep_assert_held(&rq->lock); if (!raw_spin_trylock(&p->pi_lock)) { diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b..1eeaf74 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd) u64 this_clock, remote_clock; u64 *ptr, old_val, val; +#if BITS_PER_LONG != 64 +again: + /* + * Careful here: The local and the remote clock values need to + * be read out atomic as we need to compare the values and + * then update either the local or the remote side. So the + * cmpxchg64 below only protects one readout. + * + * We must reread via sched_clock_local() in the retry case on + * 32bit as an NMI could use sched_clock_local() via the + * tracer and hit between the readout of + * the low32bit and the high 32bit portion. + */ + this_clock = sched_clock_local(my_scd); + /* + * We must enforce atomic readout on 32bit, otherwise the + * update on the remote cpu can hit inbetween the readout of + * the low32bit and the high 32bit portion. + */ + remote_clock = cmpxchg64(&scd->clock, 0, 0); +#else + /* + * On 64bit the read of [my]scd->clock is atomic versus the + * update, so we can avoid the above 32bit dance. + */ sched_clock_local(my_scd); again: this_clock = my_scd->clock; remote_clock = scd->clock; +#endif /* * Use the opportunity that we have both locks diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c768588..fae7d67 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -4277,7 +4277,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task * idle runqueue: */ if (rq->cfs.load.weight) - rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); + rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); return rr_interval; } diff --git a/kernel/signal.c b/kernel/signal.c index b0c0887..f15021b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2664,7 +2664,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) static int do_tkill(pid_t tgid, pid_t pid, int sig) { - struct siginfo info; + struct siginfo info = {}; info.si_signo = sig; info.si_errno = 0; diff --git a/kernel/sys.c b/kernel/sys.c index 84e353b..1c69aa7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -320,7 +320,6 @@ void kernel_restart_prepare(char *cmd) system_state = SYSTEM_RESTART; usermodehelper_disable(); device_shutdown(); - syscore_shutdown(); } /** @@ -335,6 +334,7 @@ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); disable_nonboot_cpus(); + syscore_shutdown(); if (!cmd) printk(KERN_EMERG "Restarting system.\n"); else @@ -360,6 +360,7 @@ static void kernel_shutdown_prepare(enum system_states state) void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); + disable_nonboot_cpus(); syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 9e40370..20ba7b4 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -66,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) */ int tick_check_broadcast_device(struct clock_event_device *dev) { + struct clock_event_device *cur = tick_broadcast_device.evtdev; + if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || @@ -73,6 +75,8 @@ int tick_check_broadcast_device(struct clock_event_device *dev) return 0; clockevents_exchange_device(tick_broadcast_device.evtdev, dev); + if (cur) + cur->event_handler = clockevents_handle_noop; tick_broadcast_device.evtdev = dev; if (!cpumask_empty(tick_get_broadcast_mask())) tick_broadcast_start_periodic(dev); @@ -392,7 +396,15 @@ void tick_check_oneshot_broadcast(int cpu) if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); + /* + * We might be in the middle of switching over from + * periodic to oneshot. If the CPU has not yet + * switched over, leave the device alone. + */ + if (td->mode == TICKDEV_MODE_ONESHOT) { + clockevents_set_mode(td->evtdev, + CLOCK_EVT_MODE_ONESHOT); + } } } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 119528d..c43b479 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -323,6 +323,7 @@ static void tick_shutdown(unsigned int *cpup) */ dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); + dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; } raw_spin_unlock_irqrestore(&tick_device_lock, flags); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c473ce2..c0be5f2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -811,7 +811,7 @@ void tick_cancel_sched_timer(int cpu) hrtimer_cancel(&ts->sched_timer); # endif - ts->nohz_mode = NOHZ_MODE_INACTIVE; + memset(ts, 0, sizeof(*ts)); } #endif diff --git a/kernel/timer.c b/kernel/timer.c index 27982d9..5eac0d8 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -145,9 +145,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu, /* now that we have rounded, subtract the extra skew again */ j -= cpu * 3; - if (j <= jiffies) /* rounding ate our timeout entirely; */ - return original; - return j; + /* + * Make sure j is still in the future. Otherwise return the + * unmodified value. + */ + return time_is_after_jiffies(j) ? j : original; } /** @@ -1630,12 +1632,12 @@ static int __cpuinit init_timers_cpu(int cpu) boot_done = 1; base = &boot_tvec_bases; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; } else { base = per_cpu(tvec_bases, cpu); } - spin_lock_init(&base->lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b2ca34a..0d704b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -548,7 +548,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); - for (i = 0; i < pages; i++) { + for (i = 1; i < pages; i++) { pg->next = (void *)get_zeroed_page(GFP_KERNEL); if (!pg->next) goto out_free; @@ -566,7 +566,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) free_page(tmp); } - free_page((unsigned long)stat->pages); stat->pages = NULL; stat->start = NULL; @@ -934,6 +933,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; +static loff_t +ftrace_filter_lseek(struct file *file, loff_t offset, int whence) +{ + loff_t ret; + + if (file->f_mode & FMODE_READ) + ret = seq_lseek(file, offset, whence); + else + file->f_pos = ret = 1; + + return ret; +} + #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD @@ -2300,19 +2312,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file) inode, file); } -static loff_t -ftrace_regex_lseek(struct file *file, loff_t offset, int origin) -{ - loff_t ret; - - if (file->f_mode & FMODE_READ) - ret = seq_lseek(file, offset, origin); - else - file->f_pos = ret = 1; - - return ret; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; @@ -3119,7 +3118,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3127,7 +3126,7 @@ static const struct file_operations ftrace_notrace_fops = { .open = ftrace_notrace_open, .read = seq_read, .write = ftrace_notrace_write, - .llseek = ftrace_regex_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_regex_release, }; @@ -3288,7 +3287,8 @@ out: if (fail) return -EINVAL; - ftrace_graph_filter_enabled = 1; + ftrace_graph_filter_enabled = !!(*idx); + return 0; } @@ -3335,8 +3335,8 @@ static const struct file_operations ftrace_graph_fops = { .open = ftrace_graph_open, .read = seq_read, .write = ftrace_graph_write, + .llseek = ftrace_filter_lseek, .release = ftrace_graph_release, - .llseek = seq_lseek, }; #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -3822,7 +3822,7 @@ static const struct file_operations ftrace_pid_fops = { .open = ftrace_pid_open, .write = ftrace_pid_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = ftrace_filter_lseek, .release = ftrace_pid_release, }; @@ -3934,12 +3934,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); /* we are starting ftrace again */ - if (ftrace_ops_list != &ftrace_list_end) { - if (ftrace_ops_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_ops_list->func; - else - ftrace_trace_function = ftrace_ops_list_func; - } + if (ftrace_ops_list != &ftrace_list_end) + update_ftrace_function(); } else { /* stopping ftrace calls (just send to ftrace_stub) */ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b3ae845..34d15ba 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -628,7 +628,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); max_data->pid = tsk->pid; - max_data->uid = task_uid(tsk); + /* + * If tsk == current, then use current_uid(), as that does not use + * RCU. The irq tracer can be called out of RCU scope. + */ + if (tsk == current) + max_data->uid = current_uid(); + else + max_data->uid = task_uid(tsk); + max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; max_data->policy = tsk->policy; max_data->rt_priority = tsk->rt_priority; @@ -3237,6 +3245,7 @@ waitagain: memset(&iter->seq, 0, sizeof(struct trace_iterator) - offsetof(struct trace_iterator, seq)); + cpumask_clear(iter->started); iter->pos = -1; trace_event_read_lock(); @@ -3355,6 +3364,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .pages = pages_def, .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, @@ -3426,7 +3436,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, ret = splice_to_pipe(pipe, &spd); out: - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; out_err: @@ -3839,6 +3849,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages_def, .partial = partial_def, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, @@ -3927,7 +3938,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); out: return ret; } @@ -4360,6 +4371,8 @@ static __init int tracer_init_debugfs(void) trace_access_lock_init(); d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; trace_create_file("tracing_enabled", 0644, d_tracer, &global_trace, &tracing_ctrl_fops); @@ -4487,30 +4500,32 @@ void trace_init_global_iter(struct trace_iterator *iter) iter->cpu_file = TRACE_PIPE_ALL_CPU; } -static void -__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) +void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { - static arch_spinlock_t ftrace_dump_lock = - (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; + static atomic_t dump_running; unsigned int old_userobj; - static int dump_ran; unsigned long flags; int cnt = 0, cpu; - /* only one dump */ - local_irq_save(flags); - arch_spin_lock(&ftrace_dump_lock); - if (dump_ran) - goto out; - - dump_ran = 1; + /* Only allow one dump user at a time. */ + if (atomic_inc_return(&dump_running) != 1) { + atomic_dec(&dump_running); + return; + } + /* + * Always turn off tracing when we dump. + * We don't need to show trace output of what happens + * between multiple crashes. + * + * If the user does a sysrq-z, then they can re-enable + * tracing with echo 1 > tracing_on. + */ tracing_off(); - if (disable_tracing) - ftrace_kill(); + local_irq_save(flags); trace_init_global_iter(&iter); @@ -4581,26 +4596,15 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) printk(KERN_TRACE "---------------------------------\n"); out_enable: - /* Re-enable tracing if requested */ - if (!disable_tracing) { - trace_flags |= old_userobj; + trace_flags |= old_userobj; - for_each_tracing_cpu(cpu) { - atomic_dec(&iter.tr->data[cpu]->disabled); - } - tracing_on(); + for_each_tracing_cpu(cpu) { + atomic_dec(&iter.tr->data[cpu]->disabled); } - - out: - arch_spin_unlock(&ftrace_dump_lock); + atomic_dec(&dump_running); local_irq_restore(flags); } - -/* By default: disable tracing after the dump */ -void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) -{ - __ftrace_dump(true, oops_dump_mode); -} +EXPORT_SYMBOL_GPL(ftrace_dump); __init static int tracer_alloc_buffers(void) { diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 288541f..09fd98a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -461,8 +461,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) /* Maximum number of functions to trace before diagnosing a hang */ #define GRAPH_MAX_FUNC_TEST 100000000 -static void -__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode); static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ @@ -472,8 +470,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { ftrace_graph_stop(); printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); - if (ftrace_dump_on_oops) - __ftrace_dump(false, DUMP_ALL); + if (ftrace_dump_on_oops) { + ftrace_dump(DUMP_ALL); + /* ftrace_dump() disables tracing */ + tracing_on(); + } return 0; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b0b53b8..2d43977 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -17,13 +17,24 @@ #define STACK_TRACE_ENTRIES 500 +#ifdef CC_USING_FENTRY +# define fentry 1 +#else +# define fentry 0 +#endif + static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; +/* + * Reserve one entry for the passed in ip. This will allow + * us to remove most or all of the stack size overhead + * added by the stack tracer itself. + */ static struct stack_trace max_stack_trace = { - .max_entries = STACK_TRACE_ENTRIES, - .entries = stack_dump_trace, + .max_entries = STACK_TRACE_ENTRIES - 1, + .entries = &stack_dump_trace[1], }; static unsigned long max_stack_size; @@ -37,25 +48,34 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; -static inline void check_stack(void) +static inline void +check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; + static int tracer_frame; + int frame_size = ACCESS_ONCE(tracer_frame); int i; - this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1); + this_size = ((unsigned long)stack) & (THREAD_SIZE-1); this_size = THREAD_SIZE - this_size; + /* Remove the frame of the tracer */ + this_size -= frame_size; if (this_size <= max_stack_size) return; /* we do not handle interrupt stacks yet */ - if (!object_is_on_stack(&this_size)) + if (!object_is_on_stack(stack)) return; local_irq_save(flags); arch_spin_lock(&max_stack_lock); + /* In case another CPU set the tracer_frame on us */ + if (unlikely(!frame_size)) + this_size -= tracer_frame; + /* a race could have already updated it */ if (this_size <= max_stack_size) goto out; @@ -68,10 +88,18 @@ static inline void check_stack(void) save_stack_trace(&max_stack_trace); /* + * Add the passed in ip from the function tracer. + * Searching for this on the stack will skip over + * most of the overhead from the stack tracer itself. + */ + stack_dump_trace[0] = ip; + max_stack_trace.nr_entries++; + + /* * Now find where in the stack these are. */ i = 0; - start = &this_size; + start = stack; top = (unsigned long *) (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE); @@ -95,6 +123,18 @@ static inline void check_stack(void) found = 1; /* Start the search from here */ start = p + 1; + /* + * We do not want to show the overhead + * of the stack tracer stack in the + * max stack. If we haven't figured + * out what that is, then figure it out + * now. + */ + if (unlikely(!tracer_frame) && i == 1) { + tracer_frame = (p - stack) * + sizeof(unsigned long); + max_stack_size -= tracer_frame; + } } } @@ -110,6 +150,7 @@ static inline void check_stack(void) static void stack_trace_call(unsigned long ip, unsigned long parent_ip) { + unsigned long stack; int cpu; if (unlikely(!ftrace_enabled || stack_trace_disabled)) @@ -122,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) if (per_cpu(trace_active, cpu)++ != 0) goto out; - check_stack(); + /* + * When fentry is used, the traced function does not get + * its stack frame set up, and we lose the parent. + * The ip is pretty useless because the function tracer + * was called before that function set up its stack frame. + * In this case, we use the parent ip. + * + * By adding the return address of either the parent ip + * or the current ip we can disregard most of the stack usage + * caused by the stack tracer itself. + * + * The function tracer always reports the address of where the + * mcount call was, but the stack will hold the return address. + */ + if (fentry) + ip = parent_ip; + else + ip += MCOUNT_INSN_SIZE; + + check_stack(ip, &stack); out: per_cpu(trace_active, cpu)--; @@ -360,6 +420,8 @@ static __init int stack_trace_init(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; trace_create_file("stack_max_size", 0644, d_tracer, &max_stack_size, &stack_max_size_fops); diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 96cffb2..847f88a 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -307,6 +307,8 @@ static int tracing_stat_init(void) struct dentry *d_tracing; d_tracing = tracing_init_dentry(); + if (!d_tracing) + return 0; stat_dir = debugfs_create_dir("trace_stat", d_tracing); if (!stat_dir) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index ee7b5a0..5819cd5 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -304,6 +304,8 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct ring_buffer *buffer; int size; int syscall_nr; + unsigned long irq_flags; + int pc; syscall_nr = syscall_get_nr(current, regs); if (syscall_nr < 0) @@ -317,8 +319,11 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + local_save_flags(irq_flags); + pc = preempt_count(); + event = trace_current_buffer_lock_reserve(&buffer, - sys_data->enter_event->event.type, size, 0, 0); + sys_data->enter_event->event.type, size, irq_flags, pc); if (!event) return; @@ -328,7 +333,8 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!filter_current_check_discard(buffer, sys_data->enter_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) @@ -338,6 +344,8 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) struct ring_buffer_event *event; struct ring_buffer *buffer; int syscall_nr; + unsigned long irq_flags; + int pc; syscall_nr = syscall_get_nr(current, regs); if (syscall_nr < 0) @@ -349,8 +357,12 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; + local_save_flags(irq_flags); + pc = preempt_count(); + event = trace_current_buffer_lock_reserve(&buffer, - sys_data->exit_event->event.type, sizeof(*entry), 0, 0); + sys_data->exit_event->event.type, sizeof(*entry), + irq_flags, pc); if (!event) return; @@ -360,7 +372,8 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!filter_current_check_discard(buffer, sys_data->exit_event, entry, event)) - trace_current_buffer_unlock_commit(buffer, event, 0, 0); + trace_current_buffer_unlock_commit(buffer, event, + irq_flags, pc); } int reg_event_syscall_enter(struct ftrace_event_call *call) |