aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorZiyann <jaraidaniel@gmail.com>2014-11-26 11:37:03 +0100
committerZiyann <jaraidaniel@gmail.com>2014-11-26 11:37:03 +0100
commite90611725bc4dcc8946e74c38bb2c58fdf6bf98f (patch)
treea6a2441a53406a47f3a4b4a5103c0a067aaab445 /kernel
parentda82942bbd6a40cf7ddd3edc52352610905137ce (diff)
parent5dba9ddd98cbc7ad319d687887981a0ea0062c75 (diff)
downloadkernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.zip
kernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.tar.gz
kernel_samsung_tuna-e90611725bc4dcc8946e74c38bb2c58fdf6bf98f.tar.bz2
Merge remote-tracking branch 'linux/linux-3.0.y' into stable-newpvr
Conflicts: arch/arm/include/asm/hardware/cache-l2x0.h
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit_tree.c2
-rw-r--r--kernel/cgroup.c13
-rw-r--r--kernel/events/core.c79
-rw-r--r--kernel/events/hw_breakpoint.c4
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/hrtimer.c37
-rw-r--r--kernel/irq/manage.c6
-rw-r--r--kernel/kmod.c5
-rw-r--r--kernel/relay.c5
-rw-r--r--kernel/sched.c6
-rw-r--r--kernel/sched_clock.c26
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sys.c3
-rw-r--r--kernel/time/tick-broadcast.c14
-rw-r--r--kernel/time/tick-common.c1
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/timer.c10
-rw-r--r--kernel/trace/ftrace.c48
-rw-r--r--kernel/trace/trace.c70
-rw-r--r--kernel/trace/trace_selftest.c9
-rw-r--r--kernel/trace/trace_stack.c76
-rw-r--r--kernel/trace/trace_stat.c2
-rw-r--r--kernel/trace/trace_syscalls.c21
24 files changed, 317 insertions, 129 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f6b4ac7..7d9731d 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -614,9 +614,9 @@ void audit_trim_trees(void)
}
spin_unlock(&hash_lock);
trim_marked(tree);
- put_tree(tree);
drop_collected_mounts(root_mnt);
skip_it:
+ put_tree(tree);
mutex_lock(&audit_filter_mutex);
}
list_del(&cursor);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d0913a6..2005981 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
if (!group)
return -ENOMEM;
/* pre-allocate to guarantee space while iterating in rcu read-side. */
- retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+ retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
if (retval)
goto out_free_group_list;
@@ -3533,6 +3533,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
const char *buffer)
{
struct cgroup_event *event = NULL;
+ struct cgroup *cgrp_cfile;
unsigned int efd, cfd;
struct file *efile = NULL;
struct file *cfile = NULL;
@@ -3587,6 +3588,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
goto fail;
}
+ /*
+ * The file to be monitored must be in the same cgroup as
+ * cgroup.event_control is.
+ */
+ cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
+ if (cgrp_cfile != cgrp) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
goto fail;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b582650..acdc087 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -223,9 +223,9 @@ perf_cgroup_match(struct perf_event *event)
return !event->cgrp || event->cgrp == cpuctx->cgrp;
}
-static inline void perf_get_cgroup(struct perf_event *event)
+static inline bool perf_tryget_cgroup(struct perf_event *event)
{
- css_get(&event->cgrp->css);
+ return css_tryget(&event->cgrp->css);
}
static inline void perf_put_cgroup(struct perf_event *event)
@@ -342,6 +342,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ if (cpuctx->unique_pmu != pmu)
+ continue; /* ensure we process each cpuctx once */
perf_pmu_disable(cpuctx->ctx.pmu);
@@ -365,9 +367,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
if (mode & PERF_CGROUP_SWIN) {
WARN_ON_ONCE(cpuctx->cgrp);
- /* set cgrp before ctxsw in to
- * allow event_filter_match() to not
- * have to pass task around
+ /*
+ * set cgrp before ctxsw in to allow
+ * event_filter_match() to not have to pass
+ * task around
*/
cpuctx->cgrp = perf_cgroup_from_task(task);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -415,7 +418,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
event->cgrp = cgrp;
/* must be done before we fput() the file */
- perf_get_cgroup(event);
+ if (!perf_tryget_cgroup(event)) {
+ event->cgrp = NULL;
+ ret = -ENOENT;
+ goto out;
+ }
/*
* all events in a group must monitor
@@ -651,8 +658,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
{
struct perf_event_context *ctx;
- rcu_read_lock();
retry:
+ /*
+ * One of the few rules of preemptible RCU is that one cannot do
+ * rcu_read_unlock() while holding a scheduler (or nested) lock when
+ * part of the read side critical section was preemptible -- see
+ * rcu_read_unlock_special().
+ *
+ * Since ctx->lock nests under rq->lock we must ensure the entire read
+ * side critical section is non-preemptible.
+ */
+ preempt_disable();
+ rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
/*
@@ -668,6 +685,8 @@ retry:
raw_spin_lock_irqsave(&ctx->lock, *flags);
if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+ rcu_read_unlock();
+ preempt_enable();
goto retry;
}
@@ -677,6 +696,7 @@ retry:
}
}
rcu_read_unlock();
+ preempt_enable();
return ctx;
}
@@ -826,6 +846,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
}
/*
+ * Initialize event state based on the perf_event_attr::disabled.
+ */
+static inline void perf_event__state_init(struct perf_event *event)
+{
+ event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
+ PERF_EVENT_STATE_INACTIVE;
+}
+
+/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
@@ -1616,7 +1645,16 @@ static int __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
- if (WARN_ON_ONCE(!ctx->is_active))
+ /*
+ * There's a time window between 'ctx->is_active' check
+ * in perf_event_enable function and this place having:
+ * - IRQs on
+ * - ctx->lock unlocked
+ *
+ * where the task could be killed and 'ctx' deactivated
+ * by perf_event_exit_task.
+ */
+ if (!ctx->is_active)
return -EINVAL;
raw_spin_lock(&ctx->lock);
@@ -4544,7 +4582,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_task_ctx(&cpuctx->ctx, task_event);
@@ -4690,7 +4728,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
@@ -4886,7 +4924,7 @@ got_name:
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
vma->vm_flags & VM_EXEC);
@@ -5912,8 +5950,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
- if (cpuctx->active_pmu == old_pmu)
- cpuctx->active_pmu = pmu;
+ if (cpuctx->unique_pmu == old_pmu)
+ cpuctx->unique_pmu = pmu;
}
}
@@ -6045,7 +6083,7 @@ skip_type:
cpuctx->ctx.pmu = pmu;
cpuctx->jiffies_interval = 1;
INIT_LIST_HEAD(&cpuctx->rotation_list);
- cpuctx->active_pmu = pmu;
+ cpuctx->unique_pmu = pmu;
}
got_cpu_context:
@@ -6219,8 +6257,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->overflow_handler = overflow_handler;
- if (attr->disabled)
- event->state = PERF_EVENT_STATE_OFF;
+ perf_event__state_init(event);
pmu = NULL;
@@ -6594,9 +6631,17 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader);
+
+ /*
+ * Removing from the context ends up with disabled
+ * event. What we want here is event in the initial
+ * startup state, ready to be add into new context.
+ */
+ perf_event__state_init(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_remove_from_context(sibling);
+ perf_event__state_init(sibling);
put_ctx(gctx);
}
mutex_unlock(&gctx->mutex);
@@ -7064,7 +7109,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
* child.
*/
- child_ctx = alloc_perf_context(event->pmu, child);
+ child_ctx = alloc_perf_context(parent_ctx->pmu, child);
if (!child_ctx)
return -ENOMEM;
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 086adf2..d99cb4b 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -147,7 +147,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
return;
}
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
@@ -233,7 +233,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
if (cpu >= 0) {
toggle_bp_task_slot(bp, cpu, enable, type, weight);
} else {
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
toggle_bp_task_slot(bp, cpu, enable, type, weight);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 61e554e..d38153d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,7 @@
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
+#include <linux/hugetlb.h>
#include <asm/futex.h>
@@ -363,7 +364,7 @@ again:
} else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = page_head->mapping->host;
- key->shared.pgoff = page_head->index;
+ key->shared.pgoff = basepage_index(page);
}
get_futex_key_refs(key);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e079c3e..80ec91d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -61,6 +61,7 @@
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
.clock_base =
{
{
@@ -297,6 +298,10 @@ ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
} else {
unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+ /* Make sure nsec fits into long */
+ if (unlikely(nsec > KTIME_SEC_MAX))
+ return (ktime_t){ .tv64 = KTIME_MAX };
+
tmp = ktime_set((long)nsec, rem);
}
@@ -702,17 +707,20 @@ static int hrtimer_switch_to_hres(void)
return 1;
}
+static void clock_was_set_work(struct work_struct *work)
+{
+ clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
/*
- * Called from timekeeping code to reprogramm the hrtimer interrupt
- * device. If called from the timer interrupt context we defer it to
- * softirq context.
+ * Called from timekeeping and resume code to reprogramm the hrtimer
+ * interrupt device on all cpus.
*/
void clock_was_set_delayed(void)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- cpu_base->clock_was_set = 1;
- __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ schedule_work(&hrtimer_work);
}
#else
@@ -761,8 +769,10 @@ void hrtimers_resume(void)
WARN_ONCE(!irqs_disabled(),
KERN_INFO "hrtimers_resume() called with IRQs enabled!");
+ /* Retrigger on the local CPU */
retrigger_next_event(NULL);
- timerfd_clock_was_set();
+ /* And schedule a retrigger for all others */
+ clock_was_set_delayed();
}
static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
@@ -1307,6 +1317,8 @@ retry:
expires = ktime_sub(hrtimer_get_expires(timer),
base->offset);
+ if (expires.tv64 < 0)
+ expires.tv64 = KTIME_MAX;
if (expires.tv64 < expires_next.tv64)
expires_next = expires;
break;
@@ -1409,13 +1421,6 @@ void hrtimer_peek_ahead_timers(void)
static void run_hrtimer_softirq(struct softirq_action *h)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- if (cpu_base->clock_was_set) {
- cpu_base->clock_was_set = 0;
- clock_was_set();
- }
-
hrtimer_peek_ahead_timers();
}
@@ -1640,8 +1645,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
- raw_spin_lock_init(&cpu_base->lock);
-
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
cpu_base->clock_base[i].cpu_base = cpu_base;
timerqueue_init_head(&cpu_base->clock_base[i].active);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3e1bdf9..2f61278 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -536,9 +536,9 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
return 0;
if (irq_settings_can_request(desc)) {
- if (desc->action)
- if (irqflags & desc->action->flags & IRQF_SHARED)
- canrequest =1;
+ if (!desc->action ||
+ irqflags & desc->action->flags & IRQF_SHARED)
+ canrequest = 1;
}
irq_put_desc_unlock(desc, flags);
return canrequest;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index fabfe54..f625b4f 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -421,6 +421,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
int retval = 0;
helper_lock();
+ if (!sub_info->path) {
+ retval = -EINVAL;
+ goto out;
+ }
+
if (sub_info->path[0] == '\0')
goto out;
diff --git a/kernel/relay.c b/kernel/relay.c
index 2c242fb..a5be9af 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
struct splice_pipe_desc spd = {
.pages = pages,
.nr_pages = 0,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.partial = partial,
.flags = flags,
.ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
ret += padding;
out:
- splice_shrink_spd(pipe, &spd);
- return ret;
+ splice_shrink_spd(&spd);
+ return ret;
}
static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/sched.c b/kernel/sched.c
index 93d8b46..efb62f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2774,8 +2774,10 @@ static void try_to_wake_up_local(struct task_struct *p)
{
struct rq *rq = task_rq(p);
- BUG_ON(rq != this_rq());
- BUG_ON(p == current);
+ if (WARN_ON_ONCE(rq != this_rq()) ||
+ WARN_ON_ONCE(p == current))
+ return;
+
lockdep_assert_held(&rq->lock);
if (!raw_spin_trylock(&p->pi_lock)) {
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 9d8af0b..1eeaf74 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
u64 this_clock, remote_clock;
u64 *ptr, old_val, val;
+#if BITS_PER_LONG != 64
+again:
+ /*
+ * Careful here: The local and the remote clock values need to
+ * be read out atomic as we need to compare the values and
+ * then update either the local or the remote side. So the
+ * cmpxchg64 below only protects one readout.
+ *
+ * We must reread via sched_clock_local() in the retry case on
+ * 32bit as an NMI could use sched_clock_local() via the
+ * tracer and hit between the readout of
+ * the low32bit and the high 32bit portion.
+ */
+ this_clock = sched_clock_local(my_scd);
+ /*
+ * We must enforce atomic readout on 32bit, otherwise the
+ * update on the remote cpu can hit inbetween the readout of
+ * the low32bit and the high 32bit portion.
+ */
+ remote_clock = cmpxchg64(&scd->clock, 0, 0);
+#else
+ /*
+ * On 64bit the read of [my]scd->clock is atomic versus the
+ * update, so we can avoid the above 32bit dance.
+ */
sched_clock_local(my_scd);
again:
this_clock = my_scd->clock;
remote_clock = scd->clock;
+#endif
/*
* Use the opportunity that we have both locks
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c768588..fae7d67 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -4277,7 +4277,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
* idle runqueue:
*/
if (rq->cfs.load.weight)
- rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+ rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
return rr_interval;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index b0c0887..f15021b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2664,7 +2664,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
- struct siginfo info;
+ struct siginfo info = {};
info.si_signo = sig;
info.si_errno = 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 84e353b..1c69aa7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -320,7 +320,6 @@ void kernel_restart_prepare(char *cmd)
system_state = SYSTEM_RESTART;
usermodehelper_disable();
device_shutdown();
- syscore_shutdown();
}
/**
@@ -335,6 +334,7 @@ void kernel_restart(char *cmd)
{
kernel_restart_prepare(cmd);
disable_nonboot_cpus();
+ syscore_shutdown();
if (!cmd)
printk(KERN_EMERG "Restarting system.\n");
else
@@ -360,6 +360,7 @@ static void kernel_shutdown_prepare(enum system_states state)
void kernel_halt(void)
{
kernel_shutdown_prepare(SYSTEM_HALT);
+ disable_nonboot_cpus();
syscore_shutdown();
printk(KERN_EMERG "System halted.\n");
kmsg_dump(KMSG_DUMP_HALT);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9e40370..20ba7b4 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -66,6 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
*/
int tick_check_broadcast_device(struct clock_event_device *dev)
{
+ struct clock_event_device *cur = tick_broadcast_device.evtdev;
+
if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
(tick_broadcast_device.evtdev &&
tick_broadcast_device.evtdev->rating >= dev->rating) ||
@@ -73,6 +75,8 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
return 0;
clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+ if (cur)
+ cur->event_handler = clockevents_handle_noop;
tick_broadcast_device.evtdev = dev;
if (!cpumask_empty(tick_get_broadcast_mask()))
tick_broadcast_start_periodic(dev);
@@ -392,7 +396,15 @@ void tick_check_oneshot_broadcast(int cpu)
if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
- clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+ /*
+ * We might be in the middle of switching over from
+ * periodic to oneshot. If the CPU has not yet
+ * switched over, leave the device alone.
+ */
+ if (td->mode == TICKDEV_MODE_ONESHOT) {
+ clockevents_set_mode(td->evtdev,
+ CLOCK_EVT_MODE_ONESHOT);
+ }
}
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 119528d..c43b479 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -323,6 +323,7 @@ static void tick_shutdown(unsigned int *cpup)
*/
dev->mode = CLOCK_EVT_MODE_UNUSED;
clockevents_exchange_device(dev, NULL);
+ dev->event_handler = clockevents_handle_noop;
td->evtdev = NULL;
}
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c473ce2..c0be5f2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -811,7 +811,7 @@ void tick_cancel_sched_timer(int cpu)
hrtimer_cancel(&ts->sched_timer);
# endif
- ts->nohz_mode = NOHZ_MODE_INACTIVE;
+ memset(ts, 0, sizeof(*ts));
}
#endif
diff --git a/kernel/timer.c b/kernel/timer.c
index 27982d9..5eac0d8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -145,9 +145,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;
- if (j <= jiffies) /* rounding ate our timeout entirely; */
- return original;
- return j;
+ /*
+ * Make sure j is still in the future. Otherwise return the
+ * unmodified value.
+ */
+ return time_is_after_jiffies(j) ? j : original;
}
/**
@@ -1630,12 +1632,12 @@ static int __cpuinit init_timers_cpu(int cpu)
boot_done = 1;
base = &boot_tvec_bases;
}
+ spin_lock_init(&base->lock);
tvec_base_done[cpu] = 1;
} else {
base = per_cpu(tvec_bases, cpu);
}
- spin_lock_init(&base->lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b2ca34a..0d704b0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -548,7 +548,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
- for (i = 0; i < pages; i++) {
+ for (i = 1; i < pages; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
if (!pg->next)
goto out_free;
@@ -566,7 +566,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
free_page(tmp);
}
- free_page((unsigned long)stat->pages);
stat->pages = NULL;
stat->start = NULL;
@@ -934,6 +933,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+static loff_t
+ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
+{
+ loff_t ret;
+
+ if (file->f_mode & FMODE_READ)
+ ret = seq_lseek(file, offset, whence);
+ else
+ file->f_pos = ret = 1;
+
+ return ret;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -2300,19 +2312,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
inode, file);
}
-static loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
-{
- loff_t ret;
-
- if (file->f_mode & FMODE_READ)
- ret = seq_lseek(file, offset, origin);
- else
- file->f_pos = ret = 1;
-
- return ret;
-}
-
static int ftrace_match(char *str, char *regex, int len, int type)
{
int matched = 0;
@@ -3119,7 +3118,7 @@ static const struct file_operations ftrace_filter_fops = {
.open = ftrace_filter_open,
.read = seq_read,
.write = ftrace_filter_write,
- .llseek = ftrace_regex_lseek,
+ .llseek = ftrace_filter_lseek,
.release = ftrace_regex_release,
};
@@ -3127,7 +3126,7 @@ static const struct file_operations ftrace_notrace_fops = {
.open = ftrace_notrace_open,
.read = seq_read,
.write = ftrace_notrace_write,
- .llseek = ftrace_regex_lseek,
+ .llseek = ftrace_filter_lseek,
.release = ftrace_regex_release,
};
@@ -3288,7 +3287,8 @@ out:
if (fail)
return -EINVAL;
- ftrace_graph_filter_enabled = 1;
+ ftrace_graph_filter_enabled = !!(*idx);
+
return 0;
}
@@ -3335,8 +3335,8 @@ static const struct file_operations ftrace_graph_fops = {
.open = ftrace_graph_open,
.read = seq_read,
.write = ftrace_graph_write,
+ .llseek = ftrace_filter_lseek,
.release = ftrace_graph_release,
- .llseek = seq_lseek,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -3822,7 +3822,7 @@ static const struct file_operations ftrace_pid_fops = {
.open = ftrace_pid_open,
.write = ftrace_pid_write,
.read = seq_read,
- .llseek = seq_lseek,
+ .llseek = ftrace_filter_lseek,
.release = ftrace_pid_release,
};
@@ -3934,12 +3934,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ftrace_startup_sysctl();
/* we are starting ftrace again */
- if (ftrace_ops_list != &ftrace_list_end) {
- if (ftrace_ops_list->next == &ftrace_list_end)
- ftrace_trace_function = ftrace_ops_list->func;
- else
- ftrace_trace_function = ftrace_ops_list_func;
- }
+ if (ftrace_ops_list != &ftrace_list_end)
+ update_ftrace_function();
} else {
/* stopping ftrace calls (just send to ftrace_stub) */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b3ae845..34d15ba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -628,7 +628,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
max_data->pid = tsk->pid;
- max_data->uid = task_uid(tsk);
+ /*
+ * If tsk == current, then use current_uid(), as that does not use
+ * RCU. The irq tracer can be called out of RCU scope.
+ */
+ if (tsk == current)
+ max_data->uid = current_uid();
+ else
+ max_data->uid = task_uid(tsk);
+
max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
max_data->policy = tsk->policy;
max_data->rt_priority = tsk->rt_priority;
@@ -3237,6 +3245,7 @@ waitagain:
memset(&iter->seq, 0,
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
+ cpumask_clear(iter->started);
iter->pos = -1;
trace_event_read_lock();
@@ -3355,6 +3364,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.pages = pages_def,
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
@@ -3426,7 +3436,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
ret = splice_to_pipe(pipe, &spd);
out:
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
out_err:
@@ -3839,6 +3849,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages_def,
.partial = partial_def,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
@@ -3927,7 +3938,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
}
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
out:
return ret;
}
@@ -4360,6 +4371,8 @@ static __init int tracer_init_debugfs(void)
trace_access_lock_init();
d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return 0;
trace_create_file("tracing_enabled", 0644, d_tracer,
&global_trace, &tracing_ctrl_fops);
@@ -4487,30 +4500,32 @@ void trace_init_global_iter(struct trace_iterator *iter)
iter->cpu_file = TRACE_PIPE_ALL_CPU;
}
-static void
-__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
- static arch_spinlock_t ftrace_dump_lock =
- (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
+ static atomic_t dump_running;
unsigned int old_userobj;
- static int dump_ran;
unsigned long flags;
int cnt = 0, cpu;
- /* only one dump */
- local_irq_save(flags);
- arch_spin_lock(&ftrace_dump_lock);
- if (dump_ran)
- goto out;
-
- dump_ran = 1;
+ /* Only allow one dump user at a time. */
+ if (atomic_inc_return(&dump_running) != 1) {
+ atomic_dec(&dump_running);
+ return;
+ }
+ /*
+ * Always turn off tracing when we dump.
+ * We don't need to show trace output of what happens
+ * between multiple crashes.
+ *
+ * If the user does a sysrq-z, then they can re-enable
+ * tracing with echo 1 > tracing_on.
+ */
tracing_off();
- if (disable_tracing)
- ftrace_kill();
+ local_irq_save(flags);
trace_init_global_iter(&iter);
@@ -4581,26 +4596,15 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
printk(KERN_TRACE "---------------------------------\n");
out_enable:
- /* Re-enable tracing if requested */
- if (!disable_tracing) {
- trace_flags |= old_userobj;
+ trace_flags |= old_userobj;
- for_each_tracing_cpu(cpu) {
- atomic_dec(&iter.tr->data[cpu]->disabled);
- }
- tracing_on();
+ for_each_tracing_cpu(cpu) {
+ atomic_dec(&iter.tr->data[cpu]->disabled);
}
-
- out:
- arch_spin_unlock(&ftrace_dump_lock);
+ atomic_dec(&dump_running);
local_irq_restore(flags);
}
-
-/* By default: disable tracing after the dump */
-void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
-{
- __ftrace_dump(true, oops_dump_mode);
-}
+EXPORT_SYMBOL_GPL(ftrace_dump);
__init static int tracer_alloc_buffers(void)
{
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 288541f..09fd98a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -461,8 +461,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
/* Maximum number of functions to trace before diagnosing a hang */
#define GRAPH_MAX_FUNC_TEST 100000000
-static void
-__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
static unsigned int graph_hang_thresh;
/* Wrap the real function entry probe to avoid possible hanging */
@@ -472,8 +470,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
ftrace_graph_stop();
printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
- if (ftrace_dump_on_oops)
- __ftrace_dump(false, DUMP_ALL);
+ if (ftrace_dump_on_oops) {
+ ftrace_dump(DUMP_ALL);
+ /* ftrace_dump() disables tracing */
+ tracing_on();
+ }
return 0;
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b0b53b8..2d43977 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -17,13 +17,24 @@
#define STACK_TRACE_ENTRIES 500
+#ifdef CC_USING_FENTRY
+# define fentry 1
+#else
+# define fentry 0
+#endif
+
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
+/*
+ * Reserve one entry for the passed in ip. This will allow
+ * us to remove most or all of the stack size overhead
+ * added by the stack tracer itself.
+ */
static struct stack_trace max_stack_trace = {
- .max_entries = STACK_TRACE_ENTRIES,
- .entries = stack_dump_trace,
+ .max_entries = STACK_TRACE_ENTRIES - 1,
+ .entries = &stack_dump_trace[1],
};
static unsigned long max_stack_size;
@@ -37,25 +48,34 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
static int last_stack_tracer_enabled;
-static inline void check_stack(void)
+static inline void
+check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags;
unsigned long *p, *top, *start;
+ static int tracer_frame;
+ int frame_size = ACCESS_ONCE(tracer_frame);
int i;
- this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
+ this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
this_size = THREAD_SIZE - this_size;
+ /* Remove the frame of the tracer */
+ this_size -= frame_size;
if (this_size <= max_stack_size)
return;
/* we do not handle interrupt stacks yet */
- if (!object_is_on_stack(&this_size))
+ if (!object_is_on_stack(stack))
return;
local_irq_save(flags);
arch_spin_lock(&max_stack_lock);
+ /* In case another CPU set the tracer_frame on us */
+ if (unlikely(!frame_size))
+ this_size -= tracer_frame;
+
/* a race could have already updated it */
if (this_size <= max_stack_size)
goto out;
@@ -68,10 +88,18 @@ static inline void check_stack(void)
save_stack_trace(&max_stack_trace);
/*
+ * Add the passed in ip from the function tracer.
+ * Searching for this on the stack will skip over
+ * most of the overhead from the stack tracer itself.
+ */
+ stack_dump_trace[0] = ip;
+ max_stack_trace.nr_entries++;
+
+ /*
* Now find where in the stack these are.
*/
i = 0;
- start = &this_size;
+ start = stack;
top = (unsigned long *)
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
@@ -95,6 +123,18 @@ static inline void check_stack(void)
found = 1;
/* Start the search from here */
start = p + 1;
+ /*
+ * We do not want to show the overhead
+ * of the stack tracer stack in the
+ * max stack. If we haven't figured
+ * out what that is, then figure it out
+ * now.
+ */
+ if (unlikely(!tracer_frame) && i == 1) {
+ tracer_frame = (p - stack) *
+ sizeof(unsigned long);
+ max_stack_size -= tracer_frame;
+ }
}
}
@@ -110,6 +150,7 @@ static inline void check_stack(void)
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip)
{
+ unsigned long stack;
int cpu;
if (unlikely(!ftrace_enabled || stack_trace_disabled))
@@ -122,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
if (per_cpu(trace_active, cpu)++ != 0)
goto out;
- check_stack();
+ /*
+ * When fentry is used, the traced function does not get
+ * its stack frame set up, and we lose the parent.
+ * The ip is pretty useless because the function tracer
+ * was called before that function set up its stack frame.
+ * In this case, we use the parent ip.
+ *
+ * By adding the return address of either the parent ip
+ * or the current ip we can disregard most of the stack usage
+ * caused by the stack tracer itself.
+ *
+ * The function tracer always reports the address of where the
+ * mcount call was, but the stack will hold the return address.
+ */
+ if (fentry)
+ ip = parent_ip;
+ else
+ ip += MCOUNT_INSN_SIZE;
+
+ check_stack(ip, &stack);
out:
per_cpu(trace_active, cpu)--;
@@ -360,6 +420,8 @@ static __init int stack_trace_init(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
+ if (!d_tracer)
+ return 0;
trace_create_file("stack_max_size", 0644, d_tracer,
&max_stack_size, &stack_max_size_fops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 96cffb2..847f88a 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -307,6 +307,8 @@ static int tracing_stat_init(void)
struct dentry *d_tracing;
d_tracing = tracing_init_dentry();
+ if (!d_tracing)
+ return 0;
stat_dir = debugfs_create_dir("trace_stat", d_tracing);
if (!stat_dir)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ee7b5a0..5819cd5 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -304,6 +304,8 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
struct ring_buffer *buffer;
int size;
int syscall_nr;
+ unsigned long irq_flags;
+ int pc;
syscall_nr = syscall_get_nr(current, regs);
if (syscall_nr < 0)
@@ -317,8 +319,11 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
event = trace_current_buffer_lock_reserve(&buffer,
- sys_data->enter_event->event.type, size, 0, 0);
+ sys_data->enter_event->event.type, size, irq_flags, pc);
if (!event)
return;
@@ -328,7 +333,8 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
if (!filter_current_check_discard(buffer, sys_data->enter_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
@@ -338,6 +344,8 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
struct ring_buffer_event *event;
struct ring_buffer *buffer;
int syscall_nr;
+ unsigned long irq_flags;
+ int pc;
syscall_nr = syscall_get_nr(current, regs);
if (syscall_nr < 0)
@@ -349,8 +357,12 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
if (!sys_data)
return;
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
event = trace_current_buffer_lock_reserve(&buffer,
- sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
+ sys_data->exit_event->event.type, sizeof(*entry),
+ irq_flags, pc);
if (!event)
return;
@@ -360,7 +372,8 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
if (!filter_current_check_discard(buffer, sys_data->exit_event,
entry, event))
- trace_current_buffer_unlock_commit(buffer, event, 0, 0);
+ trace_current_buffer_unlock_commit(buffer, event,
+ irq_flags, pc);
}
int reg_event_syscall_enter(struct ftrace_event_call *call)