diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 11 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 11 | ||||
-rw-r--r-- | kernel/cpu.c | 74 | ||||
-rw-r--r-- | kernel/exit.c | 9 | ||||
-rw-r--r-- | kernel/futex.c | 28 | ||||
-rw-r--r-- | kernel/hrtimer.c | 6 | ||||
-rw-r--r-- | kernel/hung_task.c | 14 | ||||
-rw-r--r-- | kernel/irq/manage.c | 5 | ||||
-rw-r--r-- | kernel/irq/pm.c | 48 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 6 | ||||
-rw-r--r-- | kernel/jump_label.c | 3 | ||||
-rw-r--r-- | kernel/kmod.c | 4 | ||||
-rw-r--r-- | kernel/kprobes.c | 2 | ||||
-rw-r--r-- | kernel/power/suspend.c | 2 | ||||
-rw-r--r-- | kernel/printk.c | 6 | ||||
-rw-r--r-- | kernel/signal.c | 4 | ||||
-rw-r--r-- | kernel/sysctl_binary.c | 2 | ||||
-rw-r--r-- | kernel/taskstats.c | 1 | ||||
-rw-r--r-- | kernel/time.c | 2 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 2 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 4 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 76 | ||||
-rw-r--r-- | kernel/trace/trace.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 6 |
27 files changed, 253 insertions, 84 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 241b74a..5083a09 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1176,10 +1176,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* * If the 'all' option was specified select all the subsystems, - * otherwise 'all, 'none' and a subsystem name options were not - * specified, let's default to 'all' + * otherwise if 'none', 'name=' and a subsystem name options + * were not specified, let's default to 'all' */ - if (all_ss || (!all_ss && !one_ss && !opts->none)) { + if (all_ss || (!one_ss && !opts->none && !opts->name)) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss == NULL) @@ -2105,11 +2105,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) continue; /* get old css_set pointer */ task_lock(tsk); - if (tsk->flags & PF_EXITING) { - /* ignore this task if it's going away */ - task_unlock(tsk); - continue; - } oldcg = tsk->cgroups; get_css_set(oldcg); task_unlock(tsk); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e691818..a3f638a 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -153,6 +153,13 @@ static void freezer_destroy(struct cgroup_subsys *ss, kfree(cgroup_freezer(cgroup)); } +/* task is frozen or will freeze immediately when next it gets woken */ +static bool is_task_frozen_enough(struct task_struct *task) +{ + return frozen(task) || + (task_is_stopped_or_traced(task) && freezing(task)); +} + /* * The call to cgroup_lock() in the freezer.state write method prevents * a write to that file racing against an attach, and hence the @@ -231,7 +238,7 @@ static void update_if_frozen(struct cgroup *cgroup, cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (frozen(task)) + if (is_task_frozen_enough(task)) nfrozen++; } @@ -284,7 +291,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) continue; - if (frozen(task)) + if (is_task_frozen_enough(task)) continue; if (!freezing(task) && !freezer_should_skip(task)) num_cant_freeze_now++; diff --git a/kernel/cpu.c b/kernel/cpu.c index 4047707..eae3d9b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,6 +15,7 @@ #include <linux/stop_machine.h> #include <linux/mutex.h> #include <linux/gfp.h> +#include <linux/suspend.h> #ifdef CONFIG_SMP /* Serializes the updates to cpu_online_mask, cpu_present_mask */ @@ -476,6 +477,79 @@ static int alloc_frozen_cpus(void) return 0; } core_initcall(alloc_frozen_cpus); + +/* + * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU + * hotplug when tasks are about to be frozen. Also, don't allow the freezer + * to continue until any currently running CPU hotplug operation gets + * completed. + * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the + * 'cpu_add_remove_lock'. And this same lock is also taken by the regular + * CPU hotplug path and released only after it is complete. Thus, we + * (and hence the freezer) will block here until any currently running CPU + * hotplug operation gets completed. + */ +void cpu_hotplug_disable_before_freeze(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + + +/* + * When tasks have been thawed, re-enable regular CPU hotplug (which had been + * disabled while beginning to freeze tasks). + */ +void cpu_hotplug_enable_after_thaw(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + +/* + * When callbacks for CPU hotplug notifications are being executed, we must + * ensure that the state of the system with respect to the tasks being frozen + * or not, as reported by the notification, remains unchanged *throughout the + * duration* of the execution of the callbacks. + * Hence we need to prevent the freezer from racing with regular CPU hotplug. + * + * This synchronization is implemented by mutually excluding regular CPU + * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ + * Hibernate notifications. + */ +static int +cpu_hotplug_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + switch (action) { + + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + cpu_hotplug_disable_before_freeze(); + break; + + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + cpu_hotplug_enable_after_thaw(); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + + +int cpu_hotplug_pm_sync_init(void) +{ + pm_notifier(cpu_hotplug_pm_callback, 0); + return 0; +} +core_initcall(cpu_hotplug_pm_sync_init); + #endif /* CONFIG_PM_SLEEP_SMP */ /** diff --git a/kernel/exit.c b/kernel/exit.c index f2b321b..303bed2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1553,8 +1553,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* dead body doesn't have much to contribute */ - if (p->exit_state == EXIT_DEAD) + if (unlikely(p->exit_state == EXIT_DEAD)) { + /* + * But do not ignore this task until the tracer does + * wait_task_zombie()->do_notify_parent(). + */ + if (likely(!ptrace) && unlikely(ptrace_reparented(p))) + wo->notask_error = 0; return 0; + } /* slay zombie? */ if (p->exit_state == EXIT_ZOMBIE) { diff --git a/kernel/futex.c b/kernel/futex.c index 11cbe05..e6160fa 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -314,17 +314,29 @@ again: #endif lock_page(page_head); + + /* + * If page_head->mapping is NULL, then it cannot be a PageAnon + * page; but it might be the ZERO_PAGE or in the gate area or + * in a special mapping (all cases which we are happy to fail); + * or it may have been a good file page when get_user_pages_fast + * found it, but truncated or holepunched or subjected to + * invalidate_complete_page2 before we got the page lock (also + * cases which we are happy to fail). And we hold a reference, + * so refcount care in invalidate_complete_page's remove_mapping + * prevents drop_caches from setting mapping to NULL beneath us. + * + * The case we do have to guard against is when memory pressure made + * shmem_writepage move it from filecache to swapcache beneath us: + * an unlikely race, but we do need to retry for page_head->mapping. + */ if (!page_head->mapping) { + int shmem_swizzled = PageSwapCache(page_head); unlock_page(page_head); put_page(page_head); - /* - * ZERO_PAGE pages don't have a mapping. Avoid a busy loop - * trying to find one. RW mapping would have COW'd (and thus - * have a mapping) so this page is RO and won't ever change. - */ - if ((page_head == ZERO_PAGE(address))) - return -EFAULT; - goto again; + if (shmem_swizzled) + goto again; + return -EFAULT; } /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a9205e3..2043c08 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -885,10 +885,13 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { + struct timerqueue_node *next_timer; if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; - if (&timer->node == timerqueue_getnext(&base->active)) { + next_timer = timerqueue_getnext(&base->active); + timerqueue_del(&base->active, &timer->node); + if (&timer->node == next_timer) { #ifdef CONFIG_HIGH_RES_TIMERS /* Reprogram the clock event device. if enabled */ if (reprogram && hrtimer_hres_active()) { @@ -901,7 +904,6 @@ static void __remove_hrtimer(struct hrtimer *timer, } #endif } - timerqueue_del(&base->active, &timer->node); if (!timerqueue_getnext(&base->active)) base->cpu_base->active_bases &= ~(1 << base->index); out: diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ea64012..e972276 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -74,11 +74,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) /* * Ensure the task is not frozen. - * Also, when a freshly created task is scheduled once, changes - * its state to TASK_UNINTERRUPTIBLE without having ever been - * switched out once, it musn't be checked. + * Also, skip vfork and any other user process that freezer should skip. */ - if (unlikely(t->flags & PF_FROZEN || !switch_count)) + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + + /* + * When a freshly created task is scheduled once, changes its state to + * TASK_UNINTERRUPTIBLE without having ever been switched out once, it + * musn't be checked. + */ + if (unlikely(!switch_count)) return; if (switch_count != t->last_switch_count) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840ae..a1aadab 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -620,8 +620,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) static int irq_wait_for_interrupt(struct irqaction *action) { + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) { @@ -629,7 +630,9 @@ static int irq_wait_for_interrupt(struct irqaction *action) return 0; } schedule(); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); return -1; } diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index f323a4c..fe4b09c 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -9,6 +9,7 @@ #include <linux/irq.h> #include <linux/module.h> #include <linux/interrupt.h> +#include <linux/syscore_ops.h> #include "internals.h" @@ -39,25 +40,58 @@ void suspend_device_irqs(void) } EXPORT_SYMBOL_GPL(suspend_device_irqs); -/** - * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() - * - * Enable all interrupt lines previously disabled by suspend_device_irqs() that - * have the IRQS_SUSPENDED flag set. - */ -void resume_device_irqs(void) +static void resume_irqs(bool want_early) { struct irq_desc *desc; int irq; for_each_irq_desc(irq, desc) { unsigned long flags; + bool is_early = desc->action && + desc->action->flags & IRQF_EARLY_RESUME; + + if (is_early != want_early) + continue; raw_spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, true); raw_spin_unlock_irqrestore(&desc->lock, flags); } } + +/** + * irq_pm_syscore_ops - enable interrupt lines early + * + * Enable all interrupt lines with %IRQF_EARLY_RESUME set. + */ +static void irq_pm_syscore_resume(void) +{ + resume_irqs(true); +} + +static struct syscore_ops irq_pm_syscore_ops = { + .resume = irq_pm_syscore_resume, +}; + +static int __init irq_pm_init_ops(void) +{ + register_syscore_ops(&irq_pm_syscore_ops); + return 0; +} + +device_initcall(irq_pm_init_ops); + +/** + * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() + * + * Enable all non-%IRQF_EARLY_RESUME interrupt lines previously + * disabled by suspend_device_irqs() that have the IRQS_SUSPENDED flag + * set as well as those with %IRQF_FORCE_RESUME. + */ +void resume_device_irqs(void) +{ + resume_irqs(false); +} EXPORT_SYMBOL_GPL(resume_device_irqs); /** diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index aa57d5d..dc813a9 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -84,7 +84,9 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER) || !action->next) + (action->flags & __IRQF_TIMER) || + (action->handler(irq, action->dev_id) == IRQ_HANDLED) || + !action->next) goto out; /* Already running on another processor */ @@ -115,7 +117,7 @@ static int misrouted_irq(int irq) struct irq_desc *desc; int i, ok = 0; - if (atomic_inc_return(&irq_poll_active) == 1) + if (atomic_inc_return(&irq_poll_active) != 1) goto out; irq_poll_cpu = smp_processor_id(); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a8ce450..e6f1f24 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -66,8 +66,9 @@ void jump_label_inc(struct jump_label_key *key) return; jump_label_lock(); - if (atomic_add_return(1, &key->enabled) == 1) + if (atomic_read(&key->enabled) == 0) jump_label_update(key, JUMP_LABEL_ENABLE); + atomic_inc(&key->enabled); jump_label_unlock(); } diff --git a/kernel/kmod.c b/kernel/kmod.c index 47613df..fabfe54 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -114,10 +114,12 @@ int __request_module(bool wait, const char *fmt, ...) atomic_inc(&kmod_concurrent); if (atomic_read(&kmod_concurrent) > max_modprobes) { /* We may be blaming an innocent here, but unlikely */ - if (kmod_loop_msg++ < 5) + if (kmod_loop_msg < 5) { printk(KERN_ERR "request_module: runaway loop modprobe %s\n", module_name); + kmod_loop_msg++; + } atomic_dec(&kmod_concurrent); return -ENOMEM; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7798181..e0f0bdd 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1077,6 +1077,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) /* Early boot. kretprobe_table_locks not yet initialized. */ return; + INIT_HLIST_HEAD(&empty_rp); hash = hash_ptr(tk, KPROBE_HASH_BITS); head = &kretprobe_inst_table[hash]; kretprobe_table_lock(hash, &flags); @@ -1085,7 +1086,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) recycle_rp_inst(ri, &empty_rp); } kretprobe_table_unlock(hash, &flags); - INIT_HLIST_HEAD(&empty_rp); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 63774df..61e6347 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -310,7 +310,7 @@ int enter_state(suspend_state_t state) */ int pm_suspend(suspend_state_t state) { - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) + if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) return enter_state(state); return -EINVAL; } diff --git a/kernel/printk.c b/kernel/printk.c index dc5aefe..2414614 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -369,8 +369,10 @@ static int check_syslog_permissions(int type, bool from_file) return 0; /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ if (capable(CAP_SYS_ADMIN)) { - WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " - "but no CAP_SYSLOG (deprecated).\n"); + printk_once(KERN_WARNING "%s (%d): " + "Attempt to access syslog with CAP_SYS_ADMIN " + "but no CAP_SYSLOG (deprecated).\n", + current->comm, task_pid_nr(current)); return 0; } return -EPERM; diff --git a/kernel/signal.c b/kernel/signal.c index 415d85d..43fee1c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1894,21 +1894,19 @@ static int do_signal_stop(int signr) */ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; - else - WARN_ON_ONCE(!task_ptrace(current)); current->group_stop &= ~GROUP_STOP_SIGMASK; current->group_stop |= signr | gstop; sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) { - t->group_stop &= ~GROUP_STOP_SIGMASK; /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. */ if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { + t->group_stop &= ~GROUP_STOP_SIGMASK; t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028..e055e8b 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1354,7 +1354,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, fput(file); out_putname: - putname(pathname); + __putname(pathname); out: return result; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index fc0f220..8d597b1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -657,6 +657,7 @@ static struct genl_ops taskstats_ops = { .cmd = TASKSTATS_CMD_GET, .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, + .flags = GENL_ADMIN_PERM, }; static struct genl_ops cgroupstats_ops = { diff --git a/kernel/time.c b/kernel/time.c index 8e8dc6d..d776062 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -575,7 +575,7 @@ EXPORT_SYMBOL(jiffies_to_timeval); /* * Convert jiffies/jiffies_64 to clock_t and back. */ -clock_t jiffies_to_clock_t(long x) +clock_t jiffies_to_clock_t(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 # if HZ < USER_HZ diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ea5e1a9..8b70c76 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -181,7 +181,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) struct alarm *alarm; ktime_t expired = next->expires; - if (expired.tv64 >= now.tv64) + if (expired.tv64 > now.tv64) break; alarm = container_of(next, struct alarm, node); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0980f0..8b27006 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -531,7 +531,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) * note a margin of 12.5% is used because this can be computed with * a shift, versus say 10% which would require division. */ - return max_nsecs - (max_nsecs >> 5); + return max_nsecs - (max_nsecs >> 3); } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -653,7 +653,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * ~ 0.06ppm granularity for NTP. We apply the same 12.5% * margin as we do in clocksource_max_deferment() */ - sec = (cs->mask - (cs->mask >> 5)); + sec = (cs->mask - (cs->mask >> 3)); do_div(sec, freq); do_div(sec, scale); if (!sec) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c7218d1..7a90d02 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev) (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; - clockevents_exchange_device(NULL, dev); + clockevents_exchange_device(tick_broadcast_device.evtdev, dev); tick_broadcast_device.evtdev = dev; if (!cpumask_empty(tick_get_broadcast_mask())) tick_broadcast_start_periodic(dev); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fa58ec6..9b28d04 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -249,6 +249,8 @@ ktime_t ktime_get(void) secs = xtime.tv_sec + wall_to_monotonic.tv_sec; nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); /* @@ -280,6 +282,8 @@ void ktime_get_ts(struct timespec *ts) *ts = xtime; tomono = wall_to_monotonic; nsecs = timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ef9271b..9f8e2e1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -952,7 +952,7 @@ struct ftrace_func_probe { }; enum { - FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_UPDATE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), FTRACE_UPDATE_TRACE_FUNC = (1 << 2), FTRACE_START_FUNC_RET = (1 << 3), @@ -1182,8 +1182,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) return NULL; } +static void +ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); +static void +ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); + static int -ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) +ftrace_hash_move(struct ftrace_ops *ops, int enable, + struct ftrace_hash **dst, struct ftrace_hash *src) { struct ftrace_func_entry *entry; struct hlist_node *tp, *tn; @@ -1193,9 +1199,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) unsigned long key; int size = src->count; int bits = 0; + int ret; int i; /* + * Remove the current set, update the hash and add + * them back. + */ + ftrace_hash_rec_disable(ops, enable); + + /* * If the new source is empty, just free dst and assign it * the empty_hash. */ @@ -1215,9 +1228,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) if (bits > FTRACE_HASH_MAX_BITS) bits = FTRACE_HASH_MAX_BITS; + ret = -ENOMEM; new_hash = alloc_ftrace_hash(bits); if (!new_hash) - return -ENOMEM; + goto out; size = 1 << src->size_bits; for (i = 0; i < size; i++) { @@ -1236,7 +1250,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) rcu_assign_pointer(*dst, new_hash); free_ftrace_hash_rcu(old_hash); - return 0; + ret = 0; + out: + /* + * Enable regardless of ret: + * On success, we enable the new hash. + * On failure, we re-enable the original hash. + */ + ftrace_hash_rec_enable(ops, enable); + + return ret; } /* @@ -1498,7 +1521,7 @@ int ftrace_text_reserved(void *start, void *end) static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, int update) { unsigned long ftrace_addr; unsigned long flag = 0UL; @@ -1506,17 +1529,17 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) ftrace_addr = (unsigned long)FTRACE_ADDR; /* - * If we are enabling tracing: + * If we are updating calls: * * If the record has a ref count, then we need to enable it * because someone is using it. * * Otherwise we make sure its disabled. * - * If we are disabling tracing, then disable all records that + * If we are disabling calls, then disable all records that * are enabled. */ - if (enable && (rec->flags & ~FTRACE_FL_MASK)) + if (update && (rec->flags & ~FTRACE_FL_MASK)) flag = FTRACE_FL_ENABLED; /* If the state of this record hasn't changed, then do nothing */ @@ -1532,7 +1555,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) return ftrace_make_nop(NULL, rec, ftrace_addr); } -static void ftrace_replace_code(int enable) +static void ftrace_replace_code(int update) { struct dyn_ftrace *rec; struct ftrace_page *pg; @@ -1546,7 +1569,7 @@ static void ftrace_replace_code(int enable) if (rec->flags & FTRACE_FL_FREE) continue; - failed = __ftrace_replace_code(rec, enable); + failed = __ftrace_replace_code(rec, update); if (failed) { ftrace_bug(failed, rec->ip); /* Stop processing */ @@ -1596,7 +1619,7 @@ static int __ftrace_modify_code(void *data) { int *command = data; - if (*command & FTRACE_ENABLE_CALLS) + if (*command & FTRACE_UPDATE_CALLS) ftrace_replace_code(1); else if (*command & FTRACE_DISABLE_CALLS) ftrace_replace_code(0); @@ -1652,7 +1675,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) return -ENODEV; ftrace_start_up++; - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_UPDATE_CALLS; /* ops marked global share the filter hashes */ if (ops->flags & FTRACE_OPS_FL_GLOBAL) { @@ -1704,8 +1727,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) if (ops != &global_ops || !global_start_up) ops->flags &= ~FTRACE_OPS_FL_ENABLED; - if (!ftrace_start_up) - command |= FTRACE_DISABLE_CALLS; + command |= FTRACE_UPDATE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; @@ -1727,7 +1749,7 @@ static void ftrace_startup_sysctl(void) saved_ftrace_func = NULL; /* ftrace_start_up is true if we want ftrace running */ if (ftrace_start_up) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); + ftrace_run_update_code(FTRACE_UPDATE_CALLS); } static void ftrace_shutdown_sysctl(void) @@ -2877,7 +2899,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ftrace_match_records(hash, buf, len); mutex_lock(&ftrace_lock); - ret = ftrace_hash_move(orig_hash, hash); + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED + && ftrace_enabled) + ftrace_run_update_code(FTRACE_UPDATE_CALLS); + mutex_unlock(&ftrace_lock); mutex_unlock(&ftrace_regex_lock); @@ -3060,18 +3086,12 @@ ftrace_regex_release(struct inode *inode, struct file *file) orig_hash = &iter->ops->notrace_hash; mutex_lock(&ftrace_lock); - /* - * Remove the current set, update the hash and add - * them back. - */ - ftrace_hash_rec_disable(iter->ops, filter_hash); - ret = ftrace_hash_move(orig_hash, iter->hash); - if (!ret) { - ftrace_hash_rec_enable(iter->ops, filter_hash); - if (iter->ops->flags & FTRACE_OPS_FL_ENABLED - && ftrace_enabled) - ftrace_run_update_code(FTRACE_ENABLE_CALLS); - } + ret = ftrace_hash_move(iter->ops, filter_hash, + orig_hash, iter->hash); + if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) + && ftrace_enabled) + ftrace_run_update_code(FTRACE_UPDATE_CALLS); + mutex_unlock(&ftrace_lock); } free_ftrace_hash(iter->hash); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ee9c921..0731e81a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3704,8 +3704,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (info->read < PAGE_SIZE) goto read; - info->read = 0; - trace_access_lock(info->cpu); ret = ring_buffer_read_page(info->tr->buffer, &info->spare, @@ -3715,6 +3713,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (ret < 0) return 0; + info->read = 0; + read: size = PAGE_SIZE - info->read; if (size > count) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3e2a7c9..2d04936 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1096,7 +1096,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events) /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { - __get_system(system); system->nr_events++; return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 256764e..bd3c636 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1766,7 +1766,7 @@ static int replace_system_preds(struct event_subsystem *system, * replace the filter for the call. */ filter = call->filter; - call->filter = filter_item->filter; + rcu_assign_pointer(call->filter, filter_item->filter); filter_item->filter = filter; fail = false; @@ -1821,7 +1821,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) filter = call->filter; if (!filter) goto out_unlock; - call->filter = NULL; + RCU_INIT_POINTER(call->filter, NULL); /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); @@ -1862,7 +1862,7 @@ out: * string */ tmp = call->filter; - call->filter = filter; + rcu_assign_pointer(call->filter, filter); if (tmp) { /* Make sure the call is done with the filter */ synchronize_sched(); |