Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 8
-rw-r--r-- | kernel/cpuset.c | 12
-rw-r--r-- | kernel/hrtimer.c | 36
-rw-r--r-- | kernel/irq/spurious.c | 7
-rw-r--r-- | kernel/posix-cpu-timers.c | 23
-rw-r--r-- | kernel/posix-timers.c | 7
-rw-r--r-- | kernel/printk.c | 13
-rw-r--r-- | kernel/ptrace.c | 63
-rw-r--r-- | kernel/resource.c | 50
-rw-r--r-- | kernel/sched.c | 3
-rw-r--r-- | kernel/signal.c | 17
-rw-r--r-- | kernel/smp.c | 13
-rw-r--r-- | kernel/sysctl_binary.c | 3
-rw-r--r-- | kernel/timeconst.pl | 6
-rw-r--r-- | kernel/trace/ftrace.c | 48
-rw-r--r-- | kernel/trace/ring_buffer.c | 2
16 files changed, 230 insertions, 81 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3948f0a..460aa1b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -366,10 +366,18 @@ static void free_css_set_work(struct work_struct *work) struct cgroup *cgrp = link->cgrp; list_del(&link->cg_link_list); list_del(&link->cgrp_link_list); + /* + * We may not be holding cgroup_mutex, and if cgrp->count is + * dropped to 0 the cgroup can be destroyed at any time, hence + * rcu_read_lock is used to keep it alive. + */ + rcu_read_lock(); if (atomic_dec_and_test(&cgrp->count)) { check_for_release(cgrp); cgroup_wakeup_rmdir_waiter(cgrp); } + rcu_read_unlock(); + kfree(link); } write_unlock(&css_set_lock); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6cbe033..ea76c9c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2499,8 +2499,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) dentry = task_cs(tsk)->css.cgroup->dentry; spin_lock(&cpuset_buffer_lock); - snprintf(cpuset_name, CPUSET_NAME_LEN, - dentry ? (const char *)dentry->d_name.name : "/"); + + if (!dentry) { + strcpy(cpuset_name, "/"); + } else { + spin_lock(&dentry->d_lock); + strlcpy(cpuset_name, (const char *)dentry->d_name.name, + CPUSET_NAME_LEN); + spin_unlock(&dentry->d_lock); + } + nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, tsk->mems_allowed); printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 957869f..e079c3e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -640,21 +640,9 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) * and expiry check is done in the hrtimer_interrupt or in the softirq. */ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { - if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - if (wakeup) { - raw_spin_unlock(&base->cpu_base->lock); - raise_softirq_irqoff(HRTIMER_SOFTIRQ); - raw_spin_lock(&base->cpu_base->lock); - } else - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); - - return 1; - } - - return 0; + return base->cpu_base->hres_active && hrtimer_reprogram(timer, base); } static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) @@ -735,8 +723,7 @@ static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { return 0; } @@ -995,8 +982,21 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * * XXX send_remote_softirq() ? */ - if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) - hrtimer_enqueue_reprogram(timer, new_base, wakeup); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases) + && hrtimer_enqueue_reprogram(timer, new_base)) { + if (wakeup) { + /* + * We need to drop cpu_base->lock to avoid a + * lock ordering issue vs. rq->lock. 
+ */ + raw_spin_unlock(&new_base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + local_irq_restore(flags); + return ret; + } else { + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + } unlock_hrtimer_base(timer, &flags); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dc813a9..63633a3 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -80,13 +80,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) /* * All handlers must agree on IRQF_SHARED, so we test just the - * first. Check for action->next as well. + * first. */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER) || - (action->handler(irq, action->dev_id) == IRQ_HANDLED) || - !action->next) + (action->flags & __IRQF_TIMER)) goto out; /* Already running on another processor */ @@ -104,6 +102,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) do { if (handle_irq_event(desc) == IRQ_HANDLED) ret = IRQ_HANDLED; + /* Make sure that there is still a valid action */ action = desc->action; } while ((desc->istate & IRQS_PENDING) && action); desc->istate &= ~IRQS_POLL_INPROGRESS; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 640ded8..93d5e4a 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1450,8 +1450,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, while (!signal_pending(current)) { if (timer.it.cpu.expires.sched == 0) { /* - * Our timer fired and was reset. + * Our timer fired and was reset, below + * deletion can not fail. */ + posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } @@ -1469,9 +1471,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, it); + error = posix_cpu_timer_set(&timer, 0, &zero_it, it); + if (!error) { + /* + * Timer is now unarmed, deletion can not fail. + */ + posix_cpu_timer_del(&timer); + } spin_unlock_irq(&timer.it_lock); + while (error == TIMER_RETRY) { + /* + * We need to handle case when timer was or is in the + * middle of firing. In other cases we already freed + * resources. + */ + spin_lock_irq(&timer.it_lock); + error = posix_cpu_timer_del(&timer); + spin_unlock_irq(&timer.it_lock); + } + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4556182..d2da8ad 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -639,6 +639,13 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) { struct k_itimer *timr; + /* + * timer_t could be any type >= int and we want to make sure any + * @timer_id outside positive int range fails lookup. 
+ */ + if ((unsigned long long)timer_id > INT_MAX) + return NULL; + rcu_read_lock(); timr = idr_find(&posix_timers_id, (int)timer_id); if (timr) { diff --git a/kernel/printk.c b/kernel/printk.c index 2414614..a1d702c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -684,8 +684,19 @@ static void call_console_drivers(unsigned start, unsigned end) start_print = start; while (cur_index != end) { if (msg_level < 0 && ((end - cur_index) > 2)) { + /* + * prepare buf_prefix, as a contiguous array, + * to be processed by log_prefix function + */ + char buf_prefix[SYSLOG_PRI_MAX_LENGTH+1]; + unsigned i; + for (i = 0; i < ((end - cur_index)) && (i < SYSLOG_PRI_MAX_LENGTH); i++) { + buf_prefix[i] = LOG_BUF(cur_index + i); + } + buf_prefix[i] = '\0'; /* force '\0' as last string character */ + /* strip log prefix */ - cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL); + cur_index += log_prefix((const char *)&buf_prefix, &msg_level, NULL); start_print = cur_index; } while (cur_index != end) { diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 2df1157..40581ee 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -38,6 +38,36 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) child->parent = new_parent; } +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ + bool ret = false; + + spin_lock_irq(&task->sighand->siglock); + if (task_is_traced(task) && !__fatal_signal_pending(task)) { + task->state = __TASK_TRACED; + ret = true; + } + spin_unlock_irq(&task->sighand->siglock); + + return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) +{ + if (task->state != __TASK_TRACED) + return; + + WARN_ON(!task->ptrace || task->parent != current); + + spin_lock_irq(&task->sighand->siglock); + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + spin_unlock_irq(&task->sighand->siglock); +} + /** * __ptrace_unlink - unlink ptracee and restore its execution state * @child: ptracee to be unlinked @@ -92,7 +122,7 @@ void __ptrace_unlink(struct task_struct *child) * TASK_KILLABLE sleeps. */ if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child)) - signal_wake_up(child, task_is_traced(child)); + ptrace_signal_wake_up(child, true); spin_unlock(&child->sighand->siglock); } @@ -112,23 +142,29 @@ int ptrace_check_attach(struct task_struct *child, int kill) * be changed by us so it's not changing right after this. */ read_lock(&tasklist_lock); - if ((child->ptrace & PT_PTRACED) && child->parent == current) { + if (child->ptrace && child->parent == current) { + WARN_ON(child->state == __TASK_TRACED); /* * child->sighand can't be NULL, release_task() * does ptrace_unlink() before __exit_signal(). */ - spin_lock_irq(&child->sighand->siglock); - WARN_ON_ONCE(task_is_stopped(child)); - if (task_is_traced(child) || kill) + if (kill || ptrace_freeze_traced(child)) ret = 0; - spin_unlock_irq(&child->sighand->siglock); } read_unlock(&tasklist_lock); - if (!ret && !kill) - ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; + if (!ret && !kill) { + if (!wait_task_inactive(child, __TASK_TRACED)) { + /* + * This can only happen if may_ptrace_stop() fails and + * ptrace_stop() changes ->state back to TASK_RUNNING, + * so we should not worry about leaking __TASK_TRACED. + */ + WARN_ON(child->state == __TASK_TRACED); + ret = -ESRCH; + } + } - /* All systems go.. 
*/ return ret; } @@ -245,7 +281,7 @@ static int ptrace_attach(struct task_struct *task) */ if (task_is_stopped(task)) { task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING; - signal_wake_up(task, 1); + signal_wake_up_state(task, __TASK_STOPPED); wait_trap = true; } @@ -777,6 +813,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out_put_task_struct; ret = arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); out_put_task_struct: put_task_struct(child); @@ -915,8 +953,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, } ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (!ret) + if (!ret) { ret = compat_arch_ptrace(child, request, addr, data); + if (ret || request != PTRACE_DETACH) + ptrace_unfreeze_traced(child); + } out_put_task_struct: put_task_struct(child); diff --git a/kernel/resource.c b/kernel/resource.c index b29b83d..d005cd3 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -736,6 +736,7 @@ static void __init __reserve_region_with_split(struct resource *root, struct resource *parent = root; struct resource *conflict; struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC); + struct resource *next_res = NULL; if (!res) return; @@ -745,21 +746,46 @@ static void __init __reserve_region_with_split(struct resource *root, res->end = end; res->flags = IORESOURCE_BUSY; - conflict = __request_resource(parent, res); - if (!conflict) - return; + while (1) { - /* failed, split and try again */ - kfree(res); + conflict = __request_resource(parent, res); + if (!conflict) { + if (!next_res) + break; + res = next_res; + next_res = NULL; + continue; + } - /* conflict covered whole area */ - if (conflict->start <= start && conflict->end >= end) - return; + /* conflict covered whole area */ + if (conflict->start <= res->start && + conflict->end >= res->end) { + kfree(res); + WARN_ON(next_res); + break; + } + + /* failed, split and try again */ + if (conflict->start > res->start) { + end = res->end; + res->end = conflict->start - 1; + if (conflict->end < end) { + next_res = kzalloc(sizeof(*next_res), + GFP_ATOMIC); + if (!next_res) { + kfree(res); + break; + } + next_res->name = name; + next_res->start = conflict->end + 1; + next_res->end = end; + next_res->flags = IORESOURCE_BUSY; + } + } else { + res->start = conflict->end + 1; + } + } - if (conflict->start > start) - __reserve_region_with_split(root, start, conflict->start-1, name); - if (conflict->end < end) - __reserve_region_with_split(root, conflict->end+1, end, name); } void __init reserve_region_with_split(struct resource *root, diff --git a/kernel/sched.c b/kernel/sched.c index e788b66..89a9c34 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2779,7 +2779,8 @@ out: */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_ALL, 0); + WARN_ON(task_is_stopped_or_traced(p)); + return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); diff --git a/kernel/signal.c b/kernel/signal.c index 43fee1c..51f2e69 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -631,23 +631,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * No need to set need_resched since signal event passing * goes through ->blocked */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state) { - unsigned int mask; - set_tsk_thread_flag(t, TIF_SIGPENDING); - /* - * For SIGKILL, we want to 
wake it up in the stopped/traced/killable + * TASK_WAKEKILL also means wake it up in the stopped/traced/killable * case. We don't check t->state here because there is a race with it * executing another processor and just now entering stopped state. * By using wake_up_state, we ensure the process will wake up and * handle its death signal. */ - mask = TASK_INTERRUPTIBLE; - if (resume) - mask |= TASK_WAKEKILL; - if (!wake_up_state(t, mask)) + if (!wake_up_state(t, state | TASK_INTERRUPTIBLE)) kick_process(t); } @@ -1675,6 +1669,10 @@ static inline int may_ptrace_stop(void) * If SIGKILL was already sent before the caller unlocked * ->siglock we must see ->core_state != NULL. Otherwise it * is safe to enter schedule(). + * + * This is almost outdated, a task with the pending SIGKILL can't + * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported + * after SIGKILL was already dequeued. */ if (unlikely(current->mm->core_state) && unlikely(current->mm == current->parent->mm)) @@ -1806,6 +1804,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) if (gstop_done) do_notify_parent_cldstop(current, false, why); + /* tasklist protects us from ptrace_freeze_traced() */ __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; diff --git a/kernel/smp.c b/kernel/smp.c index fb67dfa..38d9e03 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -31,6 +31,7 @@ struct call_function_data { struct call_single_data csd; atomic_t refs; cpumask_var_t cpumask; + cpumask_var_t cpumask_ipi; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); @@ -54,6 +55,9 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) return notifier_from_errno(-ENOMEM); + if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL, + cpu_to_node(cpu))) + return notifier_from_errno(-ENOMEM); break; #ifdef CONFIG_HOTPLUG_CPU @@ -63,6 +67,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) case CPU_DEAD: case CPU_DEAD_FROZEN: free_cpumask_var(cfd->cpumask); + free_cpumask_var(cfd->cpumask_ipi); break; #endif }; @@ -524,6 +529,12 @@ void smp_call_function_many(const struct cpumask *mask, return; } + /* + * After we put an entry into the list, data->cpumask + * may be cleared again when another CPU sends another IPI for + * a SMP function call, so data->cpumask will be zero. 
+ */ + cpumask_copy(data->cpumask_ipi, data->cpumask); raw_spin_lock_irqsave(&call_function.lock, flags); /* * Place entry at the _HEAD_ of the list, so that any cpu still @@ -547,7 +558,7 @@ void smp_call_function_many(const struct cpumask *mask, smp_mb(); /* Send a message to all CPUs in the map */ - arch_send_call_function_ipi_mask(data->cpumask); + arch_send_call_function_ipi_mask(data->cpumask_ipi); /* Optionally wait for the CPUs to complete */ if (wait) diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index e055e8b..17c20c7 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1194,9 +1194,10 @@ static ssize_t bin_dn_node_address(struct file *file, /* Convert the decnet address to binary */ result = -EIO; - nodep = strchr(buf, '.') + 1; + nodep = strchr(buf, '.'); if (!nodep) goto out; + ++nodep; area = simple_strtoul(buf, NULL, 10); node = simple_strtoul(nodep, NULL, 10); diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl index eb51d76..3f42652 100644 --- a/kernel/timeconst.pl +++ b/kernel/timeconst.pl @@ -369,10 +369,8 @@ if ($hz eq '--can') { die "Usage: $0 HZ\n"; } - @val = @{$canned_values{$hz}}; - if (!defined(@val)) { - @val = compute_values($hz); - } + $cv = $canned_values{$hz}; + @val = defined($cv) ? @$cv : compute_values($hz); output($hz, @val); } exit 0; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f88ea18..86fd417 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3432,35 +3432,49 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify(struct notifier_block *self, - unsigned long val, void *data) +static int ftrace_module_notify_enter(struct notifier_block *self, + unsigned long val, void *data) { struct module *mod = data; - switch (val) { - case MODULE_STATE_COMING: + if (val == MODULE_STATE_COMING) ftrace_init_module(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); - break; - case MODULE_STATE_GOING: + return 0; +} + +static int ftrace_module_notify_exit(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_GOING) ftrace_release_mod(mod); - break; - } return 0; } #else -static int ftrace_module_notify(struct notifier_block *self, - unsigned long val, void *data) +static int ftrace_module_notify_enter(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +static int ftrace_module_notify_exit(struct notifier_block *self, + unsigned long val, void *data) { return 0; } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_nb = { - .notifier_call = ftrace_module_notify, - .priority = 0, +struct notifier_block ftrace_module_enter_nb = { + .notifier_call = ftrace_module_notify_enter, + .priority = INT_MAX, /* Run before anything that can use kprobes */ +}; + +struct notifier_block ftrace_module_exit_nb = { + .notifier_call = ftrace_module_notify_exit, + .priority = INT_MIN, /* Run after anything that can remove kprobes */ }; extern unsigned long __start_mcount_loc[]; @@ -3494,9 +3508,13 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_nb); + ret = register_module_notifier(&ftrace_module_enter_nb); + if (ret) + pr_warning("Failed to register trace ftrace module enter notifier\n"); + + ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) - pr_warning("Failed to register trace ftrace module notifier\n"); + pr_warning("Failed 
to register trace ftrace module exit notifier\n"); set_ftrace_early_filters(); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b0c7aa4..20dff64 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2926,6 +2926,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); + if (!reader) + goto out; cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; |
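
The following standalone userspace sketches illustrate a few of the patterns in the hunks above. They are not part of the patches; every identifier and value in them (sample names, buffer sizes, helper functions) is invented for illustration.

cpuset.c: the old code passed dentry->d_name.name straight to snprintf() as the format string and read it without d_lock; the fix copies it with strlcpy() under d_lock. A minimal sketch of the format-string half of that, using snprintf("%s", ...) where the kernel uses strlcpy():

#include <stdio.h>

#define CPUSET_NAME_LEN 64

int main(void)
{
	/* Imagine a cgroup directory whose name contains a '%'. */
	const char *d_name = "batch%jobs";
	char cpuset_name[CPUSET_NAME_LEN];

	/*
	 * Old pattern (removed by the patch):
	 *     snprintf(cpuset_name, CPUSET_NAME_LEN, d_name);
	 * The untrusted name is the format string, so "%j" would be parsed
	 * as a conversion and any missing argument is undefined behaviour.
	 * Compilers flag this with -Wformat-security.
	 */

	/* Fixed pattern (strlcpy in the kernel): copy bytes, never parse. */
	snprintf(cpuset_name, sizeof(cpuset_name), "%s", d_name);
	printf("cpuset=%s\n", cpuset_name);
	return 0;
}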
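
posix-timers.c: __lock_timer() now rejects any timer_id above INT_MAX before the idr lookup, because timer_t may be wider than int and the old (int) cast let an out-of-range id alias a live timer after truncation. A sketch of the user-visible effect, using the raw syscalls so glibc's timer_t wrapping stays out of the way (assumes a 64-bit Linux system; the shifted id is invented):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	int id = -1;
	struct itimerspec its = { .it_value = { .tv_sec = 10 } };

	/* Kernel timer ids are small non-negative ints. */
	if (syscall(SYS_timer_create, CLOCK_MONOTONIC, NULL, &id) != 0) {
		perror("timer_create");
		return 1;
	}

	/* Same low 32 bits as the real id, but outside positive-int range. */
	long bogus = (long)id | (1L << 32);

	if (syscall(SYS_timer_settime, bogus, 0, &its, NULL) != 0)
		printf("bogus id rejected: %s\n", strerror(errno));
	else
		printf("bogus id accepted: it aliased the live timer\n");

	syscall(SYS_timer_delete, id);
	return 0;
}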
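
printk.c: log_prefix() expects a contiguous C string, but the record it is asked to parse lives in a ring buffer and may wrap, so the fix copies at most SYSLOG_PRI_MAX_LENGTH bytes into a NUL-terminated scratch array first. A self-contained sketch of that linearisation step (RING_SIZE, PREFIX_MAX, RING_AT and the sample record are all invented; sscanf stands in for log_prefix):

#include <stdio.h>
#include <string.h>

#define RING_SIZE   16		/* power of two, like __LOG_BUF_LEN */
#define PREFIX_MAX  5		/* like SYSLOG_PRI_MAX_LENGTH */

static char ring[RING_SIZE];
#define RING_AT(idx)	(ring[(idx) & (RING_SIZE - 1)])

int main(void)
{
	const char *msg = "<6>hello";
	unsigned start = RING_SIZE - 2;		/* force the record to wrap */
	unsigned end = start + strlen(msg);
	unsigned i;

	for (i = 0; i < strlen(msg); i++)
		RING_AT(start + i) = msg[i];

	/* Linearise the bytes the prefix parser will look at. */
	char prefix[PREFIX_MAX + 1];
	for (i = 0; i < end - start && i < PREFIX_MAX; i++)
		prefix[i] = RING_AT(start + i);
	prefix[i] = '\0';

	int level;
	if (sscanf(prefix, "<%d>", &level) == 1)
		printf("log level %d\n", level);
	return 0;
}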
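
resource.c: __reserve_region_with_split() used to recurse on both sides of a conflict, which can exhaust the stack when many conflicts line up; the rewrite loops and keeps at most one deferred right-hand remainder. A simplified standalone model of that control flow (flat sorted array instead of the kernel's resource tree; all names and ranges invented):

#include <stdio.h>

struct range { unsigned long lo, hi; };

/* Already-reserved regions, sorted and non-overlapping. */
static const struct range busy[] = { { 20, 29 }, { 50, 59 } };
static const int nbusy = sizeof(busy) / sizeof(busy[0]);

static const struct range *first_conflict(unsigned long lo, unsigned long hi)
{
	for (int i = 0; i < nbusy; i++)
		if (busy[i].lo <= hi && busy[i].hi >= lo)
			return &busy[i];
	return NULL;
}

static void reserve_with_split(unsigned long lo, unsigned long hi)
{
	int have_next = 0;
	unsigned long next_lo = 0, next_hi = 0;

	while (1) {
		const struct range *c = first_conflict(lo, hi);

		if (!c) {
			printf("reserved [%lu, %lu]\n", lo, hi);
			if (!have_next)
				break;
			lo = next_lo;		/* pick up the deferred remainder */
			hi = next_hi;
			have_next = 0;
			continue;
		}

		if (c->lo <= lo && c->hi >= hi)	/* conflict covers everything */
			break;

		if (c->lo > lo) {
			if (c->hi < hi) {	/* remember the part after the conflict */
				next_lo = c->hi + 1;
				next_hi = hi;
				have_next = 1;
			}
			hi = c->lo - 1;		/* retry with the part before it */
		} else {
			lo = c->hi + 1;		/* conflict eats the front; skip it */
		}
	}
}

int main(void)
{
	reserve_with_split(10, 70);	/* prints [10,19], [30,49], [60,70] */
	return 0;
}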
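
sysctl_binary.c: the DECnet address parser added 1 to strchr()'s result before checking it, so a missing '.' produced (char *)1 instead of NULL and the check could never fire. A minimal standalone sketch of the corrected ordering (parse_dn_address and the sample inputs are invented):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "area.node" into two numbers; returns 0 on success. */
static int parse_dn_address(const char *buf, unsigned long *area,
			    unsigned long *node)
{
	const char *nodep = strchr(buf, '.');	/* check before advancing */

	if (!nodep)
		return -1;
	++nodep;

	*area = strtoul(buf, NULL, 10);
	*node = strtoul(nodep, NULL, 10);
	return 0;
}

int main(void)
{
	unsigned long area, node;

	if (parse_dn_address("1.42", &area, &node) == 0)
		printf("area=%lu node=%lu\n", area, node);
	if (parse_dn_address("no-dot-here", &area, &node) != 0)
		printf("malformed input rejected\n");
	return 0;
}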