Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c             |   4
-rw-r--r--  kernel/cpu.c               |  83
-rw-r--r--  kernel/cpuset.c            |   5
-rw-r--r--  kernel/exit.c              |   2
-rw-r--r--  kernel/fork.c              |  15
-rw-r--r--  kernel/futex.c             |  21
-rw-r--r--  kernel/irq/manage.c        |  15
-rw-r--r--  kernel/kprobes.c           |  36
-rw-r--r--  kernel/params.c            |   2
-rw-r--r--  kernel/posix-cpu-timers.c  |   2
-rw-r--r--  kernel/posix-timers.c      |  10
-rw-r--r--  kernel/power/Kconfig       |   9
-rw-r--r--  kernel/power/Makefile      |   3
-rw-r--r--  kernel/power/main.c        |  21
-rw-r--r--  kernel/power/pm.c          |   1
-rw-r--r--  kernel/printk.c            |  18
-rw-r--r--  kernel/ptrace.c            |   5
-rw-r--r--  kernel/rcupdate.c          |  59
-rw-r--r--  kernel/rcutorture.c        |  37
-rw-r--r--  kernel/sched.c             |  10
-rw-r--r--  kernel/signal.c            |  13
-rw-r--r--  kernel/stop_machine.c      |   6
-rw-r--r--  kernel/sys.c               |   5
-rw-r--r--  kernel/sysctl.c            |  29
-rw-r--r--  kernel/time.c              |  12
26 files changed, 305 insertions, 140 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 0c56320..32fa03a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -291,8 +291,10 @@ int kauditd_thread(void *dummy)
 		set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&kauditd_wait, &wait);

-		if (!skb_queue_len(&audit_skb_queue))
+		if (!skb_queue_len(&audit_skb_queue)) {
+			try_to_freeze();
 			schedule();
+		}

 		__set_current_state(TASK_RUNNING);
 		remove_wait_queue(&kauditd_wait, &wait);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d61ba88..e882c6b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -16,47 +16,76 @@
 #include <asm/semaphore.h>

 /* This protects CPUs going up and down... */
-DECLARE_MUTEX(cpucontrol);
-EXPORT_SYMBOL_GPL(cpucontrol);
+static DECLARE_MUTEX(cpucontrol);

 static struct notifier_block *cpu_chain;

-/*
- * Used to check by callers if they need to acquire the cpucontrol
- * or not to protect a cpu from being removed. Its sometimes required to
- * call these functions both for normal operations, and in response to
- * a cpu being added/removed. If the context of the call is in the same
- * thread context as a CPU hotplug thread, we dont need to take the lock
- * since its already protected
- * check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj
- */
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct *lock_cpu_hotplug_owner;
+static int lock_cpu_hotplug_depth;

-int current_in_cpu_hotplug(void)
+static int __lock_cpu_hotplug(int interruptible)
 {
-	return (current->flags & PF_HOTPLUG_CPU);
+	int ret = 0;
+
+	if (lock_cpu_hotplug_owner != current) {
+		if (interruptible)
+			ret = down_interruptible(&cpucontrol);
+		else
+			down(&cpucontrol);
+	}
+
+	/*
+	 * Set only if we succeed in locking
+	 */
+	if (!ret) {
+		lock_cpu_hotplug_depth++;
+		lock_cpu_hotplug_owner = current;
+	}
+
+	return ret;
 }

-EXPORT_SYMBOL_GPL(current_in_cpu_hotplug);
+void lock_cpu_hotplug(void)
+{
+	__lock_cpu_hotplug(0);
+}
+EXPORT_SYMBOL_GPL(lock_cpu_hotplug);

+void unlock_cpu_hotplug(void)
+{
+	if (--lock_cpu_hotplug_depth == 0) {
+		lock_cpu_hotplug_owner = NULL;
+		up(&cpucontrol);
+	}
+}
+EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
+
+int lock_cpu_hotplug_interruptible(void)
+{
+	return __lock_cpu_hotplug(1);
+}
+EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible);
+#endif	/* CONFIG_HOTPLUG_CPU */

 /* Need to know about CPUs going up/down? */
 int register_cpu_notifier(struct notifier_block *nb)
 {
 	int ret;

-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = lock_cpu_hotplug_interruptible()) != 0)
 		return ret;
 	ret = notifier_chain_register(&cpu_chain, nb);
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 	return ret;
 }
 EXPORT_SYMBOL(register_cpu_notifier);

 void unregister_cpu_notifier(struct notifier_block *nb)
 {
-	down(&cpucontrol);
+	lock_cpu_hotplug();
 	notifier_chain_unregister(&cpu_chain, nb);
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);

@@ -112,13 +141,6 @@ int cpu_down(unsigned int cpu)
 		goto out;
 	}

-	/*
-	 * Leave a trace in current->flags indicating we are already in
-	 * process of performing CPU hotplug. Callers can check if cpucontrol
-	 * is already acquired by current thread, and if so not cause
-	 * a dead lock by not acquiring the lock
-	 */
-	current->flags |= PF_HOTPLUG_CPU;
 	err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
 						(void *)(long)cpu);
 	if (err == NOTIFY_BAD) {
@@ -171,7 +193,6 @@ out_thread:
 out_allowed:
 	set_cpus_allowed(current, old_allowed);
 out:
-	current->flags &= ~PF_HOTPLUG_CPU;
 	unlock_cpu_hotplug();
 	return err;
 }
@@ -182,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu)
 	int ret;
 	void *hcpu = (void *)(long)cpu;

-	if ((ret = down_interruptible(&cpucontrol)) != 0)
+	if ((ret = lock_cpu_hotplug_interruptible()) != 0)
 		return ret;

 	if (cpu_online(cpu) || !cpu_present(cpu)) {
@@ -190,11 +211,6 @@ int __devinit cpu_up(unsigned int cpu)
 		goto out;
 	}

-	/*
-	 * Leave a trace in current->flags indicating we are already in
-	 * process of performing CPU hotplug.
-	 */
-	current->flags |= PF_HOTPLUG_CPU;
 	ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
 	if (ret == NOTIFY_BAD) {
 		printk("%s: attempt to bring up CPU %u failed\n",
@@ -217,7 +233,6 @@ out_notify:
 	if (ret != 0)
 		notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu);
 out:
-	current->flags &= ~PF_HOTPLUG_CPU;
-	up(&cpucontrol);
+	unlock_cpu_hotplug();
 	return ret;
 }
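The owner/depth bookkeeping above replaces the old PF_HOTPLUG_CPU flag: a task that already holds cpucontrol can take the lock again without deadlocking. As a hedged sketch (hypothetical caller, using only the functions added in this hunk), the call pattern this enables looks like:

static void example_rebuild_domains(void)
{
	/*
	 * May be called from a sysfs write, or from a CPU_DOWN_PREPARE
	 * notifier that runs while cpu_down() already holds cpucontrol.
	 * In the second case lock_cpu_hotplug() only bumps
	 * lock_cpu_hotplug_depth instead of deadlocking on itself.
	 */
	lock_cpu_hotplug();
	/* ... walk cpu_online_map knowing it cannot change ... */
	unlock_cpu_hotplug();	/* drops cpucontrol only at depth zero */
}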
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 5a737ed..7430640 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1809,11 +1809,12 @@ int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
 		return 0;

+	if (current->flags & PF_EXITING) /* Let dying task have memory */
+		return 1;
+
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
 	down(&callback_sem);

-	if (current->flags & PF_EXITING) /* Let dying task have memory */
-		return 1;
 	task_lock(current);
 	cs = nearest_exclusive_ancestor(current->cpuset);
 	task_unlock(current);
diff --git a/kernel/exit.c b/kernel/exit.c
index 452a1d1..ee51568 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -859,7 +859,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (group_dead && tsk->signal->leader)
 		disassociate_ctty(1);

-	module_put(tsk->thread_info->exec_domain->module);
+	module_put(task_thread_info(tsk)->exec_domain->module);
 	if (tsk->binfmt)
 		module_put(tsk->binfmt->module);
diff --git a/kernel/fork.c b/kernel/fork.c
index 158710d..fb8572a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -171,10 +171,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		return NULL;
 	}

-	*ti = *orig->thread_info;
 	*tsk = *orig;
 	tsk->thread_info = ti;
-	ti->task = tsk;
+	setup_thread_stack(tsk, orig);

 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
@@ -264,7 +263,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		rb_parent = &tmp->vm_rb;

 		mm->map_count++;
-		retval = copy_page_range(mm, oldmm, tmp);
+		retval = copy_page_range(mm, oldmm, mpnt);

 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
@@ -324,7 +323,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
 	spin_lock_init(&mm->page_table_lock);
 	rwlock_init(&mm->ioctx_list_lock);
 	mm->ioctx_list = NULL;
-	mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;

@@ -919,7 +917,7 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (nr_threads >= max_threads)
 		goto bad_fork_cleanup_count;

-	if (!try_module_get(p->thread_info->exec_domain->module))
+	if (!try_module_get(task_thread_info(p)->exec_domain->module))
 		goto bad_fork_cleanup_count;

 	if (p->binfmt && !try_module_get(p->binfmt->module))
@@ -1126,8 +1124,6 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (unlikely(p->ptrace & PT_PTRACED))
 			__ptrace_link(p, current->parent);

-	cpuset_fork(p);
-
 	attach_pid(p, PIDTYPE_PID, p->pid);
 	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
@@ -1137,13 +1133,14 @@ static task_t *copy_process(unsigned long clone_flags,
 		__get_cpu_var(process_counts)++;
 	}

-	proc_fork_connector(p);
 	if (!current->signal->tty && p->signal->tty)
 		p->signal->tty = NULL;

 	nr_threads++;
 	total_forks++;
 	write_unlock_irq(&tasklist_lock);
+	proc_fork_connector(p);
+	cpuset_fork(p);
 	retval = 0;

 fork_out:
@@ -1180,7 +1177,7 @@ bad_fork_cleanup:
 	if (p->binfmt)
 		module_put(p->binfmt->module);
 bad_fork_cleanup_put_domain:
-	module_put(p->thread_info->exec_domain->module);
+	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
 	put_group_info(p->group_info);
 	atomic_dec(&p->user->processes);
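These hunks (and the matching ones in kernel/exit.c above and kernel/sched.c below) stop dereferencing p->thread_info directly and go through accessors instead. For reference, a sketch of what such helpers look like when thread_info sits at the bottom of the task's stack allocation; the exact definitions live in the headers, not in this diff:

#define task_thread_info(tsk)	((tsk)->thread_info)

static inline void setup_thread_stack(struct task_struct *p,
				      struct task_struct *org)
{
	*task_thread_info(p) = *task_thread_info(org);
	task_thread_info(p)->task = p;
}

static inline unsigned long *end_of_stack(struct task_struct *p)
{
	return (unsigned long *)(task_thread_info(p) + 1);
}

Keeping every access behind these helpers is what later lets an architecture relocate thread_info without touching any of this code.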
diff --git a/kernel/futex.c b/kernel/futex.c
index aca8d10..5e71a6b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -201,21 +201,6 @@ static int get_futex_key(unsigned long uaddr, union futex_key *key)
 	 * from swap. But that's a lot of code to duplicate here
 	 * for a rare case, so we simply fetch the page.
 	 */
-
-	/*
-	 * Do a quick atomic lookup first - this is the fastpath.
-	 */
-	page = follow_page(mm, uaddr, FOLL_TOUCH|FOLL_GET);
-	if (likely(page != NULL)) {
-		key->shared.pgoff =
-			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-		put_page(page);
-		return 0;
-	}
-
-	/*
-	 * Do it the general way.
-	 */
 	err = get_user_pages(current, mm, uaddr, 1, 0, 0, &page, NULL);
 	if (err >= 0) {
 		key->shared.pgoff =
@@ -285,7 +270,13 @@ static void wake_futex(struct futex_q *q)
 	/*
 	 * The waiting task can free the futex_q as soon as this is written,
 	 * without taking any locks. This must come last.
+	 *
+	 * A memory barrier is required here to prevent the following store
+	 * to lock_ptr from getting ahead of the wakeup. Clearing the lock
+	 * at the end of wake_up_all() does not prevent this store from
+	 * moving.
 	 */
+	wmb();
 	q->lock_ptr = NULL;
 }
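The comment and wmb() added in wake_futex() encode an ordering rule: the store that tells the waiter "you may free q" must not become visible before the wakeup's own stores. A condensed sketch of the two sides (hypothetical helper name; the real code is in the hunk above):

static void waker_side(struct futex_q *q)
{
	wake_up_all(&q->waiters);	/* writes into q and the wait queue */
	wmb();				/* keep those writes ahead of ... */
	q->lock_ptr = NULL;		/* ... the "q is now yours" signal */
}

/* Waiter side: once it observes q->lock_ptr == NULL it may free q
 * immediately, so any waker store that drifted after this point would
 * be a use-after-free. */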
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3bd7226..81c49a4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -36,6 +36,9 @@ void synchronize_irq(unsigned int irq)
 {
 	struct irq_desc *desc = irq_desc + irq;

+	if (irq >= NR_IRQS)
+		return;
+
 	while (desc->status & IRQ_INPROGRESS)
 		cpu_relax();
 }
@@ -60,6 +63,9 @@ void disable_irq_nosync(unsigned int irq)
 	irq_desc_t *desc = irq_desc + irq;
 	unsigned long flags;

+	if (irq >= NR_IRQS)
+		return;
+
 	spin_lock_irqsave(&desc->lock, flags);
 	if (!desc->depth++) {
 		desc->status |= IRQ_DISABLED;
@@ -86,6 +92,9 @@ void disable_irq(unsigned int irq)
 {
 	irq_desc_t *desc = irq_desc + irq;

+	if (irq >= NR_IRQS)
+		return;
+
 	disable_irq_nosync(irq);
 	if (desc->action)
 		synchronize_irq(irq);
@@ -108,6 +117,9 @@ void enable_irq(unsigned int irq)
 	irq_desc_t *desc = irq_desc + irq;
 	unsigned long flags;

+	if (irq >= NR_IRQS)
+		return;
+
 	spin_lock_irqsave(&desc->lock, flags);
 	switch (desc->depth) {
 	case 0:
@@ -163,6 +175,9 @@ int setup_irq(unsigned int irq, struct irqaction * new)
 	unsigned long flags;
 	int shared = 0;

+	if (irq >= NR_IRQS)
+		return -EINVAL;
+
 	if (desc->handler == &no_irq_type)
 		return -ENOSYS;
 	/*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5beda37..3bb71e6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -246,6 +246,19 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return ret;
 }

+/* Walks the list and increments nmissed count for multiprobe case */
+void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
+{
+	struct kprobe *kp;
+	if (p->pre_handler != aggr_pre_handler) {
+		p->nmissed++;
+	} else {
+		list_for_each_entry_rcu(kp, &p->list, list)
+			kp->nmissed++;
+	}
+	return;
+}
+
 /* Called with kretprobe_lock held */
 struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
 {
@@ -399,10 +412,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 	INIT_LIST_HEAD(&ap->list);
 	list_add_rcu(&p->list, &ap->list);

-	INIT_HLIST_NODE(&ap->hlist);
-	hlist_del_rcu(&p->hlist);
-	hlist_add_head_rcu(&ap->hlist,
-		&kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]);
+	hlist_replace_rcu(&p->hlist, &ap->hlist);
 }

 /*
@@ -462,9 +472,16 @@ int __kprobes register_kprobe(struct kprobe *p)
 	int ret = 0;
 	unsigned long flags = 0;
 	struct kprobe *old_p;
+	struct module *mod;
+
+	if ((!kernel_text_address((unsigned long) p->addr)) ||
+		in_kprobes_functions((unsigned long) p->addr))
+		return -EINVAL;
+
+	if ((mod = module_text_address((unsigned long) p->addr)) &&
+			(unlikely(!try_module_get(mod))))
+		return -EINVAL;

-	if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
-		return ret;
 	if ((ret = arch_prepare_kprobe(p)) != 0)
 		goto rm_kprobe;
@@ -488,6 +505,8 @@ out:
 rm_kprobe:
 	if (ret == -EEXIST)
 		arch_remove_kprobe(p);
+	if (ret && mod)
+		module_put(mod);
 	return ret;
 }

@@ -495,6 +514,7 @@ void __kprobes unregister_kprobe(struct kprobe *p)
 {
 	unsigned long flags;
 	struct kprobe *old_p;
+	struct module *mod;

 	spin_lock_irqsave(&kprobe_lock, flags);
 	old_p = get_kprobe(p->addr);
@@ -506,6 +526,10 @@ void __kprobes unregister_kprobe(struct kprobe *p)
 	cleanup_kprobe(p, flags);

 	synchronize_sched();
+
+	if ((mod = module_text_address((unsigned long)p->addr)))
+		module_put(mod);
+
 	if (old_p->pre_handler == aggr_pre_handler &&
 			list_empty(&old_p->list))
 		kfree(old_p);
diff --git a/kernel/params.c b/kernel/params.c
index 47ba695..c76ad25 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -619,7 +619,7 @@ static void __init param_sysfs_builtin(void)

 /* module-related sysfs stuff */
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_SYSFS

 #define to_module_attr(n) container_of(n, struct module_attribute, attr);
 #define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
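register_kprobe() now rejects addresses outside kernel text and pins the module that owns a probed address. A minimal registration sketch against the kprobes API of this era (the probed symbol and handler names are hypothetical; only register_kprobe()/unregister_kprobe() and the struct fields visible in the hunks above are assumed):

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	/* runs just before the probed instruction executes */
	return 0;
}

static struct kprobe example_kp = {
	.pre_handler = example_pre,
};

static int __init example_probe_init(void)
{
	/* hypothetical target; the address must fall inside kernel or
	 * module text, or register_kprobe() now fails with -EINVAL */
	example_kp.addr = (kprobe_opcode_t *)example_target_func;
	return register_kprobe(&example_kp);
}

static void __exit example_probe_exit(void)
{
	/* also drops the module reference taken at register time */
	unregister_kprobe(&example_kp);
}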
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 84af54c..cae4f57 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -36,7 +36,7 @@ timespec_to_sample(clockid_t which_clock, const struct timespec *tp)
 	union cpu_time_count ret;
 	ret.sched = 0;		/* high half always zero when .cpu used */
 	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
-		ret.sched = tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+		ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
 	} else {
 		ret.cpu = timespec_to_cputime(tp);
 	}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index ea55c7a..5870efb 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -270,7 +270,7 @@ static void tstojiffie(struct timespec *tp, int res, u64 *jiff)
 	long sec = tp->tv_sec;
 	long nsec = tp->tv_nsec + res - 1;

-	if (nsec > NSEC_PER_SEC) {
+	if (nsec >= NSEC_PER_SEC) {
 		sec++;
 		nsec -= NSEC_PER_SEC;
 	}
@@ -1209,13 +1209,9 @@ static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp)

 	do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono);

-	tp->tv_sec += wall_to_mono.tv_sec;
-	tp->tv_nsec += wall_to_mono.tv_nsec;
+	set_normalized_timespec(tp, tp->tv_sec + wall_to_mono.tv_sec,
+				tp->tv_nsec + wall_to_mono.tv_nsec);

-	if ((tp->tv_nsec - NSEC_PER_SEC) > 0) {
-		tp->tv_nsec -= NSEC_PER_SEC;
-		tp->tv_sec++;
-	}
 	return 0;
 }
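Both timer fixes above are arithmetic edge cases. The cast in timespec_to_sample() matters on 32-bit: tv_sec is a long, so the multiply used to wrap in long precision before anything reached the 64-bit field. A worked example (assuming 32-bit long):

/* tv_sec = 5:
 *   5 * NSEC_PER_SEC = 5,000,000,000  >  LONG_MAX = 2,147,483,647,
 * so "tp->tv_sec * NSEC_PER_SEC" wraps before being widened.
 * Casting one operand first forces the whole multiply to 64 bits:
 */
unsigned long long ns =
	(unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;

The tstojiffie() change is the matching boundary fix: nsec == NSEC_PER_SEC already needs to carry into seconds, so the comparison must be >=, not >.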
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 46a5e5a..5ec248c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -19,6 +19,15 @@ config PM
 	  will issue the hlt instruction if nothing is to be done, thereby
 	  sending the processor to sleep and saving power.

+config PM_LEGACY
+	bool "Legacy Power Management API"
+	depends on PM
+	default y
+	---help---
+	   Support for pm_register() and friends.
+
+	   If unsure, say Y.
+
 config PM_DEBUG
 	bool "Power Management Debug Support"
 	depends on PM
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c71eb45..04be7d0 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,7 +3,8 @@ ifeq ($(CONFIG_PM_DEBUG),y)
 EXTRA_CFLAGS	+=	-DDEBUG
 endif

-obj-y				:= main.o process.o console.o pm.o
+obj-y				:= main.o process.o console.o
+obj-$(CONFIG_PM_LEGACY)		+= pm.o

 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o disk.o snapshot.o
 obj-$(CONFIG_SUSPEND_SMP)	+= smp.o
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6ee2cad..d253f3a 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -24,7 +24,7 @@

 DECLARE_MUTEX(pm_sem);

-struct pm_ops * pm_ops = NULL;
+struct pm_ops *pm_ops;
 suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;

 /**
@@ -151,6 +151,18 @@ static char *pm_states[PM_SUSPEND_MAX] = {
 #endif
 };

+static inline int valid_state(suspend_state_t state)
+{
+	/* Suspend-to-disk does not really need low-level support.
+	 * It can work with reboot if needed. */
+	if (state == PM_SUSPEND_DISK)
+		return 1;
+
+	if (pm_ops && pm_ops->valid && !pm_ops->valid(state))
+		return 0;
+	return 1;
+}
+

 /**
  *	enter_state - Do common work of entering low-power state.
@@ -167,7 +179,7 @@ static int enter_state(suspend_state_t state)
 {
 	int error;

-	if (pm_ops && pm_ops->valid && !pm_ops->valid(state))
+	if (!valid_state(state))
 		return -ENODEV;
 	if (down_trylock(&pm_sem))
 		return -EBUSY;
@@ -238,9 +250,8 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
 	char * s = buf;

 	for (i = 0; i < PM_SUSPEND_MAX; i++) {
-		if (pm_states[i] && pm_ops && (!pm_ops->valid
-			||(pm_ops->valid && pm_ops->valid(i))))
-			s += sprintf(s,"%s ",pm_states[i]);
+		if (pm_states[i] && valid_state(i))
+			s += sprintf(s,"%s ", pm_states[i]);
 	}
 	s += sprintf(s,"\n");
 	return (s - buf);
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
index 1591493..33c508e 100644
--- a/kernel/power/pm.c
+++ b/kernel/power/pm.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/pm.h>
+#include <linux/pm_legacy.h>
 #include <linux/interrupt.h>

 int pm_active;
diff --git a/kernel/printk.c b/kernel/printk.c
index e9be027..5287be8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -491,7 +491,10 @@ __attribute__((weak)) unsigned long long printk_clock(void)
 	return sched_clock();
 }

-/*
+/**
+ * printk - print a kernel message
+ * @fmt: format string
+ *
 * This is printk. It can be called from any context. We want it to work.
 *
 * We try to grab the console_sem. If we succeed, it's easy - we log the output and
@@ -503,6 +506,9 @@ __attribute__((weak)) unsigned long long printk_clock(void)
 * One effect of this deferred printing is that code which calls printk() and
 * then changes console_loglevel may break. This is because console_loglevel
 * is inspected when the actual printing occurs.
+ *
+ * See also:
+ * printf(3)
 */

 asmlinkage int printk(const char *fmt, ...)
@@ -655,6 +661,9 @@ static void call_console_drivers(unsigned long start, unsigned long end)

 /**
 * add_preferred_console - add a device to the list of preferred consoles.
+ * @name: device name
+ * @idx: device index
+ * @options: options for this console
 *
 * The last preferred console added will be used for kernel messages
 * and stdin/out/err for init. Normally this is used by console_setup
@@ -764,7 +773,8 @@ void release_console_sem(void)
 }
 EXPORT_SYMBOL(release_console_sem);

-/** console_conditional_schedule - yield the CPU if required
+/**
+ * console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
@@ -946,7 +956,7 @@ int unregister_console(struct console *console)
 	if (console_drivers == console) {
 		console_drivers=console->next;
 		res = 0;
-	} else {
+	} else if (console_drivers) {
 		for (a=console_drivers->next, b=console_drivers ;
 		     a; b=a, a=b->next) {
 			if (a == console) {
@@ -976,6 +986,8 @@ EXPORT_SYMBOL(unregister_console);

 /**
 * tty_write_message - write a message to a certain tty, not just the console.
+ * @tty: the destination tty_struct
+ * @msg: the message to write
 *
 * This is used for messages that need to be redirected to a specific tty.
 * We don't put it into the syslog queue right now maybe in the future if
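valid_state() gives enter_state() and state_show() one shared notion of which suspend states the platform supports, replacing the duplicated (and in state_show() mis-parenthesized) pm_ops checks. A sketch of the platform hook it consults; the struct layout is assumed from the code above, and the driver itself is hypothetical:

static int example_pm_valid(suspend_state_t state)
{
	/* board does standby and suspend-to-RAM, nothing else */
	return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM;
}

static struct pm_ops example_pm_ops = {
	.valid	= example_pm_valid,
	/* .prepare, .enter, .finish elided */
};

/* valid_state(PM_SUSPEND_DISK) stays 1 unconditionally, since
 * suspend-to-disk can fall back to reboot without platform help. */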
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index b88d418..656476e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -241,7 +241,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		if (write) {
 			copy_to_user_page(vma, page, addr,
 					  maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+			if (!PageCompound(page))
+				set_page_dirty_lock(page);
 		} else {
 			copy_from_user_page(vma, page, addr,
 					    buf, maddr + offset, bytes);
@@ -470,7 +471,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)

 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
-		goto out;
+		goto out_put_task_struct;
 	}

 	ret = ptrace_check_attach(child, request == PTRACE_KILL);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a..48d3bce 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -116,6 +116,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	local_irq_restore(flags);
 }

+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -162,6 +166,42 @@ long rcu_batches_completed(void)
 	return rcu_ctrlblk.completed;
 }

+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+	int cpu = smp_processor_id();
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	struct rcu_head *head;
+
+	head = &rdp->barrier;
+	atomic_inc(&rcu_barrier_cpu_count);
+	call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+	BUG_ON(in_interrupt());
+	/* Take cpucontrol semaphore to protect against CPU hotplug */
+	down(&rcu_barrier_sema);
+	init_completion(&rcu_barrier_completion);
+	atomic_set(&rcu_barrier_cpu_count, 0);
+	on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+	wait_for_completion(&rcu_barrier_completion);
+	up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
@@ -217,15 +257,23 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,

 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
-		/* Can't change, since spin lock held. */
-		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
-
 		rcp->next_pending = 0;
-		/* next_pending == 0 must be visible in __rcu_process_callbacks()
-		 * before it can see new value of cur.
+		/*
+		 * next_pending == 0 must be visible in
+		 * __rcu_process_callbacks() before it can see new value of cur.
 		 */
 		smp_wmb();
 		rcp->cur++;
+
+		/*
+		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
+		 * Barrier  Otherwise it can cause tickless idle CPUs to be
+		 * included in rsp->cpumask, which will extend graceperiods
+		 * unnecessarily.
+		 */
+		smp_mb();
+		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+
 	}
 }
@@ -457,6 +505,7 @@ static struct notifier_block __devinitdata rcu_nb = {
  */
 void __init rcu_init(void)
 {
+	sema_init(&rcu_barrier_sema, 1);
 	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	/* Register notifier for non-boot CPUs */
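rcu_barrier() fills a gap synchronize_rcu() leaves open: a grace period only guarantees readers are done, not that already-queued call_rcu() callbacks have run. The classic consumer is module unload; a hedged sketch (hypothetical module, assuming only the rcu_barrier() added above):

static void __exit example_rcu_exit(void)
{
	/* stop producing new call_rcu() callbacks first, then ... */
	rcu_barrier();	/* every callback queued so far has now run */
	/* ... safe to free shared state and let the module text, which
	 * those callbacks would have executed from, go away */
}

rcutorture's cleanup path below switches to exactly this call in place of a loop of synchronize_rcu().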
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9b58f1e..49fbbef 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -80,6 +80,7 @@ struct rcu_torture {
 	struct rcu_head rtort_rcu;
 	int rtort_pipe_count;
 	struct list_head rtort_free;
+	int rtort_mbtest;
 };

 static int fullstop = 0;	/* stop generating callbacks at test end. */
@@ -96,6 +97,8 @@ static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
 atomic_t n_rcu_torture_alloc;
 atomic_t n_rcu_torture_alloc_fail;
 atomic_t n_rcu_torture_free;
+atomic_t n_rcu_torture_mberror;
+atomic_t n_rcu_torture_error;

 /*
  * Allocate an element from the rcu_tortures pool.
@@ -145,9 +148,10 @@ rcu_torture_cb(struct rcu_head *p)
 	if (i > RCU_TORTURE_PIPE_LEN)
 		i = RCU_TORTURE_PIPE_LEN;
 	atomic_inc(&rcu_torture_wcount[i]);
-	if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN)
+	if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) {
+		rp->rtort_mbtest = 0;
 		rcu_torture_free(rp);
-	else
+	} else
 		call_rcu(p, rcu_torture_cb);
 }

@@ -195,6 +199,8 @@ rcu_torture_writer(void *arg)
 	static DEFINE_RCU_RANDOM(rand);

 	VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
+	set_user_nice(current, 19);
+
 	do {
 		schedule_timeout_uninterruptible(1);
 		if (rcu_batches_completed() == oldbatch)
@@ -204,6 +210,7 @@ rcu_torture_writer(void *arg)
 		rp->rtort_pipe_count = 0;
 		udelay(rcu_random(&rand) & 0x3ff);
 		old_rp = rcu_torture_current;
+		rp->rtort_mbtest = 1;
 		rcu_assign_pointer(rcu_torture_current, rp);
 		smp_wmb();
 		if (old_rp != NULL) {
@@ -238,6 +245,8 @@ rcu_torture_reader(void *arg)
 	int pipe_count;

 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
+	set_user_nice(current, 19);
+
 	do {
 		rcu_read_lock();
 		completed = rcu_batches_completed();
@@ -248,6 +257,8 @@ rcu_torture_reader(void *arg)
 			schedule_timeout_interruptible(HZ);
 			continue;
 		}
+		if (p->rtort_mbtest == 0)
+			atomic_inc(&n_rcu_torture_mberror);
 		udelay(rcu_random(&rand) & 0x7f);
 		preempt_disable();
 		pipe_count = p->rtort_pipe_count;
@@ -296,16 +307,22 @@ rcu_torture_printk(char *page)
 	}
 	cnt += sprintf(&page[cnt], "rcutorture: ");
 	cnt += sprintf(&page[cnt],
-		       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d",
+		       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
+		       "rtmbe: %d",
 		       rcu_torture_current,
 		       rcu_torture_current_version,
 		       list_empty(&rcu_torture_freelist),
 		       atomic_read(&n_rcu_torture_alloc),
 		       atomic_read(&n_rcu_torture_alloc_fail),
-		       atomic_read(&n_rcu_torture_free));
+		       atomic_read(&n_rcu_torture_free),
+		       atomic_read(&n_rcu_torture_mberror));
+	if (atomic_read(&n_rcu_torture_mberror) != 0)
+		cnt += sprintf(&page[cnt], " !!!");
 	cnt += sprintf(&page[cnt], "\nrcutorture: ");
-	if (i > 1)
+	if (i > 1) {
 		cnt += sprintf(&page[cnt], "!!! ");
+		atomic_inc(&n_rcu_torture_error);
+	}
 	cnt += sprintf(&page[cnt], "Reader Pipe: ");
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
 		cnt += sprintf(&page[cnt], " %ld", pipesummary[i]);
@@ -392,11 +409,12 @@ rcu_torture_cleanup(void)
 	stats_task = NULL;

 	/* Wait for all RCU callbacks to fire. */
+	rcu_barrier();

-	for (i = 0; i < RCU_TORTURE_PIPE_LEN; i++)
-		synchronize_rcu();
 	rcu_torture_stats_print();  /* -After- the stats thread is stopped! */
-	PRINTK_STRING("--- End of test");
+	printk(KERN_ALERT TORTURE_FLAG
+	       "--- End of test: %s\n",
+	       atomic_read(&n_rcu_torture_error) == 0 ? "SUCCESS" : "FAILURE");
 }

 static int
@@ -421,6 +439,7 @@ rcu_torture_init(void)
 	INIT_LIST_HEAD(&rcu_torture_freelist);
 	for (i = 0; i < sizeof(rcu_tortures) / sizeof(rcu_tortures[0]); i++) {
+		rcu_tortures[i].rtort_mbtest = 0;
 		list_add_tail(&rcu_tortures[i].rtort_free,
 			      &rcu_torture_freelist);
 	}
@@ -432,6 +451,8 @@ rcu_torture_init(void)
 	atomic_set(&n_rcu_torture_alloc, 0);
 	atomic_set(&n_rcu_torture_alloc_fail, 0);
 	atomic_set(&n_rcu_torture_free, 0);
+	atomic_set(&n_rcu_torture_mberror, 0);
+	atomic_set(&n_rcu_torture_error, 0);
 	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
 		atomic_set(&rcu_torture_wcount[i], 0);
 	for_each_cpu(cpu) {
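The rtort_mbtest handshake is a publish-order check: the writer sets the flag before rcu_assign_pointer() publishes the element, so a reader that sees the pointer but a zero flag has caught a memory-ordering bug. Distilled from the hunks above into a standalone sketch:

static void writer_publishes(struct rcu_torture *rp)
{
	rp->rtort_mbtest = 1;	/* initialize before publication ... */
	rcu_assign_pointer(rcu_torture_current, rp);	/* ... then publish */
}

static void reader_checks(struct rcu_torture *p)
{
	/* p came from rcu_dereference(rcu_torture_current) */
	if (p->rtort_mbtest == 0)
		atomic_inc(&n_rcu_torture_mberror);	/* ordering violated */
}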
diff --git a/kernel/sched.c b/kernel/sched.c
index b650667..6f46c94 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1437,7 +1437,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
-	p->thread_info->preempt_count = 1;
+	task_thread_info(p)->preempt_count = 1;
 #endif
 	/*
 	 * Share the timeslice between parent and child, thus the
@@ -4327,10 +4327,10 @@ static void show_task(task_t *p)
 #endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	{
-		unsigned long *n = (unsigned long *) (p->thread_info+1);
+		unsigned long *n = end_of_stack(p);
 		while (!*n)
 			n++;
-		free = (unsigned long) n - (unsigned long)(p->thread_info+1);
+		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
 	printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
@@ -4410,9 +4410,9 @@ void __devinit init_idle(task_t *idle, int cpu)

 	/* Set the preempt count _outside_ the spinlocks! */
 #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
-	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
+	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
 #else
-	idle->thread_info->preempt_count = 0;
+	task_thread_info(idle)->preempt_count = 0;
 #endif
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index 1bf3c39..d7611f1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -513,16 +513,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
 {
 	int sig = 0;

-	/* SIGKILL must have priority, otherwise it is quite easy
-	 * to create an unkillable process, sending sig < SIGKILL
-	 * to self */
-	if (unlikely(sigismember(&pending->signal, SIGKILL))) {
-		if (!sigismember(mask, SIGKILL))
-			sig = SIGKILL;
-	}
-
-	if (likely(!sig))
-		sig = next_signal(pending, mask);
+	sig = next_signal(pending, mask);
 	if (sig) {
 		if (current->notifier) {
 			if (sigismember(current->notifier_mask, sig)) {
@@ -1499,7 +1490,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)

 	psig = tsk->parent->sighand;
 	spin_lock_irqsave(&psig->siglock, flags);
-	if (sig == SIGCHLD &&
+	if (!tsk->ptrace && sig == SIGCHLD &&
 	    (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
 	     (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT))) {
 		/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 84a9d18..b3d4dc8 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -119,13 +119,12 @@ static int stop_machine(void)
 		return ret;
 	}

-	/* Don't schedule us away at this point, please. */
-	local_irq_disable();
-
 	/* Now they are all started, make them hold the CPUs, ready. */
+	preempt_disable();
 	stopmachine_set_state(STOPMACHINE_PREPARE);

 	/* Make them disable irqs. */
+	local_irq_disable();
 	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);

 	return 0;
@@ -135,6 +134,7 @@ static void restart_machine(void)
 {
 	stopmachine_set_state(STOPMACHINE_EXIT);
 	local_irq_enable();
+	preempt_enable_no_resched();
 }

 struct stop_machine_data
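The CONFIG_DEBUG_STACK_USAGE hunk above is a nice illustration of the end_of_stack() idiom: freshly allocated stack pages are zeroed and the kernel stack grows down toward thread_info, so counting still-zero words upward from the stack base estimates the headroom that was never touched. The same loop, restated as a standalone sketch:

static unsigned long stack_headroom(task_t *p)
{
	unsigned long *n = end_of_stack(p);	/* lowest stack address */

	while (!*n)	/* skip words the task never wrote to */
		n++;
	return (unsigned long)n - (unsigned long)end_of_stack(p);
}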
diff --git a/kernel/sys.c b/kernel/sys.c
index c43b3e2..eecf845 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -32,6 +32,7 @@

 #include <linux/compat.h>
 #include <linux/syscalls.h>
+#include <linux/kprobes.h>

 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -168,7 +169,7 @@ EXPORT_SYMBOL(notifier_chain_unregister);
  *	of the last notifier function called.
  */

-int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 {
 	int ret=NOTIFY_DONE;
 	struct notifier_block *nb = *n;
@@ -1497,6 +1498,8 @@ EXPORT_SYMBOL(in_egroup_p);

 DECLARE_RWSEM(uts_sem);

+EXPORT_SYMBOL(uts_sem);
+
 asmlinkage long sys_newuname(struct new_utsname __user * name)
 {
 	int errno = 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9990e10..b53115b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2192,29 +2192,32 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen, void **context)
 {
-	size_t l, len;
-
 	if (!table->data || !table->maxlen)
 		return -ENOTDIR;

 	if (oldval && oldlenp) {
-		if (get_user(len, oldlenp))
+		size_t bufsize;
+		if (get_user(bufsize, oldlenp))
 			return -EFAULT;
-		if (len) {
-			l = strlen(table->data);
-			if (len > l) len = l;
-			if (len >= table->maxlen)
+		if (bufsize) {
+			size_t len = strlen(table->data), copied;
+
+			/* This shouldn't trigger for a well-formed sysctl */
+			if (len > table->maxlen)
 				len = table->maxlen;
-			if(copy_to_user(oldval, table->data, len))
-				return -EFAULT;
-			if(put_user(0, ((char __user *) oldval) + len))
+
+			/* Copy up to a max of bufsize-1 bytes of the string */
+			copied = (len >= bufsize) ? bufsize - 1 : len;
+
+			if (copy_to_user(oldval, table->data, copied) ||
+			    put_user(0, (char __user *)(oldval + copied)))
 				return -EFAULT;
-			if(put_user(len, oldlenp))
+			if (put_user(len, oldlenp))
 				return -EFAULT;
 		}
 	}
 	if (newval && newlen) {
-		len = newlen;
+		size_t len = newlen;
 		if (len > table->maxlen)
 			len = table->maxlen;
 		if(copy_from_user(table->data, newval, len))
@@ -2223,7 +2226,7 @@ int sysctl_string(ctl_table *table, int __user *name, int nlen,
 			len--;
 		((char *) table->data)[len] = 0;
 	}
-	return 0;
+	return 1;
 }
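The sysctl_string() rewrite changes both safety and semantics: at most bufsize-1 bytes plus a terminating NUL are written (the old code could place the NUL one byte past a full user buffer), the reported length is the full string length so callers can detect truncation, and the return value of 1 signals "handled" to the generic strategy code, where the old return of 0 let it copy over the result again. A worked example with hypothetical values:

/* table->data = "hello" (strlen 5), caller passes in bufsize = 3:
 *   copied = bufsize - 1 = 2   ->  user buffer receives "he\0"
 *   *oldlenp = 5               ->  truncation is detectable
 *   return 1                   ->  the generic fallback copy is skipped
 *                                  instead of clobbering the result
 */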
diff --git a/kernel/time.c b/kernel/time.c
index 245d595..b94bfa8 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -561,6 +561,28 @@ void getnstimeofday(struct timespec *tv)
 EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif

+void getnstimestamp(struct timespec *ts)
+{
+	unsigned int seq;
+	struct timespec wall2mono;
+
+	/* synchronize with settimeofday() changes */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		getnstimeofday(ts);
+		wall2mono = wall_to_monotonic;
+	} while(unlikely(read_seqretry(&xtime_lock, seq)));
+
+	/* adjust to monotonicaly-increasing values */
+	ts->tv_sec += wall2mono.tv_sec;
+	ts->tv_nsec += wall2mono.tv_nsec;
+	while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) {
+		ts->tv_nsec -= NSEC_PER_SEC;
+		ts->tv_sec++;
+	}
+}
+EXPORT_SYMBOL_GPL(getnstimestamp);
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 42df83d..2bd5aee 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -102,7 +102,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)

 	if (!test_and_set_bit(0, &work->pending)) {
 		if (unlikely(is_single_threaded(wq)))
-			cpu = 0;
+			cpu = any_online_cpu(cpu_online_map);
 		BUG_ON(!list_empty(&work->entry));
 		__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 		ret = 1;
@@ -118,7 +118,7 @@ static void delayed_work_timer_fn(unsigned long __data)
 	int cpu = smp_processor_id();

 	if (unlikely(is_single_threaded(wq)))
-		cpu = 0;
+		cpu = any_online_cpu(cpu_online_map);

 	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
@@ -266,8 +266,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 	might_sleep();

 	if (is_single_threaded(wq)) {
-		/* Always use cpu 0's area. */
-		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0));
+		/* Always use first cpu's area. */
+		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map)));
 	} else {
 		int cpu;

@@ -320,7 +320,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	lock_cpu_hotplug();
 	if (singlethread) {
 		INIT_LIST_HEAD(&wq->list);
-		p = create_workqueue_thread(wq, 0);
+		p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map));
 		if (!p)
 			destroy = 1;
 		else
@@ -374,7 +374,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	/* We don't need the distraction of CPUs appearing and vanishing. */
 	lock_cpu_hotplug();
 	if (is_single_threaded(wq))
-		cleanup_workqueue_thread(wq, 0);
+		cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map));
 	else {
 		for_each_online_cpu(cpu)
 			cleanup_workqueue_thread(wq, cpu);
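The workqueue hunks matter on systems where CPU 0 may be offline or absent: single-threaded workqueues used to hardcode CPU 0's per-cpu area and now consistently use any_online_cpu(cpu_online_map). Usage itself is unchanged; a sketch against the workqueue API of this era (hypothetical names, three-argument INIT_WORK as used at the time):

static void example_fn(void *data)
{
	/* runs in the workqueue's single worker thread */
}

static struct workqueue_struct *example_wq;
static struct work_struct example_work;

static int __init example_wq_init(void)
{
	example_wq = create_singlethread_workqueue("example");
	if (!example_wq)
		return -ENOMEM;
	INIT_WORK(&example_work, example_fn, NULL);
	queue_work(example_wq, &example_work);	/* queued to the first
						   online CPU's area */
	return 0;
}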