about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/audit.c 2
-rw-r--r-- kernel/cgroup.c 23
-rw-r--r-- kernel/cpuset.c 2
-rw-r--r-- kernel/debug/debug_core.c 2
-rw-r--r-- kernel/debug/gdbstub.c 2
-rw-r--r-- kernel/hw_breakpoint.c 12
-rw-r--r-- kernel/module.c 1088
-rw-r--r-- kernel/padata.c 755
-rw-r--r-- kernel/pm_qos_params.c 215
-rw-r--r-- kernel/power/hibernate.c 26
-rw-r--r-- kernel/power/main.c 55
-rw-r--r-- kernel/power/snapshot.c 2
-rw-r--r-- kernel/power/suspend.c 13
-rw-r--r-- kernel/power/swap.c 6
-rw-r--r-- kernel/printk.c 33
-rw-r--r-- kernel/signal.c 9
-rw-r--r-- kernel/time/tick-broadcast.c 2
-rw-r--r-- kernel/timer.c 13
-rw-r--r-- kernel/user_namespace.c 44
19 files changed, 1470 insertions, 834 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index c71bd26..8296aa5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -407,7 +407,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
audit_hold_skb(skb);
} else
/* drop the extra reference if sent ok */
- kfree_skb(skb);
+ consume_skb(skb);
}
static int kauditd_thread(void *dummy)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3ac6f5b..a8ce099 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1788,6 +1788,29 @@ out:
return retval;
}
+/**
+ * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup
+ * @tsk: the task to be attached
+ */
+int cgroup_attach_task_current_cg(struct task_struct *tsk)
+{
+ struct cgroupfs_root *root;
+ struct cgroup *cur_cg;
+ int retval = 0;
+
+ cgroup_lock();
+ for_each_active_root(root) {
+ cur_cg = task_cgroup_from_root(current, root);
+ retval = cgroup_attach_task(cur_cg, tsk);
+ if (retval)
+ break;
+ }
+ cgroup_unlock();
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg);
+
/*
* Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
* held. May take task_lock of task
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611..7cb37d8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -105,7 +105,7 @@ struct cpuset {
/* for custom sched domain */
int relax_domain_level;
- /* used for walking a cpuset heirarchy */
+ /* used for walking a cpuset hierarchy */
struct list_head stack_list;
};
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 9ed9307..3c2d497 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -6,7 +6,7 @@
* Copyright (C) 2000-2001 VERITAS Software Corporation.
* Copyright (C) 2002-2004 Timesys Corporation
* Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
- * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz>
* Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
* Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
* Copyright (C) 2005-2009 Wind River Systems, Inc.
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index fc7b174..481a7bd 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -6,7 +6,7 @@
* Copyright (C) 2000-2001 VERITAS Software Corporation.
* Copyright (C) 2002-2004 Timesys Corporation
* Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
- * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz>
* Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
* Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
* Copyright (C) 2005-2009 Wind River Systems, Inc.
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 7a56b22..71ed3ce 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -242,6 +242,17 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
}
/*
+ * Function to perform processor-specific cleanup during unregistration
+ */
+__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+ /*
+ * A weak stub function here for those archs that don't define
+ * it inside arch/.../kernel/hw_breakpoint.c
+ */
+}
+
+/*
* Contraints to check before allowing this new breakpoint counter:
*
* == Non-pinned counter == (Considered as pinned for now)
@@ -339,6 +350,7 @@ void release_bp_slot(struct perf_event *bp)
{
mutex_lock(&nr_bp_mutex);
+ arch_unregister_hw_breakpoint(bp);
__release_bp_slot(bp);
mutex_unlock(&nr_bp_mutex);
diff --git a/kernel/module.c b/kernel/module.c
index 6c56282..d0b5f8d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1,6 +1,6 @@
/*
Copyright (C) 2002 Richard Henderson
- Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
+ Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -110,6 +110,20 @@ int unregister_module_notifier(struct notifier_block * nb)
}
EXPORT_SYMBOL(unregister_module_notifier);
+struct load_info {
+ Elf_Ehdr *hdr;
+ unsigned long len;
+ Elf_Shdr *sechdrs;
+ char *secstrings, *strtab;
+ unsigned long *strmap;
+ unsigned long symoffs, stroffs;
+ struct _ddebug *debug;
+ unsigned int num_debug;
+ struct {
+ unsigned int sym, str, mod, vers, info, pcpu;
+ } index;
+};
+
/* We require a truly strong try_module_get(): 0 means failure due to
ongoing or failed initialization etc. */
static inline int strong_try_module_get(struct module *mod)
@@ -140,42 +154,38 @@ void __module_put_and_exit(struct module *mod, long code)
EXPORT_SYMBOL(__module_put_and_exit);
/* Find a module section: 0 means not found. */
-static unsigned int find_sec(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings,
- const char *name)
+static unsigned int find_sec(const struct load_info *info, const char *name)
{
unsigned int i;
- for (i = 1; i < hdr->e_shnum; i++)
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ Elf_Shdr *shdr = &info->sechdrs[i];
/* Alloc bit cleared means "ignore it." */
- if ((sechdrs[i].sh_flags & SHF_ALLOC)
- && strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
+ if ((shdr->sh_flags & SHF_ALLOC)
+ && strcmp(info->secstrings + shdr->sh_name, name) == 0)
return i;
+ }
return 0;
}
/* Find a module section, or NULL. */
-static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
- const char *secstrings, const char *name)
+static void *section_addr(const struct load_info *info, const char *name)
{
/* Section 0 has sh_addr 0. */
- return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+ return (void *)info->sechdrs[find_sec(info, name)].sh_addr;
}
/* Find a module section, or NULL. Fill in number of "objects" in section. */
-static void *section_objs(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings,
+static void *section_objs(const struct load_info *info,
const char *name,
size_t object_size,
unsigned int *num)
{
- unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+ unsigned int sec = find_sec(info, name);
/* Section 0 has sh_addr 0 and sh_size 0. */
- *num = sechdrs[sec].sh_size / object_size;
- return (void *)sechdrs[sec].sh_addr;
+ *num = info->sechdrs[sec].sh_size / object_size;
+ return (void *)info->sechdrs[sec].sh_addr;
}
/* Provided by the linker */
@@ -227,7 +237,7 @@ bool each_symbol(bool (*fn)(const struct symsearch *arr, struct module *owner,
unsigned int symnum, void *data), void *data)
{
struct module *mod;
- const struct symsearch arr[] = {
+ static const struct symsearch arr[] = {
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
NOT_GPL_ONLY, false },
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
@@ -392,7 +402,8 @@ static int percpu_modalloc(struct module *mod,
mod->percpu = __alloc_reserved_percpu(size, align);
if (!mod->percpu) {
printk(KERN_WARNING
- "Could not allocate %lu bytes percpu data\n", size);
+ "%s: Could not allocate %lu bytes percpu data\n",
+ mod->name, size);
return -ENOMEM;
}
mod->percpu_size = size;
@@ -404,11 +415,9 @@ static void percpu_modfree(struct module *mod)
free_percpu(mod->percpu);
}
-static unsigned int find_pcpusec(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings)
+static unsigned int find_pcpusec(struct load_info *info)
{
- return find_sec(hdr, sechdrs, secstrings, ".data..percpu");
+ return find_sec(info, ".data..percpu");
}
static void percpu_modcopy(struct module *mod,
@@ -468,9 +477,7 @@ static inline int percpu_modalloc(struct module *mod,
static inline void percpu_modfree(struct module *mod)
{
}
-static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings)
+static unsigned int find_pcpusec(struct load_info *info)
{
return 0;
}
@@ -524,21 +531,21 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
EXPORT_TRACEPOINT_SYMBOL(module_get);
/* Init the unload section of the module. */
-static void module_unload_init(struct module *mod)
+static int module_unload_init(struct module *mod)
{
- int cpu;
+ mod->refptr = alloc_percpu(struct module_ref);
+ if (!mod->refptr)
+ return -ENOMEM;
INIT_LIST_HEAD(&mod->source_list);
INIT_LIST_HEAD(&mod->target_list);
- for_each_possible_cpu(cpu) {
- per_cpu_ptr(mod->refptr, cpu)->incs = 0;
- per_cpu_ptr(mod->refptr, cpu)->decs = 0;
- }
/* Hold reference count during initialization. */
__this_cpu_write(mod->refptr->incs, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
+
+ return 0;
}
/* Does a already use b? */
@@ -618,6 +625,8 @@ static void module_unload_free(struct module *mod)
kfree(use);
}
mutex_unlock(&module_mutex);
+
+ free_percpu(mod->refptr);
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -891,8 +900,9 @@ int ref_module(struct module *a, struct module *b)
}
EXPORT_SYMBOL_GPL(ref_module);
-static inline void module_unload_init(struct module *mod)
+static inline int module_unload_init(struct module *mod)
{
+ return 0;
}
#endif /* CONFIG_MODULE_UNLOAD */
@@ -1051,10 +1061,9 @@ static inline int same_magic(const char *amagic, const char *bmagic,
#endif /* CONFIG_MODVERSIONS */
/* Resolve a symbol for this module. I.e. if we find one, record usage. */
-static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static const struct kernel_symbol *resolve_symbol(struct module *mod,
+ const struct load_info *info,
const char *name,
- struct module *mod,
char ownername[])
{
struct module *owner;
@@ -1068,7 +1077,8 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
if (!sym)
goto unlock;
- if (!check_version(sechdrs, versindex, name, mod, crc, owner)) {
+ if (!check_version(info->sechdrs, info->index.vers, name, mod, crc,
+ owner)) {
sym = ERR_PTR(-EINVAL);
goto getname;
}
@@ -1087,21 +1097,20 @@ unlock:
return sym;
}
-static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
- unsigned int versindex,
- const char *name,
- struct module *mod)
+static const struct kernel_symbol *
+resolve_symbol_wait(struct module *mod,
+ const struct load_info *info,
+ const char *name)
{
const struct kernel_symbol *ksym;
- char ownername[MODULE_NAME_LEN];
+ char owner[MODULE_NAME_LEN];
if (wait_event_interruptible_timeout(module_wq,
- !IS_ERR(ksym = resolve_symbol(sechdrs, versindex, name,
- mod, ownername)) ||
- PTR_ERR(ksym) != -EBUSY,
+ !IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
+ || PTR_ERR(ksym) != -EBUSY,
30 * HZ) <= 0) {
printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
- mod->name, ownername);
+ mod->name, owner);
}
return ksym;
}
@@ -1110,8 +1119,9 @@ static const struct kernel_symbol *resolve_symbol_wait(Elf_Shdr *sechdrs,
* /sys/module/foo/sections stuff
* J. Corbet <corbet@lwn.net>
*/
-#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS)
+#ifdef CONFIG_SYSFS
+#ifdef CONFIG_KALLSYMS
static inline bool sect_empty(const Elf_Shdr *sect)
{
return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
@@ -1148,8 +1158,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
kfree(sect_attrs);
}
-static void add_sect_attrs(struct module *mod, unsigned int nsect,
- char *secstrings, Elf_Shdr *sechdrs)
+static void add_sect_attrs(struct module *mod, const struct load_info *info)
{
unsigned int nloaded = 0, i, size[2];
struct module_sect_attrs *sect_attrs;
@@ -1157,8 +1166,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
struct attribute **gattr;
/* Count loaded sections and allocate structures */
- for (i = 0; i < nsect; i++)
- if (!sect_empty(&sechdrs[i]))
+ for (i = 0; i < info->hdr->e_shnum; i++)
+ if (!sect_empty(&info->sechdrs[i]))
nloaded++;
size[0] = ALIGN(sizeof(*sect_attrs)
+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1175,11 +1184,12 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
sect_attrs->nsections = 0;
sattr = &sect_attrs->attrs[0];
gattr = &sect_attrs->grp.attrs[0];
- for (i = 0; i < nsect; i++) {
- if (sect_empty(&sechdrs[i]))
+ for (i = 0; i < info->hdr->e_shnum; i++) {
+ Elf_Shdr *sec = &info->sechdrs[i];
+ if (sect_empty(sec))
continue;
- sattr->address = sechdrs[i].sh_addr;
- sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
+ sattr->address = sec->sh_addr;
+ sattr->name = kstrdup(info->secstrings + sec->sh_name,
GFP_KERNEL);
if (sattr->name == NULL)
goto out;
@@ -1247,8 +1257,7 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
kfree(notes_attrs);
}
-static void add_notes_attrs(struct module *mod, unsigned int nsect,
- char *secstrings, Elf_Shdr *sechdrs)
+static void add_notes_attrs(struct module *mod, const struct load_info *info)
{
unsigned int notes, loaded, i;
struct module_notes_attrs *notes_attrs;
@@ -1260,9 +1269,9 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
/* Count notes sections and allocate structures. */
notes = 0;
- for (i = 0; i < nsect; i++)
- if (!sect_empty(&sechdrs[i]) &&
- (sechdrs[i].sh_type == SHT_NOTE))
+ for (i = 0; i < info->hdr->e_shnum; i++)
+ if (!sect_empty(&info->sechdrs[i]) &&
+ (info->sechdrs[i].sh_type == SHT_NOTE))
++notes;
if (notes == 0)
@@ -1276,15 +1285,15 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
notes_attrs->notes = notes;
nattr = &notes_attrs->attrs[0];
- for (loaded = i = 0; i < nsect; ++i) {
- if (sect_empty(&sechdrs[i]))
+ for (loaded = i = 0; i < info->hdr->e_shnum; ++i) {
+ if (sect_empty(&info->sechdrs[i]))
continue;
- if (sechdrs[i].sh_type == SHT_NOTE) {
+ if (info->sechdrs[i].sh_type == SHT_NOTE) {
sysfs_bin_attr_init(nattr);
nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
nattr->attr.mode = S_IRUGO;
- nattr->size = sechdrs[i].sh_size;
- nattr->private = (void *) sechdrs[i].sh_addr;
+ nattr->size = info->sechdrs[i].sh_size;
+ nattr->private = (void *) info->sechdrs[i].sh_addr;
nattr->read = module_notes_read;
++nattr;
}
@@ -1315,8 +1324,8 @@ static void remove_notes_attrs(struct module *mod)
#else
-static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
- char *sectstrings, Elf_Shdr *sechdrs)
+static inline void add_sect_attrs(struct module *mod,
+ const struct load_info *info)
{
}
@@ -1324,17 +1333,16 @@ static inline void remove_sect_attrs(struct module *mod)
{
}
-static inline void add_notes_attrs(struct module *mod, unsigned int nsect,
- char *sectstrings, Elf_Shdr *sechdrs)
+static inline void add_notes_attrs(struct module *mod,
+ const struct load_info *info)
{
}
static inline void remove_notes_attrs(struct module *mod)
{
}
-#endif
+#endif /* CONFIG_KALLSYMS */
-#ifdef CONFIG_SYSFS
static void add_usage_links(struct module *mod)
{
#ifdef CONFIG_MODULE_UNLOAD
@@ -1439,6 +1447,7 @@ out:
}
static int mod_sysfs_setup(struct module *mod,
+ const struct load_info *info,
struct kernel_param *kparam,
unsigned int num_params)
{
@@ -1463,6 +1472,8 @@ static int mod_sysfs_setup(struct module *mod,
goto out_unreg_param;
add_usage_links(mod);
+ add_sect_attrs(mod, info);
+ add_notes_attrs(mod, info);
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
return 0;
@@ -1479,33 +1490,26 @@ out:
static void mod_sysfs_fini(struct module *mod)
{
+ remove_notes_attrs(mod);
+ remove_sect_attrs(mod);
kobject_put(&mod->mkobj.kobj);
}
-#else /* CONFIG_SYSFS */
-
-static inline int mod_sysfs_init(struct module *mod)
-{
- return 0;
-}
+#else /* !CONFIG_SYSFS */
-static inline int mod_sysfs_setup(struct module *mod,
+static int mod_sysfs_setup(struct module *mod,
+ const struct load_info *info,
struct kernel_param *kparam,
unsigned int num_params)
{
return 0;
}
-static inline int module_add_modinfo_attrs(struct module *mod)
-{
- return 0;
-}
-
-static inline void module_remove_modinfo_attrs(struct module *mod)
+static void mod_sysfs_fini(struct module *mod)
{
}
-static void mod_sysfs_fini(struct module *mod)
+static void module_remove_modinfo_attrs(struct module *mod)
{
}
@@ -1515,7 +1519,7 @@ static void del_usage_links(struct module *mod)
#endif /* CONFIG_SYSFS */
-static void mod_kobject_remove(struct module *mod)
+static void mod_sysfs_teardown(struct module *mod)
{
del_usage_links(mod);
module_remove_modinfo_attrs(mod);
@@ -1545,9 +1549,7 @@ static void free_module(struct module *mod)
mutex_lock(&module_mutex);
stop_machine(__unlink_module, mod, NULL);
mutex_unlock(&module_mutex);
- remove_notes_attrs(mod);
- remove_sect_attrs(mod);
- mod_kobject_remove(mod);
+ mod_sysfs_teardown(mod);
/* Remove dynamic debug info */
ddebug_remove_module(mod->name);
@@ -1565,10 +1567,7 @@ static void free_module(struct module *mod)
module_free(mod, mod->module_init);
kfree(mod->args);
percpu_modfree(mod);
-#if defined(CONFIG_MODULE_UNLOAD)
- if (mod->refptr)
- free_percpu(mod->refptr);
-#endif
+
/* Free lock-classes: */
lockdep_free_key_range(mod->module_core, mod->core_size);
@@ -1634,25 +1633,23 @@ static int verify_export_symbols(struct module *mod)
}
/* Change all symbols so that st_value encodes the pointer directly. */
-static int simplify_symbols(Elf_Shdr *sechdrs,
- unsigned int symindex,
- const char *strtab,
- unsigned int versindex,
- unsigned int pcpuindex,
- struct module *mod)
-{
- Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
+static int simplify_symbols(struct module *mod, const struct load_info *info)
+{
+ Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
+ Elf_Sym *sym = (void *)symsec->sh_addr;
unsigned long secbase;
- unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
+ unsigned int i;
int ret = 0;
const struct kernel_symbol *ksym;
- for (i = 1; i < n; i++) {
+ for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
+ const char *name = info->strtab + sym[i].st_name;
+
switch (sym[i].st_shndx) {
case SHN_COMMON:
/* We compiled with -fno-common. These are not
supposed to happen. */
- DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name);
+ DEBUGP("Common symbol: %s\n", name);
printk("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
@@ -1665,9 +1662,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
break;
case SHN_UNDEF:
- ksym = resolve_symbol_wait(sechdrs, versindex,
- strtab + sym[i].st_name,
- mod);
+ ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
sym[i].st_value = ksym->value;
@@ -1679,17 +1674,16 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
break;
printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
- mod->name, strtab + sym[i].st_name,
- PTR_ERR(ksym));
+ mod->name, name, PTR_ERR(ksym));
ret = PTR_ERR(ksym) ?: -ENOENT;
break;
default:
/* Divert to percpu allocation if a percpu var. */
- if (sym[i].st_shndx == pcpuindex)
+ if (sym[i].st_shndx == info->index.pcpu)
secbase = (unsigned long)mod_percpu(mod);
else
- secbase = sechdrs[sym[i].st_shndx].sh_addr;
+ secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
sym[i].st_value += secbase;
break;
}
@@ -1698,6 +1692,35 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
return ret;
}
+static int apply_relocations(struct module *mod, const struct load_info *info)
+{
+ unsigned int i;
+ int err = 0;
+
+ /* Now do relocations. */
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ unsigned int infosec = info->sechdrs[i].sh_info;
+
+ /* Not a valid relocation section? */
+ if (infosec >= info->hdr->e_shnum)
+ continue;
+
+ /* Don't bother with non-allocated sections */
+ if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
+ continue;
+
+ if (info->sechdrs[i].sh_type == SHT_REL)
+ err = apply_relocate(info->sechdrs, info->strtab,
+ info->index.sym, i, mod);
+ else if (info->sechdrs[i].sh_type == SHT_RELA)
+ err = apply_relocate_add(info->sechdrs, info->strtab,
+ info->index.sym, i, mod);
+ if (err < 0)
+ break;
+ }
+ return err;
+}
+
/* Additional bytes needed by arch in front of individual sections */
unsigned int __weak arch_mod_section_prepend(struct module *mod,
unsigned int section)
@@ -1722,10 +1745,7 @@ static long get_offset(struct module *mod, unsigned int *size,
might -- code, read-only data, read-write data, small data. Tally
sizes, and place the offsets into sh_entsize fields: high bit means it
belongs in init. */
-static void layout_sections(struct module *mod,
- const Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs,
- const char *secstrings)
+static void layout_sections(struct module *mod, struct load_info *info)
{
static unsigned long const masks[][2] = {
/* NOTE: all executable code must be the first section
@@ -1738,21 +1758,22 @@ static void layout_sections(struct module *mod,
};
unsigned int m, i;
- for (i = 0; i < hdr->e_shnum; i++)
- sechdrs[i].sh_entsize = ~0UL;
+ for (i = 0; i < info->hdr->e_shnum; i++)
+ info->sechdrs[i].sh_entsize = ~0UL;
DEBUGP("Core section allocation order:\n");
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
- for (i = 0; i < hdr->e_shnum; ++i) {
- Elf_Shdr *s = &sechdrs[i];
+ for (i = 0; i < info->hdr->e_shnum; ++i) {
+ Elf_Shdr *s = &info->sechdrs[i];
+ const char *sname = info->secstrings + s->sh_name;
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || strstarts(secstrings + s->sh_name, ".init"))
+ || strstarts(sname, ".init"))
continue;
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
- DEBUGP("\t%s\n", secstrings + s->sh_name);
+ DEBUGP("\t%s\n", sname);
}
if (m == 0)
mod->core_text_size = mod->core_size;
@@ -1760,17 +1781,18 @@ static void layout_sections(struct module *mod,
DEBUGP("Init section allocation order:\n");
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
- for (i = 0; i < hdr->e_shnum; ++i) {
- Elf_Shdr *s = &sechdrs[i];
+ for (i = 0; i < info->hdr->e_shnum; ++i) {
+ Elf_Shdr *s = &info->sechdrs[i];
+ const char *sname = info->secstrings + s->sh_name;
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || !strstarts(secstrings + s->sh_name, ".init"))
+ || !strstarts(sname, ".init"))
continue;
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
| INIT_OFFSET_MASK);
- DEBUGP("\t%s\n", secstrings + s->sh_name);
+ DEBUGP("\t%s\n", sname);
}
if (m == 0)
mod->init_text_size = mod->init_size;
@@ -1809,33 +1831,28 @@ static char *next_string(char *string, unsigned long *secsize)
return string;
}
-static char *get_modinfo(Elf_Shdr *sechdrs,
- unsigned int info,
- const char *tag)
+static char *get_modinfo(struct load_info *info, const char *tag)
{
char *p;
unsigned int taglen = strlen(tag);
- unsigned long size = sechdrs[info].sh_size;
+ Elf_Shdr *infosec = &info->sechdrs[info->index.info];
+ unsigned long size = infosec->sh_size;
- for (p = (char *)sechdrs[info].sh_addr; p; p = next_string(p, &size)) {
+ for (p = (char *)infosec->sh_addr; p; p = next_string(p, &size)) {
if (strncmp(p, tag, taglen) == 0 && p[taglen] == '=')
return p + taglen + 1;
}
return NULL;
}
-static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
- unsigned int infoindex)
+static void setup_modinfo(struct module *mod, struct load_info *info)
{
struct module_attribute *attr;
int i;
for (i = 0; (attr = modinfo_attrs[i]); i++) {
if (attr->setup)
- attr->setup(mod,
- get_modinfo(sechdrs,
- infoindex,
- attr->attr.name));
+ attr->setup(mod, get_modinfo(info, attr->attr.name));
}
}
@@ -1876,11 +1893,10 @@ static int is_exported(const char *name, unsigned long value,
}
/* As per nm */
-static char elf_type(const Elf_Sym *sym,
- Elf_Shdr *sechdrs,
- const char *secstrings,
- struct module *mod)
+static char elf_type(const Elf_Sym *sym, const struct load_info *info)
{
+ const Elf_Shdr *sechdrs = info->sechdrs;
+
if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT)
return 'v';
@@ -1910,8 +1926,10 @@ static char elf_type(const Elf_Sym *sym,
else
return 'b';
}
- if (strstarts(secstrings + sechdrs[sym->st_shndx].sh_name, ".debug"))
+ if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name,
+ ".debug")) {
return 'n';
+ }
return '?';
}
@@ -1936,127 +1954,96 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
return true;
}
-static unsigned long layout_symtab(struct module *mod,
- Elf_Shdr *sechdrs,
- unsigned int symindex,
- unsigned int strindex,
- const Elf_Ehdr *hdr,
- const char *secstrings,
- unsigned long *pstroffs,
- unsigned long *strmap)
+static void layout_symtab(struct module *mod, struct load_info *info)
{
- unsigned long symoffs;
- Elf_Shdr *symsect = sechdrs + symindex;
- Elf_Shdr *strsect = sechdrs + strindex;
+ Elf_Shdr *symsect = info->sechdrs + info->index.sym;
+ Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
- const char *strtab;
unsigned int i, nsrc, ndst;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
- symindex) | INIT_OFFSET_MASK;
- DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+ info->index.sym) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", info->secstrings + symsect->sh_name);
- src = (void *)hdr + symsect->sh_offset;
+ src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
- strtab = (void *)hdr + strsect->sh_offset;
for (ndst = i = 1; i < nsrc; ++i, ++src)
- if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+ if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
unsigned int j = src->st_name;
- while(!__test_and_set_bit(j, strmap) && strtab[j])
+ while (!__test_and_set_bit(j, info->strmap)
+ && info->strtab[j])
++j;
++ndst;
}
/* Append room for core symbols at end of core part. */
- symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
- mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+ info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+ mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
- strindex) | INIT_OFFSET_MASK;
- DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+ info->index.str) | INIT_OFFSET_MASK;
+ DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
/* Append room for core symbols' strings at end of core part. */
- *pstroffs = mod->core_size;
- __set_bit(0, strmap);
- mod->core_size += bitmap_weight(strmap, strsect->sh_size);
-
- return symoffs;
+ info->stroffs = mod->core_size;
+ __set_bit(0, info->strmap);
+ mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
}
-static void add_kallsyms(struct module *mod,
- Elf_Shdr *sechdrs,
- unsigned int shnum,
- unsigned int symindex,
- unsigned int strindex,
- unsigned long symoffs,
- unsigned long stroffs,
- const char *secstrings,
- unsigned long *strmap)
+static void add_kallsyms(struct module *mod, const struct load_info *info)
{
unsigned int i, ndst;
const Elf_Sym *src;
Elf_Sym *dst;
char *s;
+ Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
- mod->symtab = (void *)sechdrs[symindex].sh_addr;
- mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
- mod->strtab = (void *)sechdrs[strindex].sh_addr;
+ mod->symtab = (void *)symsec->sh_addr;
+ mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+ /* Make sure we get permanent strtab: don't use info->strtab. */
+ mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
/* Set types up while we still have access to sections. */
for (i = 0; i < mod->num_symtab; i++)
- mod->symtab[i].st_info
- = elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+ mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
- mod->core_symtab = dst = mod->module_core + symoffs;
+ mod->core_symtab = dst = mod->module_core + info->symoffs;
src = mod->symtab;
*dst = *src;
for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
- if (!is_core_symbol(src, sechdrs, shnum))
+ if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
continue;
dst[ndst] = *src;
- dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+ dst[ndst].st_name = bitmap_weight(info->strmap,
+ dst[ndst].st_name);
++ndst;
}
mod->core_num_syms = ndst;
- mod->core_strtab = s = mod->module_core + stroffs;
- for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
- if (test_bit(i, strmap))
+ mod->core_strtab = s = mod->module_core + info->stroffs;
+ for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i)
+ if (test_bit(i, info->strmap))
*++s = mod->strtab[i];
}
#else
-static inline unsigned long layout_symtab(struct module *mod,
- Elf_Shdr *sechdrs,
- unsigned int symindex,
- unsigned int strindex,
- const Elf_Ehdr *hdr,
- const char *secstrings,
- unsigned long *pstroffs,
- unsigned long *strmap)
+static inline void layout_symtab(struct module *mod, struct load_info *info)
{
- return 0;
}
-static inline void add_kallsyms(struct module *mod,
- Elf_Shdr *sechdrs,
- unsigned int shnum,
- unsigned int symindex,
- unsigned int strindex,
- unsigned long symoffs,
- unsigned long stroffs,
- const char *secstrings,
- const unsigned long *strmap)
+static void add_kallsyms(struct module *mod, struct load_info *info)
{
}
#endif /* CONFIG_KALLSYMS */
static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
{
+ if (!debug)
+ return;
#ifdef CONFIG_DYNAMIC_DEBUG
if (ddebug_add_module(debug, num, debug->modname))
printk(KERN_ERR "dynamic debug error adding module: %s\n",
@@ -2087,65 +2074,47 @@ static void *module_alloc_update_bounds(unsigned long size)
}
#ifdef CONFIG_DEBUG_KMEMLEAK
-static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs, char *secstrings)
+static void kmemleak_load_module(const struct module *mod,
+ const struct load_info *info)
{
unsigned int i;
/* only scan the sections containing data */
kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
- for (i = 1; i < hdr->e_shnum; i++) {
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ const char *name = info->secstrings + info->sechdrs[i].sh_name;
+ if (!(info->sechdrs[i].sh_flags & SHF_ALLOC))
continue;
- if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0
- && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
+ if (!strstarts(name, ".data") && !strstarts(name, ".bss"))
continue;
- kmemleak_scan_area((void *)sechdrs[i].sh_addr,
- sechdrs[i].sh_size, GFP_KERNEL);
+ kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
+ info->sechdrs[i].sh_size, GFP_KERNEL);
}
}
#else
-static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
- Elf_Shdr *sechdrs, char *secstrings)
+static inline void kmemleak_load_module(const struct module *mod,
+ const struct load_info *info)
{
}
#endif
-/* Allocate and load the module: note that size of section 0 is always
- zero, and we rely on this for optional sections. */
-static noinline struct module *load_module(void __user *umod,
- unsigned long len,
- const char __user *uargs)
+/* Sets info->hdr and info->len. */
+static int copy_and_check(struct load_info *info,
+ const void __user *umod, unsigned long len,
+ const char __user *uargs)
{
+ int err;
Elf_Ehdr *hdr;
- Elf_Shdr *sechdrs;
- char *secstrings, *args, *modmagic, *strtab = NULL;
- char *staging;
- unsigned int i;
- unsigned int symindex = 0;
- unsigned int strindex = 0;
- unsigned int modindex, versindex, infoindex, pcpuindex;
- struct module *mod;
- long err = 0;
- void *ptr = NULL; /* Stops spurious gcc warning */
- unsigned long symoffs, stroffs, *strmap;
- void __percpu *percpu;
- struct _ddebug *debug = NULL;
- unsigned int num_debug = 0;
- mm_segment_t old_fs;
-
- DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
- umod, len, uargs);
if (len < sizeof(*hdr))
- return ERR_PTR(-ENOEXEC);
+ return -ENOEXEC;
/* Suck in entire file: we'll want most of it. */
/* vmalloc barfs on "unusual" numbers. Check here */
if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
if (copy_from_user(hdr, umod, len) != 0) {
err = -EFAULT;
@@ -2153,135 +2122,225 @@ static noinline struct module *load_module(void __user *umod,
}
/* Sanity checks against insmoding binaries or wrong arch,
- weird elf version */
+ weird elf version */
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
|| hdr->e_type != ET_REL
|| !elf_check_arch(hdr)
- || hdr->e_shentsize != sizeof(*sechdrs)) {
+ || hdr->e_shentsize != sizeof(Elf_Shdr)) {
err = -ENOEXEC;
goto free_hdr;
}
- if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr))
- goto truncated;
+ if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
- /* Convenience variables */
- sechdrs = (void *)hdr + hdr->e_shoff;
- secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- sechdrs[0].sh_addr = 0;
+ info->hdr = hdr;
+ info->len = len;
+ return 0;
- for (i = 1; i < hdr->e_shnum; i++) {
- if (sechdrs[i].sh_type != SHT_NOBITS
- && len < sechdrs[i].sh_offset + sechdrs[i].sh_size)
- goto truncated;
+free_hdr:
+ vfree(hdr);
+ return err;
+}
+
+/* Release the temporary module image that info->hdr points to (see copy_and_check()). */
+static void free_copy(struct load_info *info)
+{
+ vfree(info->hdr);
+}
+
+/*
+ * Walk the section headers of the temporary image: reject sections whose
+ * file contents would extend past info->len, point each sh_addr into the
+ * temporary image, and clear SHF_ALLOC on .exit sections (only when
+ * CONFIG_MODULE_UNLOAD is off), ".modinfo" and "__versions".
+ * Also records the "__versions" and ".modinfo" indices in info->index.
+ *
+ * Returns 0 on success or -ENOEXEC if the image is truncated.
+ */
+static int rewrite_section_headers(struct load_info *info)
+{
+ unsigned int i;
+
+ /* This should always be true, but let's be sure. */
+ info->sechdrs[0].sh_addr = 0;
+
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ Elf_Shdr *shdr = &info->sechdrs[i];
+ /* SHT_NOBITS sections are exempt from the length check. */
+ if (shdr->sh_type != SHT_NOBITS
+ && info->len < shdr->sh_offset + shdr->sh_size) {
+ printk(KERN_ERR "Module len %lu truncated\n",
+ info->len);
+ return -ENOEXEC;
+ }
/* Mark all sections sh_addr with their address in the
temporary image. */
- sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset;
+ shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
- /* Internal symbols and strings. */
- if (sechdrs[i].sh_type == SHT_SYMTAB) {
- symindex = i;
- strindex = sechdrs[i].sh_link;
- strtab = (char *)hdr + sechdrs[strindex].sh_offset;
- }
#ifndef CONFIG_MODULE_UNLOAD
/* Don't load .exit sections */
- if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
- sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
+ if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
+ shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
}
- modindex = find_sec(hdr, sechdrs, secstrings,
- ".gnu.linkonce.this_module");
- if (!modindex) {
+ /* Track but don't keep modinfo and version sections. */
+ info->index.vers = find_sec(info, "__versions");
+ info->index.info = find_sec(info, ".modinfo");
+ info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
+ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+ return 0;
+}
+
+/*
+ * Set up our basic convenience variables (pointers to section headers,
+ * search for module section index etc), and do some basic section
+ * verification.
+ *
+ * Return the temporary module pointer (we'll replace it with the final
+ * one when we move the module sections around).
+ *
+ * On failure an ERR_PTR() is returned instead: the error from
+ * rewrite_section_headers(), or -ENOEXEC when the object has no
+ * ".gnu.linkonce.this_module" section, no symbol table, or fails
+ * check_modstruct_version().
+ */
+static struct module *setup_load_info(struct load_info *info)
+{
+ unsigned int i;
+ int err;
+ struct module *mod;
+
+ /* Set up the convenience variables */
+ /*
+  * NOTE(review): e_shstrndx is used as a section index here without a
+  * visible bounds check against e_shnum -- confirm it is validated.
+  */
+ info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
+ info->secstrings = (void *)info->hdr
+ + info->sechdrs[info->hdr->e_shstrndx].sh_offset;
+
+ err = rewrite_section_headers(info);
+ if (err)
+ return ERR_PTR(err);
+
+ /* Find internal symbols and strings. */
+ /* Only the first SHT_SYMTAB section is used: the loop stops there. */
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
+ info->index.sym = i;
+ info->index.str = info->sechdrs[i].sh_link;
+ info->strtab = (char *)info->hdr
+ + info->sechdrs[info->index.str].sh_offset;
+ break;
+ }
+ }
+
+ info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
+ if (!info->index.mod) {
printk(KERN_WARNING "No module found in object\n");
- err = -ENOEXEC;
- goto free_hdr;
+ return ERR_PTR(-ENOEXEC);
}
/* This is temporary: point mod into copy of data. */
- mod = (void *)sechdrs[modindex].sh_addr;
+ mod = (void *)info->sechdrs[info->index.mod].sh_addr;
- if (symindex == 0) {
+ /* index.sym keeps its initial 0 when the loop above found no symtab. */
+ if (info->index.sym == 0) {
printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
mod->name);
- err = -ENOEXEC;
- goto free_hdr;
+ return ERR_PTR(-ENOEXEC);
}
- versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
- infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
- pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
-
- /* Don't keep modinfo and version sections. */
- sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
- sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
+ info->index.pcpu = find_pcpusec(info);
/* Check module struct version now, before we try to use module. */
- if (!check_modstruct_version(sechdrs, versindex, mod)) {
- err = -ENOEXEC;
- goto free_hdr;
- }
+ if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
+ return ERR_PTR(-ENOEXEC);
+
+ return mod;
+}
+
+/*
+ * Inspect the module's modinfo entries: compare "vermagic" with the
+ * kernel's vermagic via same_magic(), taint the module with TAINT_CRAP if
+ * a "staging" entry is present, and pass the "license" entry to
+ * set_license().
+ *
+ * Returns 0 on success, the error from try_to_force_load() when vermagic
+ * is missing and the load cannot be forced, or -ENOEXEC on a mismatch.
+ */
+static int check_modinfo(struct module *mod, struct load_info *info)
+{
+ const char *modmagic = get_modinfo(info, "vermagic");
+ int err;
- modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
/* This is allowed: modprobe --force will invalidate it. */
if (!modmagic) {
err = try_to_force_load(mod, "bad vermagic");
if (err)
- goto free_hdr;
- } else if (!same_magic(modmagic, vermagic, versindex)) {
+ return err;
+ } else if (!same_magic(modmagic, vermagic, info->index.vers)) {
printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
mod->name, modmagic, vermagic);
- err = -ENOEXEC;
- goto free_hdr;
+ return -ENOEXEC;
}
- staging = get_modinfo(sechdrs, infoindex, "staging");
- if (staging) {
+ if (get_modinfo(info, "staging")) {
add_taint_module(mod, TAINT_CRAP);
printk(KERN_WARNING "%s: module is from the staging directory,"
" the quality is unknown, you have been warned.\n",
mod->name);
}
- /* Now copy in args */
- args = strndup_user(uargs, ~0UL >> 1);
- if (IS_ERR(args)) {
- err = PTR_ERR(args);
- goto free_hdr;
- }
+ /* Set up license info based on the info section */
+ set_license(mod, get_modinfo(info, "license"));
- strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
- * sizeof(long), GFP_KERNEL);
- if (!strmap) {
- err = -ENOMEM;
- goto free_mod;
- }
+ return 0;
+}
- mod->state = MODULE_STATE_COMING;
+/*
+ * Look up the optional, named sections and store what section_objs() and
+ * section_addr() return for them in the matching fields of mod; the
+ * "__verbose" entries go to info->debug / info->num_debug instead.
+ * Prints a warning when an "__obsparm" section is present.
+ */
+static void find_module_sections(struct module *mod, struct load_info *info)
+{
+ mod->kp = section_objs(info, "__param",
+ sizeof(*mod->kp), &mod->num_kp);
+ mod->syms = section_objs(info, "__ksymtab",
+ sizeof(*mod->syms), &mod->num_syms);
+ mod->crcs = section_addr(info, "__kcrctab");
+ mod->gpl_syms = section_objs(info, "__ksymtab_gpl",
+ sizeof(*mod->gpl_syms),
+ &mod->num_gpl_syms);
+ mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
+ mod->gpl_future_syms = section_objs(info,
+ "__ksymtab_gpl_future",
+ sizeof(*mod->gpl_future_syms),
+ &mod->num_gpl_future_syms);
+ mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future");
- /* Allow arches to frob section contents and sizes. */
- err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod);
- if (err < 0)
- goto free_mod;
+#ifdef CONFIG_UNUSED_SYMBOLS
+ mod->unused_syms = section_objs(info, "__ksymtab_unused",
+ sizeof(*mod->unused_syms),
+ &mod->num_unused_syms);
+ mod->unused_crcs = section_addr(info, "__kcrctab_unused");
+ mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl",
+ sizeof(*mod->unused_gpl_syms),
+ &mod->num_unused_gpl_syms);
+ mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl");
+#endif
+#ifdef CONFIG_CONSTRUCTORS
+ mod->ctors = section_objs(info, ".ctors",
+ sizeof(*mod->ctors), &mod->num_ctors);
+#endif
- if (pcpuindex) {
- /* We have a special allocation for this section. */
- err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size,
- sechdrs[pcpuindex].sh_addralign);
- if (err)
- goto free_mod;
- sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
- }
- /* Keep this around for failure path. */
- percpu = mod_percpu(mod);
+#ifdef CONFIG_TRACEPOINTS
+ mod->tracepoints = section_objs(info, "__tracepoints",
+ sizeof(*mod->tracepoints),
+ &mod->num_tracepoints);
+#endif
+#ifdef CONFIG_EVENT_TRACING
+ mod->trace_events = section_objs(info, "_ftrace_events",
+ sizeof(*mod->trace_events),
+ &mod->num_trace_events);
+ /*
+ * This section contains pointers to allocated objects in the trace
+ * code and not scanning it leads to false positives.
+ */
+ kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
+ mod->num_trace_events, GFP_KERNEL);
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ /* sechdrs[0].sh_size is always zero */
+ mod->ftrace_callsites = section_objs(info, "__mcount_loc",
+ sizeof(*mod->ftrace_callsites),
+ &mod->num_ftrace_callsites);
+#endif
- /* Determine total sizes, and put offsets in sh_entsize. For now
- this is done generically; there doesn't appear to be any
- special cases for the architectures. */
- layout_sections(mod, hdr, sechdrs, secstrings);
- symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
- secstrings, &stroffs, strmap);
+ mod->extable = section_objs(info, "__ex_table",
+ sizeof(*mod->extable), &mod->num_exentries);
+
+ if (section_addr(info, "__obsparm"))
+ printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
+ mod->name);
+
+ info->debug = section_objs(info, "__verbose",
+ sizeof(*info->debug), &info->num_debug);
+}
+
+static int move_module(struct module *mod, struct load_info *info)
+{
+ int i;
+ void *ptr;
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
@@ -2291,10 +2350,9 @@ static noinline struct module *load_module(void __user *umod,
* leak.
*/
kmemleak_not_leak(ptr);
- if (!ptr) {
- err = -ENOMEM;
- goto free_percpu;
- }
+ if (!ptr)
+ return -ENOMEM;
+
memset(ptr, 0, mod->core_size);
mod->module_core = ptr;
@@ -2307,50 +2365,40 @@ static noinline struct module *load_module(void __user *umod,
*/
kmemleak_ignore(ptr);
if (!ptr && mod->init_size) {
- err = -ENOMEM;
- goto free_core;
+ module_free(mod, mod->module_core);
+ return -ENOMEM;
}
memset(ptr, 0, mod->init_size);
mod->module_init = ptr;
/* Transfer each section which specifies SHF_ALLOC */
DEBUGP("final section addresses:\n");
- for (i = 0; i < hdr->e_shnum; i++) {
+ for (i = 0; i < info->hdr->e_shnum; i++) {
void *dest;
+ Elf_Shdr *shdr = &info->sechdrs[i];
- if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ if (!(shdr->sh_flags & SHF_ALLOC))
continue;
- if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK)
+ if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init
- + (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK);
+ + (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
- dest = mod->module_core + sechdrs[i].sh_entsize;
+ dest = mod->module_core + shdr->sh_entsize;
- if (sechdrs[i].sh_type != SHT_NOBITS)
- memcpy(dest, (void *)sechdrs[i].sh_addr,
- sechdrs[i].sh_size);
+ if (shdr->sh_type != SHT_NOBITS)
+ memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
/* Update sh_addr to point to copy in image. */
- sechdrs[i].sh_addr = (unsigned long)dest;
- DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name);
- }
- /* Module has been moved. */
- mod = (void *)sechdrs[modindex].sh_addr;
- kmemleak_load_module(mod, hdr, sechdrs, secstrings);
-
-#if defined(CONFIG_MODULE_UNLOAD)
- mod->refptr = alloc_percpu(struct module_ref);
- if (!mod->refptr) {
- err = -ENOMEM;
- goto free_init;
+ shdr->sh_addr = (unsigned long)dest;
+ DEBUGP("\t0x%lx %s\n",
+ shdr->sh_addr, info->secstrings + shdr->sh_name);
}
-#endif
- /* Now we've moved module, initialize linked lists, etc. */
- module_unload_init(mod);
- /* Set up license info based on the info section */
- set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
+ return 0;
+}
+static int check_module_license_and_versions(struct module *mod)
+{
/*
* ndiswrapper is under GPL by itself, but loads proprietary modules.
* Don't use add_taint_module(), as it would prevent ndiswrapper from
@@ -2363,77 +2411,6 @@ static noinline struct module *load_module(void __user *umod,
if (strcmp(mod->name, "driverloader") == 0)
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
- /* Set up MODINFO_ATTR fields */
- setup_modinfo(mod, sechdrs, infoindex);
-
- /* Fix up syms, so that st_value is a pointer to location. */
- err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
- mod);
- if (err < 0)
- goto cleanup;
-
- /* Now we've got everything in the final locations, we can
- * find optional sections. */
- mod->kp = section_objs(hdr, sechdrs, secstrings, "__param",
- sizeof(*mod->kp), &mod->num_kp);
- mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
- sizeof(*mod->syms), &mod->num_syms);
- mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
- mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
- sizeof(*mod->gpl_syms),
- &mod->num_gpl_syms);
- mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
- mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_gpl_future",
- sizeof(*mod->gpl_future_syms),
- &mod->num_gpl_future_syms);
- mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_gpl_future");
-
-#ifdef CONFIG_UNUSED_SYMBOLS
- mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_unused",
- sizeof(*mod->unused_syms),
- &mod->num_unused_syms);
- mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_unused");
- mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
- "__ksymtab_unused_gpl",
- sizeof(*mod->unused_gpl_syms),
- &mod->num_unused_gpl_syms);
- mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
- "__kcrctab_unused_gpl");
-#endif
-#ifdef CONFIG_CONSTRUCTORS
- mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
- sizeof(*mod->ctors), &mod->num_ctors);
-#endif
-
-#ifdef CONFIG_TRACEPOINTS
- mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
- "__tracepoints",
- sizeof(*mod->tracepoints),
- &mod->num_tracepoints);
-#endif
-#ifdef CONFIG_EVENT_TRACING
- mod->trace_events = section_objs(hdr, sechdrs, secstrings,
- "_ftrace_events",
- sizeof(*mod->trace_events),
- &mod->num_trace_events);
- /*
- * This section contains pointers to allocated objects in the trace
- * code and not scanning it leads to false positives.
- */
- kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
- mod->num_trace_events, GFP_KERNEL);
-#endif
-#ifdef CONFIG_FTRACE_MCOUNT_RECORD
- /* sechdrs[0].sh_size is always zero */
- mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
- "__mcount_loc",
- sizeof(*mod->ftrace_callsites),
- &mod->num_ftrace_callsites);
-#endif
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !mod->crcs)
|| (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2443,56 +2420,16 @@ static noinline struct module *load_module(void __user *umod,
|| (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
#endif
) {
- err = try_to_force_load(mod,
- "no versions for exported symbols");
- if (err)
- goto cleanup;
+ return try_to_force_load(mod,
+ "no versions for exported symbols");
}
#endif
+ return 0;
+}
- /* Now do relocations. */
- for (i = 1; i < hdr->e_shnum; i++) {
- const char *strtab = (char *)sechdrs[strindex].sh_addr;
- unsigned int info = sechdrs[i].sh_info;
-
- /* Not a valid relocation section? */
- if (info >= hdr->e_shnum)
- continue;
-
- /* Don't bother with non-allocated sections */
- if (!(sechdrs[info].sh_flags & SHF_ALLOC))
- continue;
-
- if (sechdrs[i].sh_type == SHT_REL)
- err = apply_relocate(sechdrs, strtab, symindex, i,mod);
- else if (sechdrs[i].sh_type == SHT_RELA)
- err = apply_relocate_add(sechdrs, strtab, symindex, i,
- mod);
- if (err < 0)
- goto cleanup;
- }
-
- /* Set up and sort exception table */
- mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
- sizeof(*mod->extable), &mod->num_exentries);
- sort_extable(mod->extable, mod->extable + mod->num_exentries);
-
- /* Finally, copy percpu area over. */
- percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr,
- sechdrs[pcpuindex].sh_size);
-
- add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
- symoffs, stroffs, secstrings, strmap);
- kfree(strmap);
- strmap = NULL;
-
- if (!mod->taints)
- debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
- sizeof(*debug), &num_debug);
-
- err = module_finalize(hdr, sechdrs, mod);
- if (err < 0)
- goto cleanup;
+static void flush_module_icache(const struct module *mod)
+{
+ mm_segment_t old_fs;
/* flush the icache in correct context */
old_fs = get_fs();
@@ -2511,11 +2448,160 @@ static noinline struct module *load_module(void __user *umod,
(unsigned long)mod->module_core + mod->core_size);
set_fs(old_fs);
+}
- mod->args = args;
- if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
- printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
- mod->name);
+/*
+ * Parse the temporary image, run the modinfo checks, lay out the sections
+ * and copy them into freshly allocated memory.
+ *
+ * Returns the module structure inside the final copy, or an ERR_PTR().
+ * On success info->strmap stays allocated; on failure everything this
+ * function allocated has been released again.
+ */
+static struct module *layout_and_allocate(struct load_info *info)
+{
+	struct module *mod;	/* inside the temporary copy until moved */
+	Elf_Shdr *pcpu;
+	size_t strmap_bytes;
+	int ret;
+
+	mod = setup_load_info(info);
+	if (IS_ERR(mod))
+		return mod;
+
+	ret = check_modinfo(mod, info);
+	if (ret != 0)
+		return ERR_PTR(ret);
+
+	/* Architectures may adjust section contents and sizes here. */
+	ret = module_frob_arch_sections(info->hdr, info->sechdrs,
+					info->secstrings, mod);
+	if (ret < 0)
+		goto out;
+
+	/* The percpu section gets its own allocation; drop its SHF_ALLOC. */
+	pcpu = &info->sechdrs[info->index.pcpu];
+	if (pcpu->sh_size != 0) {
+		ret = percpu_modalloc(mod, pcpu->sh_size, pcpu->sh_addralign);
+		if (ret != 0)
+			goto out;
+		pcpu->sh_flags &= ~(unsigned long)SHF_ALLOC;
+	}
+
+	/*
+	 * Work out the total sizes and store each section's offset in
+	 * sh_entsize.  This is generic: no architecture appears to need a
+	 * special case.
+	 */
+	layout_sections(mod, info);
+
+	/* One bit per byte of the string table section. */
+	strmap_bytes = BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)
+		       * sizeof(long);
+	info->strmap = kzalloc(strmap_bytes, GFP_KERNEL);
+	if (info->strmap == NULL) {
+		ret = -ENOMEM;
+		goto free_percpu;
+	}
+	layout_symtab(mod, info);
+
+	/* Allocate the final memory and copy the sections into it. */
+	ret = move_module(mod, info);
+	if (ret != 0)
+		goto free_strmap;
+
+	/* sh_addr now holds final addresses: switch to the moved module. */
+	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
+	kmemleak_load_module(mod, info);
+	return mod;
+
+free_strmap:
+	kfree(info->strmap);
+free_percpu:
+	percpu_modfree(mod);
+out:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Undo layout_and_allocate(): free info->strmap, the module's percpu
+ * allocation and its init and core regions.
+ *
+ * mod is no longer valid after this!
+ */
+static void module_deallocate(struct module *mod, struct load_info *info)
+{
+ kfree(info->strmap);
+ percpu_modfree(mod);
+ module_free(mod, mod->module_init);
+ /* mod is freed together with the core region, so this must be last. */
+ module_free(mod, mod->module_core);
+}
+
+/*
+ * Steps that have to wait until relocations are applied: sort the
+ * exception table, copy the percpu section over, set up kallsyms and run
+ * the architecture's finalizer.  Returns module_finalize()'s result.
+ */
+static int post_relocation(struct module *mod, const struct load_info *info)
+{
+	const Elf_Shdr *pcpu = &info->sechdrs[info->index.pcpu];
+
+	/* Relocations are done, so the exception table can be sorted. */
+	sort_extable(mod->extable, mod->extable + mod->num_exentries);
+
+	/* Copy the relocated percpu data over. */
+	percpu_modcopy(mod, (void *)pcpu->sh_addr, pcpu->sh_size);
+
+	/* Fill in the kallsyms-specific fields. */
+	add_kallsyms(mod, info);
+
+	/* Let the architecture finish the job. */
+	return module_finalize(info->hdr, info->sechdrs, mod);
+}
+
+/* Allocate and load the module: note that size of section 0 is always
+ zero, and we rely on this for optional sections. */
+static struct module *load_module(void __user *umod,
+ unsigned long len,
+ const char __user *uargs)
+{
+ struct load_info info = { NULL, };
+ struct module *mod;
+ long err;
+
+ DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
+ umod, len, uargs);
+
+ /* Copy in the blobs from userspace, check they are vaguely sane. */
+ err = copy_and_check(&info, umod, len, uargs);
+ if (err)
+ return ERR_PTR(err);
+
+ /* Figure out module layout, and allocate all the memory. */
+ mod = layout_and_allocate(&info);
+ if (IS_ERR(mod)) {
+ err = PTR_ERR(mod);
+ goto free_copy;
+ }
+
+ /* Now module is in final location, initialize linked lists, etc. */
+ err = module_unload_init(mod);
+ if (err)
+ goto free_module;
+
+ /* Now we've got everything in the final locations, we can
+ * find optional sections. */
+ find_module_sections(mod, &info);
+
+ err = check_module_license_and_versions(mod);
+ if (err)
+ goto free_unload;
+
+ /* Set up MODINFO_ATTR fields */
+ setup_modinfo(mod, &info);
+
+ /* Fix up syms, so that st_value is a pointer to location. */
+ err = simplify_symbols(mod, &info);
+ if (err < 0)
+ goto free_modinfo;
+
+ err = apply_relocations(mod, &info);
+ if (err < 0)
+ goto free_modinfo;
+
+ err = post_relocation(mod, &info);
+ if (err < 0)
+ goto free_modinfo;
+
+ flush_module_icache(mod);
+
+ /* Now copy in args */
+ mod->args = strndup_user(uargs, ~0UL >> 1);
+ if (IS_ERR(mod->args)) {
+ err = PTR_ERR(mod->args);
+ goto free_arch_cleanup;
+ }
+
+ /* Mark state as coming so strong_try_module_get() ignores us. */
+ mod->state = MODULE_STATE_COMING;
/* Now sew it into the lists so we can get lockdep and oops
* info during argument parsing. Noone should access us, since
@@ -2530,8 +2616,9 @@ static noinline struct module *load_module(void __user *umod,
goto unlock;
}
- if (debug)
- dynamic_debug_setup(debug, num_debug);
+ /* This has to be done once we're sure module name is unique. */
+ if (!mod->taints)
+ dynamic_debug_setup(info.debug, info.num_debug);
/* Find duplicate symbols */
err = verify_export_symbols(mod);
@@ -2541,23 +2628,22 @@ static noinline struct module *load_module(void __user *umod,
list_add_rcu(&mod->list, &modules);
mutex_unlock(&module_mutex);
+ /* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
if (err < 0)
goto unlink;
- err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
+ /* Link in to sysfs. */
+ err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
if (err < 0)
goto unlink;
- add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
- add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
-
- /* Get rid of temporary copy */
- vfree(hdr);
-
- trace_module_load(mod);
+ /* Get rid of temporary copy and strmap. */
+ kfree(info.strmap);
+ free_copy(&info);
/* Done! */
+ trace_module_load(mod);
return mod;
unlink:
@@ -2565,35 +2651,23 @@ static noinline struct module *load_module(void __user *umod,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
ddebug:
- dynamic_debug_remove(debug);
+ if (!mod->taints)
+ dynamic_debug_remove(info.debug);
unlock:
mutex_unlock(&module_mutex);
synchronize_sched();
+ kfree(mod->args);
+ free_arch_cleanup:
module_arch_cleanup(mod);
- cleanup:
+ free_modinfo:
free_modinfo(mod);
+ free_unload:
module_unload_free(mod);
-#if defined(CONFIG_MODULE_UNLOAD)
- free_percpu(mod->refptr);
- free_init:
-#endif
- module_free(mod, mod->module_init);
- free_core:
- module_free(mod, mod->module_core);
- /* mod will be freed with core. Don't access it beyond this line! */
- free_percpu:
- free_percpu(percpu);
- free_mod:
- kfree(args);
- kfree(strmap);
- free_hdr:
- vfree(hdr);
+ free_module:
+ module_deallocate(mod, &info);
+ free_copy:
+ free_copy(&info);
return ERR_PTR(err);
-
- truncated:
- printk(KERN_ERR "Module len %lu truncated\n", len);
- err = -ENOEXEC;
- goto free_hdr;
}
/* Call module constructors. */
diff --git a/kernel/padata.c b/kernel/padata.c
index fdd8ae6..7510194 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -26,18 +26,19 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/sysfs.h>
#include <linux/rcupdate.h>
-#define MAX_SEQ_NR INT_MAX - NR_CPUS
+#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
#define MAX_OBJ_NUM 1000
+/* Return the cpu_index-th CPU (counting from 0) set in pd->cpumask.pcpu. */
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
- target_cpu = cpumask_first(pd->cpumask);
+ target_cpu = cpumask_first(pd->cpumask.pcpu);
for (cpu = 0; cpu < cpu_index; cpu++)
- target_cpu = cpumask_next(target_cpu, pd->cpumask);
+ target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
return target_cpu;
}
@@ -53,26 +54,27 @@ static int padata_cpu_hash(struct padata_priv *padata)
* Hash the sequence numbers to the cpus by taking
* seq_nr mod. number of cpus in use.
*/
- cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask);
+ cpu_index = padata->seq_nr % cpumask_weight(pd->cpumask.pcpu);
return padata_index_to_cpu(pd, cpu_index);
}
-static void padata_parallel_worker(struct work_struct *work)
+static void padata_parallel_worker(struct work_struct *parallel_work)
{
- struct padata_queue *queue;
+ struct padata_parallel_queue *pqueue;
struct parallel_data *pd;
struct padata_instance *pinst;
LIST_HEAD(local_list);
local_bh_disable();
- queue = container_of(work, struct padata_queue, pwork);
- pd = queue->pd;
+ pqueue = container_of(parallel_work,
+ struct padata_parallel_queue, work);
+ pd = pqueue->pd;
pinst = pd->pinst;
- spin_lock(&queue->parallel.lock);
- list_replace_init(&queue->parallel.list, &local_list);
- spin_unlock(&queue->parallel.lock);
+ spin_lock(&pqueue->parallel.lock);
+ list_replace_init(&pqueue->parallel.list, &local_list);
+ spin_unlock(&pqueue->parallel.lock);
while (!list_empty(&local_list)) {
struct padata_priv *padata;
@@ -94,7 +96,7 @@ static void padata_parallel_worker(struct work_struct *work)
* @pinst: padata instance
* @padata: object to be parallelized
* @cb_cpu: cpu the serialization callback function will run on,
- * must be in the cpumask of padata.
+ * must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
*
* The parallelization callback function will run with BHs off.
* Note: Every object which is parallelized by padata_do_parallel
@@ -104,15 +106,18 @@ int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu)
{
int target_cpu, err;
- struct padata_queue *queue;
+ struct padata_parallel_queue *queue;
struct parallel_data *pd;
rcu_read_lock_bh();
pd = rcu_dereference(pinst->pd);
- err = 0;
- if (!(pinst->flags & PADATA_INIT))
+ err = -EINVAL;
+ if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
+ goto out;
+
+ if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
goto out;
err = -EBUSY;
@@ -122,11 +127,7 @@ int padata_do_parallel(struct padata_instance *pinst,
if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
goto out;
- err = -EINVAL;
- if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
- goto out;
-
- err = -EINPROGRESS;
+ err = 0;
atomic_inc(&pd->refcnt);
padata->pd = pd;
padata->cb_cpu = cb_cpu;
@@ -137,13 +138,13 @@ int padata_do_parallel(struct padata_instance *pinst,
padata->seq_nr = atomic_inc_return(&pd->seq_nr);
target_cpu = padata_cpu_hash(padata);
- queue = per_cpu_ptr(pd->queue, target_cpu);
+ queue = per_cpu_ptr(pd->pqueue, target_cpu);
spin_lock(&queue->parallel.lock);
list_add_tail(&padata->list, &queue->parallel.list);
spin_unlock(&queue->parallel.lock);
- queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+ queue_work_on(target_cpu, pinst->wq, &queue->work);
out:
rcu_read_unlock_bh();
@@ -171,84 +172,52 @@ EXPORT_SYMBOL(padata_do_parallel);
*/
static struct padata_priv *padata_get_next(struct parallel_data *pd)
{
- int cpu, num_cpus, empty, calc_seq_nr;
- int seq_nr, next_nr, overrun, next_overrun;
- struct padata_queue *queue, *next_queue;
+ int cpu, num_cpus;
+ int next_nr, next_index;
+ struct padata_parallel_queue *queue, *next_queue;
struct padata_priv *padata;
struct padata_list *reorder;
- empty = 0;
- next_nr = -1;
- next_overrun = 0;
- next_queue = NULL;
-
- num_cpus = cpumask_weight(pd->cpumask);
-
- for_each_cpu(cpu, pd->cpumask) {
- queue = per_cpu_ptr(pd->queue, cpu);
- reorder = &queue->reorder;
-
- /*
- * Calculate the seq_nr of the object that should be
- * next in this reorder queue.
- */
- overrun = 0;
- calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
- + queue->cpu_index;
+ num_cpus = cpumask_weight(pd->cpumask.pcpu);
- if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
- calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
- overrun = 1;
- }
-
- if (!list_empty(&reorder->list)) {
- padata = list_entry(reorder->list.next,
- struct padata_priv, list);
-
- seq_nr = padata->seq_nr;
- BUG_ON(calc_seq_nr != seq_nr);
- } else {
- seq_nr = calc_seq_nr;
- empty++;
- }
-
- if (next_nr < 0 || seq_nr < next_nr
- || (next_overrun && !overrun)) {
- next_nr = seq_nr;
- next_overrun = overrun;
- next_queue = queue;
- }
+ /*
+ * Calculate the percpu reorder queue and the sequence
+ * number of the next object.
+ */
+ next_nr = pd->processed;
+ next_index = next_nr % num_cpus;
+ cpu = padata_index_to_cpu(pd, next_index);
+ next_queue = per_cpu_ptr(pd->pqueue, cpu);
+
+ if (unlikely(next_nr > pd->max_seq_nr)) {
+ next_nr = next_nr - pd->max_seq_nr - 1;
+ next_index = next_nr % num_cpus;
+ cpu = padata_index_to_cpu(pd, next_index);
+ next_queue = per_cpu_ptr(pd->pqueue, cpu);
+ pd->processed = 0;
}
padata = NULL;
- if (empty == num_cpus)
- goto out;
-
reorder = &next_queue->reorder;
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);
- if (unlikely(next_overrun)) {
- for_each_cpu(cpu, pd->cpumask) {
- queue = per_cpu_ptr(pd->queue, cpu);
- atomic_set(&queue->num_obj, 0);
- }
- }
+ BUG_ON(next_nr != padata->seq_nr);
spin_lock(&reorder->lock);
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
spin_unlock(&reorder->lock);
- atomic_inc(&next_queue->num_obj);
+ pd->processed++;
goto out;
}
- queue = per_cpu_ptr(pd->queue, smp_processor_id());
+ queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
if (queue->cpu_index == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
@@ -262,7 +231,7 @@ out:
static void padata_reorder(struct parallel_data *pd)
{
struct padata_priv *padata;
- struct padata_queue *queue;
+ struct padata_serial_queue *squeue;
struct padata_instance *pinst = pd->pinst;
/*
@@ -301,13 +270,13 @@ static void padata_reorder(struct parallel_data *pd)
return;
}
- queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+ squeue = per_cpu_ptr(pd->squeue, padata->cb_cpu);
- spin_lock(&queue->serial.lock);
- list_add_tail(&padata->list, &queue->serial.list);
- spin_unlock(&queue->serial.lock);
+ spin_lock(&squeue->serial.lock);
+ list_add_tail(&padata->list, &squeue->serial.list);
+ spin_unlock(&squeue->serial.lock);
- queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+ queue_work_on(padata->cb_cpu, pinst->wq, &squeue->work);
}
spin_unlock_bh(&pd->lock);
@@ -333,19 +302,19 @@ static void padata_reorder_timer(unsigned long arg)
padata_reorder(pd);
}
-static void padata_serial_worker(struct work_struct *work)
+static void padata_serial_worker(struct work_struct *serial_work)
{
- struct padata_queue *queue;
+ struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
local_bh_disable();
- queue = container_of(work, struct padata_queue, swork);
- pd = queue->pd;
+ squeue = container_of(serial_work, struct padata_serial_queue, work);
+ pd = squeue->pd;
- spin_lock(&queue->serial.lock);
- list_replace_init(&queue->serial.list, &local_list);
- spin_unlock(&queue->serial.lock);
+ spin_lock(&squeue->serial.lock);
+ list_replace_init(&squeue->serial.list, &local_list);
+ spin_unlock(&squeue->serial.lock);
while (!list_empty(&local_list)) {
struct padata_priv *padata;
@@ -372,18 +341,18 @@ static void padata_serial_worker(struct work_struct *work)
void padata_do_serial(struct padata_priv *padata)
{
int cpu;
- struct padata_queue *queue;
+ struct padata_parallel_queue *pqueue;
struct parallel_data *pd;
pd = padata->pd;
cpu = get_cpu();
- queue = per_cpu_ptr(pd->queue, cpu);
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
- spin_lock(&queue->reorder.lock);
+ spin_lock(&pqueue->reorder.lock);
atomic_inc(&pd->reorder_objects);
- list_add_tail(&padata->list, &queue->reorder.list);
- spin_unlock(&queue->reorder.lock);
+ list_add_tail(&padata->list, &pqueue->reorder.list);
+ spin_unlock(&pqueue->reorder.lock);
put_cpu();
@@ -391,52 +360,89 @@ void padata_do_serial(struct padata_priv *padata)
}
EXPORT_SYMBOL(padata_do_serial);
-/* Allocate and initialize the internal cpumask dependend resources. */
-static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
- const struct cpumask *cpumask)
+static int padata_setup_cpumasks(struct parallel_data *pd,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
{
- int cpu, cpu_index, num_cpus;
- struct padata_queue *queue;
- struct parallel_data *pd;
-
- cpu_index = 0;
+ if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+ return -ENOMEM;
- pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
- if (!pd)
- goto err;
+ cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
+ if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
+ free_cpumask_var(pd->cpumask.pcpu); /* cbcpu was never allocated; undo pcpu */
+ return -ENOMEM;
+ }
- pd->queue = alloc_percpu(struct padata_queue);
- if (!pd->queue)
- goto err_free_pd;
+ cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
+ return 0;
+}
- if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
- goto err_free_queue;
+static void __padata_list_init(struct padata_list *pd_list)
+{
+ INIT_LIST_HEAD(&pd_list->list);
+ spin_lock_init(&pd_list->lock);
+}
- cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+/* Initialize all percpu queues used by serial workers */
+static void padata_init_squeues(struct parallel_data *pd)
+{
+ int cpu;
+ struct padata_serial_queue *squeue;
- for_each_cpu(cpu, pd->cpumask) {
- queue = per_cpu_ptr(pd->queue, cpu);
+ for_each_cpu(cpu, pd->cpumask.cbcpu) {
+ squeue = per_cpu_ptr(pd->squeue, cpu);
+ squeue->pd = pd;
+ __padata_list_init(&squeue->serial);
+ INIT_WORK(&squeue->work, padata_serial_worker);
+ }
+}
- queue->pd = pd;
+/* Initialize all percpu queues used by parallel workers */
+static void padata_init_pqueues(struct parallel_data *pd)
+{
+ int cpu_index, num_cpus, cpu;
+ struct padata_parallel_queue *pqueue;
- queue->cpu_index = cpu_index;
+ cpu_index = 0;
+ for_each_cpu(cpu, pd->cpumask.pcpu) {
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+ pqueue->pd = pd;
+ pqueue->cpu_index = cpu_index;
cpu_index++;
- INIT_LIST_HEAD(&queue->reorder.list);
- INIT_LIST_HEAD(&queue->parallel.list);
- INIT_LIST_HEAD(&queue->serial.list);
- spin_lock_init(&queue->reorder.lock);
- spin_lock_init(&queue->parallel.lock);
- spin_lock_init(&queue->serial.lock);
-
- INIT_WORK(&queue->pwork, padata_parallel_worker);
- INIT_WORK(&queue->swork, padata_serial_worker);
- atomic_set(&queue->num_obj, 0);
+ __padata_list_init(&pqueue->reorder);
+ __padata_list_init(&pqueue->parallel);
+ INIT_WORK(&pqueue->work, padata_parallel_worker);
+ atomic_set(&pqueue->num_obj, 0);
}
- num_cpus = cpumask_weight(pd->cpumask);
- pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+ num_cpus = cpumask_weight(pd->cpumask.pcpu);
+ pd->max_seq_nr = num_cpus ? (MAX_SEQ_NR / num_cpus) * num_cpus - 1 : 0;
+}
+
+/* Allocate and initialize the internal cpumask dependent resources. */
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
+{
+ struct parallel_data *pd;
+ pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+ if (!pd)
+ goto err;
+
+ pd->pqueue = alloc_percpu(struct padata_parallel_queue);
+ if (!pd->pqueue)
+ goto err_free_pd;
+
+ pd->squeue = alloc_percpu(struct padata_serial_queue);
+ if (!pd->squeue)
+ goto err_free_pqueue;
+ if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
+ goto err_free_squeue;
+
+ padata_init_pqueues(pd);
+ padata_init_squeues(pd);
setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
@@ -446,8 +452,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
return pd;
-err_free_queue:
- free_percpu(pd->queue);
+err_free_squeue:
+ free_percpu(pd->squeue);
+err_free_pqueue:
+ free_percpu(pd->pqueue);
err_free_pd:
kfree(pd);
err:
@@ -456,8 +464,10 @@ err:
static void padata_free_pd(struct parallel_data *pd)
{
- free_cpumask_var(pd->cpumask);
- free_percpu(pd->queue);
+ free_cpumask_var(pd->cpumask.pcpu);
+ free_cpumask_var(pd->cpumask.cbcpu);
+ free_percpu(pd->pqueue);
+ free_percpu(pd->squeue);
kfree(pd);
}
@@ -465,11 +475,12 @@ static void padata_free_pd(struct parallel_data *pd)
static void padata_flush_queues(struct parallel_data *pd)
{
int cpu;
- struct padata_queue *queue;
+ struct padata_parallel_queue *pqueue;
+ struct padata_serial_queue *squeue;
- for_each_cpu(cpu, pd->cpumask) {
- queue = per_cpu_ptr(pd->queue, cpu);
- flush_work(&queue->pwork);
+ for_each_cpu(cpu, pd->cpumask.pcpu) {
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+ flush_work(&pqueue->work);
}
del_timer_sync(&pd->timer);
@@ -477,19 +488,39 @@ static void padata_flush_queues(struct parallel_data *pd)
if (atomic_read(&pd->reorder_objects))
padata_reorder(pd);
- for_each_cpu(cpu, pd->cpumask) {
- queue = per_cpu_ptr(pd->queue, cpu);
- flush_work(&queue->swork);
+ for_each_cpu(cpu, pd->cpumask.cbcpu) {
+ squeue = per_cpu_ptr(pd->squeue, cpu);
+ flush_work(&squeue->work);
}
BUG_ON(atomic_read(&pd->refcnt) != 0);
}
+static void __padata_start(struct padata_instance *pinst)
+{
+ pinst->flags |= PADATA_INIT;
+}
+
+static void __padata_stop(struct padata_instance *pinst)
+{
+ if (!(pinst->flags & PADATA_INIT))
+ return;
+
+ pinst->flags &= ~PADATA_INIT;
+
+ synchronize_rcu();
+
+ get_online_cpus();
+ padata_flush_queues(pinst->pd);
+ put_online_cpus();
+}
+
/* Replace the internal control stucture with a new one. */
static void padata_replace(struct padata_instance *pinst,
struct parallel_data *pd_new)
{
struct parallel_data *pd_old = pinst->pd;
+ int notification_mask = 0;
pinst->flags |= PADATA_RESET;
@@ -497,41 +528,162 @@ static void padata_replace(struct padata_instance *pinst,
synchronize_rcu();
+ if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
+ notification_mask |= PADATA_CPU_PARALLEL;
+ if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
+ notification_mask |= PADATA_CPU_SERIAL;
+
padata_flush_queues(pd_old);
padata_free_pd(pd_old);
+ if (notification_mask)
+ blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
+ notification_mask,
+ &pd_new->cpumask);
+
pinst->flags &= ~PADATA_RESET;
}
/**
- * padata_set_cpumask - set the cpumask that padata should use
+ * padata_register_cpumask_notifier - Registers a notifier that will be called
+ * if either pcpu or cbcpu or both cpumasks change.
*
- * @pinst: padata instance
- * @cpumask: the cpumask to use
+ * @pinst: A pointer to padata instance
+ * @nblock: A pointer to notifier block.
*/
-int padata_set_cpumask(struct padata_instance *pinst,
- cpumask_var_t cpumask)
+int padata_register_cpumask_notifier(struct padata_instance *pinst,
+ struct notifier_block *nblock)
{
+ return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
+ nblock);
+}
+EXPORT_SYMBOL(padata_register_cpumask_notifier);
+
+/**
+ * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
+ * registered earlier using padata_register_cpumask_notifier
+ *
+ * @pinst: A pointer to padata instance.
+ * @nblock: A pointer to notifier block.
+ */
+int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+ struct notifier_block *nblock)
+{
+ return blocking_notifier_chain_unregister(
+ &pinst->cpumask_change_notifier,
+ nblock);
+}
+EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
+
+
+/* If cpumask contains no active cpu, we mark the instance as invalid. */
+static bool padata_validate_cpumask(struct padata_instance *pinst,
+ const struct cpumask *cpumask)
+{
+ if (!cpumask_intersects(cpumask, cpu_active_mask)) {
+ pinst->flags |= PADATA_INVALID;
+ return false;
+ }
+
+ pinst->flags &= ~PADATA_INVALID;
+ return true;
+}
+
+static int __padata_set_cpumasks(struct padata_instance *pinst,
+ cpumask_var_t pcpumask,
+ cpumask_var_t cbcpumask)
+{
+ int valid;
struct parallel_data *pd;
- int err = 0;
+
+ valid = padata_validate_cpumask(pinst, pcpumask);
+ if (!valid) {
+ __padata_stop(pinst);
+ goto out_replace;
+ }
+
+ valid = padata_validate_cpumask(pinst, cbcpumask);
+ if (!valid)
+ __padata_stop(pinst);
+
+out_replace:
+ pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
+ if (!pd)
+ return -ENOMEM;
+
+ cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+ cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
+
+ padata_replace(pinst, pd);
+
+ if (valid)
+ __padata_start(pinst);
+
+ return 0;
+}
+
+/**
+ * padata_set_cpumasks - Set both parallel and serial cpumasks. The first
+ * one is used by parallel workers and the second one
+ * by the workers doing serialization.
+ *
+ * @pinst: padata instance
+ * @pcpumask: the cpumask to use for parallel workers
+ * @cbcpumask: the cpumask to use for serial workers
+ */
+int padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t pcpumask,
+ cpumask_var_t cbcpumask)
+{
+ int err;
mutex_lock(&pinst->lock);
+ get_online_cpus();
+ err = __padata_set_cpumasks(pinst, pcpumask, cbcpumask);
+
+ put_online_cpus();
+ mutex_unlock(&pinst->lock);
+
+ return err;
+
+}
+EXPORT_SYMBOL(padata_set_cpumasks);
+
+/**
+ * padata_set_cpumask: Set the cpumask selected by @cpumask_type to the
+ * value of @cpumask.
+ *
+ * @pinst: padata instance
+ * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL, selecting the
+ * serial or the parallel cpumask respectively.
+ * @cpumask: the cpumask to use
+ */
+int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+ cpumask_var_t cpumask)
+{
+ struct cpumask *serial_mask, *parallel_mask;
+ int err = -EINVAL;
+
+ mutex_lock(&pinst->lock);
get_online_cpus();
- pd = padata_alloc_pd(pinst, cpumask);
- if (!pd) {
- err = -ENOMEM;
- goto out;
+ switch (cpumask_type) {
+ case PADATA_CPU_PARALLEL:
+ serial_mask = pinst->cpumask.cbcpu;
+ parallel_mask = cpumask;
+ break;
+ case PADATA_CPU_SERIAL:
+ parallel_mask = pinst->cpumask.pcpu;
+ serial_mask = cpumask;
+ break;
+ default:
+ goto out;
}
- cpumask_copy(pinst->cpumask, cpumask);
-
- padata_replace(pinst, pd);
+ err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
out:
put_online_cpus();
-
mutex_unlock(&pinst->lock);
return err;
@@ -543,30 +695,48 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
struct parallel_data *pd;
if (cpumask_test_cpu(cpu, cpu_active_mask)) {
- pd = padata_alloc_pd(pinst, pinst->cpumask);
+ pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+ pinst->cpumask.cbcpu);
if (!pd)
return -ENOMEM;
padata_replace(pinst, pd);
+
+ if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
+ padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
+ __padata_start(pinst);
}
return 0;
}
-/**
- * padata_add_cpu - add a cpu to the padata cpumask
+ /**
+ * padata_add_cpu - add a cpu to one or both (parallel and serial)
+ * padata cpumasks.
*
* @pinst: padata instance
* @cpu: cpu to add
+ * @mask: bitmask of flags specifying to which cpumask @cpu should be added.
+ * The @mask may be any combination of the following flags:
+ * PADATA_CPU_SERIAL - serial cpumask
+ * PADATA_CPU_PARALLEL - parallel cpumask
*/
-int padata_add_cpu(struct padata_instance *pinst, int cpu)
+
+int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask)
{
int err;
+ if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+ return -EINVAL;
+
mutex_lock(&pinst->lock);
get_online_cpus();
- cpumask_set_cpu(cpu, pinst->cpumask);
+ if (mask & PADATA_CPU_SERIAL)
+ cpumask_set_cpu(cpu, pinst->cpumask.cbcpu);
+ if (mask & PADATA_CPU_PARALLEL)
+ cpumask_set_cpu(cpu, pinst->cpumask.pcpu);
+
err = __padata_add_cpu(pinst, cpu);
put_online_cpus();
@@ -578,10 +748,16 @@ EXPORT_SYMBOL(padata_add_cpu);
static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
- struct parallel_data *pd;
+ struct parallel_data *pd = NULL;
if (cpumask_test_cpu(cpu, cpu_online_mask)) {
- pd = padata_alloc_pd(pinst, pinst->cpumask);
+
+ if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
+ !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
+ __padata_stop(pinst);
+
+ pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
+ pinst->cpumask.cbcpu);
if (!pd)
return -ENOMEM;
@@ -591,20 +767,32 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
return 0;
}
-/**
- * padata_remove_cpu - remove a cpu from the padata cpumask
+ /**
+ * padata_remove_cpu - remove a cpu from one or both (serial and parallel)
+ * padata cpumasks.
*
* @pinst: padata instance
* @cpu: cpu to remove
+ * @mask: bitmask specifying from which cpumask @cpu should be removed
+ * The @mask may be any combination of the following flags:
+ * PADATA_CPU_SERIAL - serial cpumask
+ * PADATA_CPU_PARALLEL - parallel cpumask
*/
-int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
{
int err;
+ if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
+ return -EINVAL;
+
mutex_lock(&pinst->lock);
get_online_cpus();
- cpumask_clear_cpu(cpu, pinst->cpumask);
+ if (mask & PADATA_CPU_SERIAL)
+ cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
+ if (mask & PADATA_CPU_PARALLEL)
+ cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
+
err = __padata_remove_cpu(pinst, cpu);
put_online_cpus();
@@ -619,11 +807,20 @@ EXPORT_SYMBOL(padata_remove_cpu);
*
* @pinst: padata instance to start
*/
-void padata_start(struct padata_instance *pinst)
+int padata_start(struct padata_instance *pinst)
{
+ int err = 0;
+
mutex_lock(&pinst->lock);
- pinst->flags |= PADATA_INIT;
+
+ if (pinst->flags & PADATA_INVALID)
+ err =-EINVAL;
+
+ __padata_start(pinst);
+
mutex_unlock(&pinst->lock);
+
+ return err;
}
EXPORT_SYMBOL(padata_start);
@@ -635,12 +832,20 @@ EXPORT_SYMBOL(padata_start);
void padata_stop(struct padata_instance *pinst)
{
mutex_lock(&pinst->lock);
- pinst->flags &= ~PADATA_INIT;
+ __padata_stop(pinst);
mutex_unlock(&pinst->lock);
}
EXPORT_SYMBOL(padata_stop);
#ifdef CONFIG_HOTPLUG_CPU
+
+static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
+{
+ return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
+ cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
+}
+
+
static int padata_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
@@ -653,7 +858,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- if (!cpumask_test_cpu(cpu, pinst->cpumask))
+ if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
err = __padata_add_cpu(pinst, cpu);
@@ -664,7 +869,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
- if (!cpumask_test_cpu(cpu, pinst->cpumask))
+ if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
err = __padata_remove_cpu(pinst, cpu);
@@ -675,7 +880,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
- if (!cpumask_test_cpu(cpu, pinst->cpumask))
+ if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
__padata_remove_cpu(pinst, cpu);
@@ -683,7 +888,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- if (!cpumask_test_cpu(cpu, pinst->cpumask))
+ if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
__padata_add_cpu(pinst, cpu);
@@ -694,36 +899,202 @@ static int padata_cpu_callback(struct notifier_block *nfb,
}
#endif
+static void __padata_free(struct padata_instance *pinst)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
+ padata_stop(pinst);
+ padata_free_pd(pinst->pd);
+ free_cpumask_var(pinst->cpumask.pcpu);
+ free_cpumask_var(pinst->cpumask.cbcpu);
+ kfree(pinst);
+}
+
+#define kobj2pinst(_kobj) \
+ container_of(_kobj, struct padata_instance, kobj)
+#define attr2pentry(_attr) \
+ container_of(_attr, struct padata_sysfs_entry, attr)
+
+static void padata_sysfs_release(struct kobject *kobj)
+{
+ struct padata_instance *pinst = kobj2pinst(kobj);
+ __padata_free(pinst);
+}
+
+struct padata_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
+ ssize_t (*store)(struct padata_instance *, struct attribute *,
+ const char *, size_t);
+};
+
+static ssize_t show_cpumask(struct padata_instance *pinst,
+ struct attribute *attr, char *buf)
+{
+ struct cpumask *cpumask;
+ ssize_t len;
+
+ mutex_lock(&pinst->lock);
+ if (!strcmp(attr->name, "serial_cpumask"))
+ cpumask = pinst->cpumask.cbcpu;
+ else
+ cpumask = pinst->cpumask.pcpu;
+
+ len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask),
+ nr_cpu_ids);
+ if (PAGE_SIZE - len < 2)
+ len = -EINVAL;
+ else
+ len += sprintf(buf + len, "\n");
+
+ mutex_unlock(&pinst->lock);
+ return len;
+}
+
+static ssize_t store_cpumask(struct padata_instance *pinst,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ cpumask_var_t new_cpumask;
+ ssize_t ret;
+ int mask_type;
+
+ if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
+ nr_cpumask_bits);
+ if (ret < 0)
+ goto out;
+
+ mask_type = !strcmp(attr->name, "serial_cpumask") ?
+ PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
+ ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
+ if (!ret)
+ ret = count;
+
+out:
+ free_cpumask_var(new_cpumask);
+ return ret;
+}
+
+#define PADATA_ATTR_RW(_name, _show_name, _store_name) \
+ static struct padata_sysfs_entry _name##_attr = \
+ __ATTR(_name, 0644, _show_name, _store_name)
+#define PADATA_ATTR_RO(_name, _show_name) \
+ static struct padata_sysfs_entry _name##_attr = \
+ __ATTR(_name, 0400, _show_name, NULL)
+
+PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
+PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
+
+/*
+ * Padata sysfs provides the following objects:
+ * serial_cpumask [RW] - cpumask for serial workers
+ * parallel_cpumask [RW] - cpumask for parallel workers
+ */
+static struct attribute *padata_default_attrs[] = {
+ &serial_cpumask_attr.attr,
+ &parallel_cpumask_attr.attr,
+ NULL,
+};
+
+static ssize_t padata_sysfs_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct padata_instance *pinst;
+ struct padata_sysfs_entry *pentry;
+ ssize_t ret = -EIO;
+
+ pinst = kobj2pinst(kobj);
+ pentry = attr2pentry(attr);
+ if (pentry->show)
+ ret = pentry->show(pinst, attr, buf);
+
+ return ret;
+}
+
+static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct padata_instance *pinst;
+ struct padata_sysfs_entry *pentry;
+ ssize_t ret = -EIO;
+
+ pinst = kobj2pinst(kobj);
+ pentry = attr2pentry(attr);
+ if (pentry->store) /* check the hook that is called, not ->show */
+ ret = pentry->store(pinst, attr, buf, count);
+
+ return ret;
+}
+
+static const struct sysfs_ops padata_sysfs_ops = {
+ .show = padata_sysfs_show,
+ .store = padata_sysfs_store,
+};
+
+static struct kobj_type padata_attr_type = {
+ .sysfs_ops = &padata_sysfs_ops,
+ .default_attrs = padata_default_attrs,
+ .release = padata_sysfs_release,
+};
+
/**
- * padata_alloc - allocate and initialize a padata instance
+ * padata_alloc_possible - Allocate and initialize padata instance.
+ * Use the cpu_possible_mask for serial and
+ * parallel workers.
*
- * @cpumask: cpumask that padata uses for parallelization
* @wq: workqueue to use for the allocated padata instance
*/
-struct padata_instance *padata_alloc(const struct cpumask *cpumask,
- struct workqueue_struct *wq)
+struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+{
+ return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+}
+EXPORT_SYMBOL(padata_alloc_possible);
+
+/**
+ * padata_alloc - allocate and initialize a padata instance and specify
+ * cpumasks for serial and parallel workers.
+ *
+ * @wq: workqueue to use for the allocated padata instance
+ * @pcpumask: cpumask that will be used for padata parallelization
+ * @cbcpumask: cpumask that will be used for padata serialization
+ */
+struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask)
{
struct padata_instance *pinst;
- struct parallel_data *pd;
+ struct parallel_data *pd = NULL;
pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
if (!pinst)
goto err;
get_online_cpus();
-
- pd = padata_alloc_pd(pinst, cpumask);
- if (!pd)
+ if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
goto err_free_inst;
+ if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
+ free_cpumask_var(pinst->cpumask.pcpu);
+ goto err_free_inst;
+ }
+ if (!padata_validate_cpumask(pinst, pcpumask) ||
+ !padata_validate_cpumask(pinst, cbcpumask))
+ goto err_free_masks;
- if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
- goto err_free_pd;
+ pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
+ if (!pd)
+ goto err_free_masks;
rcu_assign_pointer(pinst->pd, pd);
pinst->wq = wq;
- cpumask_copy(pinst->cpumask, cpumask);
+ cpumask_copy(pinst->cpumask.pcpu, pcpumask);
+ cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
pinst->flags = 0;
@@ -735,12 +1106,15 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
put_online_cpus();
+ BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
+ kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
return pinst;
-err_free_pd:
- padata_free_pd(pd);
+err_free_masks:
+ free_cpumask_var(pinst->cpumask.pcpu);
+ free_cpumask_var(pinst->cpumask.cbcpu);
err_free_inst:
kfree(pinst);
put_online_cpus();
@@ -756,19 +1130,6 @@ EXPORT_SYMBOL(padata_alloc);
*/
void padata_free(struct padata_instance *pinst)
{
- padata_stop(pinst);
-
- synchronize_rcu();
-
-#ifdef CONFIG_HOTPLUG_CPU
- unregister_hotcpu_notifier(&pinst->cpu_notifier);
-#endif
- get_online_cpus();
- padata_flush_queues(pinst->pd);
- put_online_cpus();
-
- padata_free_pd(pinst->pd);
- free_cpumask_var(pinst->cpumask);
- kfree(pinst);
+ kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index f42d3f7..996a4de 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -48,59 +48,49 @@
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
-struct pm_qos_request_list {
- struct list_head list;
- union {
- s32 value;
- s32 usec;
- s32 kbps;
- };
- int pm_qos_class;
+enum pm_qos_type {
+ PM_QOS_MAX, /* return the largest value */
+ PM_QOS_MIN /* return the smallest value */
};
-static s32 max_compare(s32 v1, s32 v2);
-static s32 min_compare(s32 v1, s32 v2);
-
struct pm_qos_object {
- struct pm_qos_request_list requests;
+ struct plist_head requests;
struct blocking_notifier_head *notifiers;
struct miscdevice pm_qos_power_miscdev;
char *name;
s32 default_value;
- atomic_t target_value;
- s32 (*comparitor)(s32, s32);
+ enum pm_qos_type type;
};
+static DEFINE_SPINLOCK(pm_qos_lock);
+
static struct pm_qos_object null_pm_qos;
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
static struct pm_qos_object cpu_dma_pm_qos = {
- .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
+ .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
- .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
- .comparitor = min_compare
+ .type = PM_QOS_MIN,
};
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
static struct pm_qos_object network_lat_pm_qos = {
- .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
+ .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
.notifiers = &network_lat_notifier,
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
- .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
- .comparitor = min_compare
+ .type = PM_QOS_MIN
};
static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
static struct pm_qos_object network_throughput_pm_qos = {
- .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
+ .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
.default_value = 0,
- .target_value = ATOMIC_INIT(0),
- .comparitor = max_compare
+ .type = PM_QOS_MAX,
};
@@ -111,8 +101,6 @@ static struct pm_qos_object *pm_qos_array[] = {
&network_throughput_pm_qos
};
-static DEFINE_SPINLOCK(pm_qos_lock);
-
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos);
static int pm_qos_power_open(struct inode *inode, struct file *filp);
@@ -124,46 +112,55 @@ static const struct file_operations pm_qos_power_fops = {
.release = pm_qos_power_release,
};
-/* static helper functions */
-static s32 max_compare(s32 v1, s32 v2)
+/* unlocked internal variant */
+static inline int pm_qos_get_value(struct pm_qos_object *o)
{
- return max(v1, v2);
-}
+ if (plist_head_empty(&o->requests))
+ return o->default_value;
-static s32 min_compare(s32 v1, s32 v2)
-{
- return min(v1, v2);
-}
+ switch (o->type) {
+ case PM_QOS_MIN:
+ return plist_last(&o->requests)->prio;
+ case PM_QOS_MAX:
+ return plist_first(&o->requests)->prio;
-static void update_target(int pm_qos_class)
+ default:
+ /* runtime check for not using enum */
+ BUG();
+ }
+}
+
+static void update_target(struct pm_qos_object *o, struct plist_node *node,
+ int del, int value)
{
- s32 extreme_value;
- struct pm_qos_request_list *node;
unsigned long flags;
- int call_notifier = 0;
+ int prev_value, curr_value;
spin_lock_irqsave(&pm_qos_lock, flags);
- extreme_value = pm_qos_array[pm_qos_class]->default_value;
- list_for_each_entry(node,
- &pm_qos_array[pm_qos_class]->requests.list, list) {
- extreme_value = pm_qos_array[pm_qos_class]->comparitor(
- extreme_value, node->value);
- }
- if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
- extreme_value) {
- call_notifier = 1;
- atomic_set(&pm_qos_array[pm_qos_class]->target_value,
- extreme_value);
- pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
- atomic_read(&pm_qos_array[pm_qos_class]->target_value));
+ prev_value = pm_qos_get_value(o);
+ /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */
+ if (value != PM_QOS_DEFAULT_VALUE) {
+ /*
+ * to change the list, we atomically remove, reinit
+ * with new value and add, then see if the extremal
+ * changed
+ */
+ plist_del(node, &o->requests);
+ plist_node_init(node, value);
+ plist_add(node, &o->requests);
+ } else if (del) {
+ plist_del(node, &o->requests);
+ } else {
+ plist_add(node, &o->requests);
}
+ curr_value = pm_qos_get_value(o);
spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (call_notifier)
- blocking_notifier_call_chain(
- pm_qos_array[pm_qos_class]->notifiers,
- (unsigned long) extreme_value, NULL);
+ if (prev_value != curr_value)
+ blocking_notifier_call_chain(o->notifiers,
+ (unsigned long)curr_value,
+ NULL);
}
static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -196,10 +193,23 @@ static int find_pm_qos_object_by_minor(int minor)
*/
int pm_qos_request(int pm_qos_class)
{
- return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
+ unsigned long flags;
+ int value;
+
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+ return value;
}
EXPORT_SYMBOL_GPL(pm_qos_request);
+int pm_qos_request_active(struct pm_qos_request_list *req)
+{
+ return req->pm_qos_class != 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_request_active);
+
/**
* pm_qos_add_request - inserts new qos request into the list
* @pm_qos_class: identifies which list of qos request to us
@@ -211,27 +221,23 @@ EXPORT_SYMBOL_GPL(pm_qos_request);
* element as a handle for use in updating and removal. Call needs to save
* this handle for later use.
*/
-struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
+void pm_qos_add_request(struct pm_qos_request_list *dep,
+ int pm_qos_class, s32 value)
{
- struct pm_qos_request_list *dep;
- unsigned long flags;
+ struct pm_qos_object *o = pm_qos_array[pm_qos_class];
+ int new_value;
- dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
- if (dep) {
- if (value == PM_QOS_DEFAULT_VALUE)
- dep->value = pm_qos_array[pm_qos_class]->default_value;
- else
- dep->value = value;
- dep->pm_qos_class = pm_qos_class;
-
- spin_lock_irqsave(&pm_qos_lock, flags);
- list_add(&dep->list,
- &pm_qos_array[pm_qos_class]->requests.list);
- spin_unlock_irqrestore(&pm_qos_lock, flags);
- update_target(pm_qos_class);
+ if (pm_qos_request_active(dep)) {
+ WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
+ return;
}
-
- return dep;
+ if (value == PM_QOS_DEFAULT_VALUE)
+ new_value = o->default_value;
+ else
+ new_value = value;
+ plist_node_init(&dep->list, new_value);
+ dep->pm_qos_class = pm_qos_class;
+ update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
}
EXPORT_SYMBOL_GPL(pm_qos_add_request);
@@ -246,27 +252,28 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request);
* Attempts are made to make this code callable on hot code paths.
*/
void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
- s32 new_value)
+ s32 new_value)
{
- unsigned long flags;
- int pending_update = 0;
s32 temp;
+ struct pm_qos_object *o;
+
+ if (!pm_qos_req) /*guard against callers passing in null */
+ return;
- if (pm_qos_req) { /*guard against callers passing in null */
- spin_lock_irqsave(&pm_qos_lock, flags);
- if (new_value == PM_QOS_DEFAULT_VALUE)
- temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
- else
- temp = new_value;
-
- if (temp != pm_qos_req->value) {
- pending_update = 1;
- pm_qos_req->value = temp;
- }
- spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (pending_update)
- update_target(pm_qos_req->pm_qos_class);
+ if (!pm_qos_request_active(pm_qos_req)) {
+ WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
+ return;
}
+
+ o = pm_qos_array[pm_qos_req->pm_qos_class];
+
+ if (new_value == PM_QOS_DEFAULT_VALUE)
+ temp = o->default_value;
+ else
+ temp = new_value;
+
+ if (temp != pm_qos_req->list.prio)
+ update_target(o, &pm_qos_req->list, 0, temp);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);
@@ -280,19 +287,20 @@ EXPORT_SYMBOL_GPL(pm_qos_update_request);
*/
void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
{
- unsigned long flags;
- int qos_class;
+ struct pm_qos_object *o;
if (pm_qos_req == NULL)
return;
/* silent return to keep pcm code cleaner */
- qos_class = pm_qos_req->pm_qos_class;
- spin_lock_irqsave(&pm_qos_lock, flags);
- list_del(&pm_qos_req->list);
- kfree(pm_qos_req);
- spin_unlock_irqrestore(&pm_qos_lock, flags);
- update_target(qos_class);
+ if (!pm_qos_request_active(pm_qos_req)) {
+ WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
+ return;
+ }
+
+ o = pm_qos_array[pm_qos_req->pm_qos_class];
+ update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE);
+ memset(pm_qos_req, 0, sizeof(*pm_qos_req));
}
EXPORT_SYMBOL_GPL(pm_qos_remove_request);
@@ -340,8 +348,12 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
- filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
- PM_QOS_DEFAULT_VALUE);
+ struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL); /* (size, flags) */
+ if (!req)
+ return -ENOMEM;
+
+ pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
+ filp->private_data = req;
if (filp->private_data)
return 0;
@@ -353,8 +365,9 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
struct pm_qos_request_list *req;
- req = (struct pm_qos_request_list *)filp->private_data;
+ req = filp->private_data;
pm_qos_remove_request(req);
+ kfree(req);
return 0;
}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index aa9e916..8dc31e0 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
- * Copyright (c) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
*
* This file is released under the GPLv2.
@@ -277,7 +277,7 @@ static int create_image(int platform_mode)
goto Enable_irqs;
}
- if (hibernation_test(TEST_CORE))
+ if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events())
goto Power_up;
in_suspend = 1;
@@ -288,8 +288,10 @@ static int create_image(int platform_mode)
error);
/* Restore control flow magically appears here */
restore_processor_state();
- if (!in_suspend)
+ if (!in_suspend) {
+ events_check_enabled = false;
platform_leave(platform_mode);
+ }
Power_up:
sysdev_resume();
@@ -328,7 +330,7 @@ int hibernation_snapshot(int platform_mode)
error = platform_begin(platform_mode);
if (error)
- return error;
+ goto Close;
/* Preallocate image memory before shutting down devices. */
error = hibernate_preallocate_memory();
@@ -511,18 +513,24 @@ int hibernation_platform_enter(void)
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
+ if (!pm_check_wakeup_events()) {
+ error = -EAGAIN;
+ goto Power_up;
+ }
+
hibernation_ops->enter();
/* We should never get here */
while (1);
- /*
- * We don't need to reenable the nonboot CPUs or resume consoles, since
- * the system is going to be halted anyway.
- */
+ Power_up:
+ sysdev_resume();
+ local_irq_enable();
+ enable_nonboot_cpus();
+
Platform_finish:
hibernation_ops->finish();
- dpm_suspend_noirq(PMSG_RESTORE);
+ dpm_resume_noirq(PMSG_RESTORE);
Resume_devices:
entering_platform_hibernation = false;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b58800b..62b0bc6 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
power_attr(state);
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The 'wakeup_count' attribute, along with the functions defined in
+ * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
+ * handled in a non-racy way.
+ *
+ * If a wakeup event occurs when the system is in a sleep state, it simply is
+ * woken up. In turn, if an event that would wake the system up from a sleep
+ * state occurs when it is undergoing a transition to that sleep state, the
+ * transition should be aborted. Moreover, if such an event occurs when the
+ * system is in the working state, an attempt to start a transition to the
+ * given sleep state should fail during certain period after the detection of
+ * the event. Using the 'state' attribute alone is not sufficient to satisfy
+ * these requirements, because a wakeup event may occur exactly when 'state'
+ * is being written to and may be delivered to user space right before it is
+ * frozen, so the event will remain only partially processed until the system is
+ * woken up by another event. In particular, it won't cause the transition to
+ * a sleep state to be aborted.
+ *
+ * This difficulty may be overcome if user space uses 'wakeup_count' before
+ * writing to 'state'. It first should read from 'wakeup_count' and store
+ * the read value. Then, after carrying out its own preparations for the system
+ * transition to a sleep state, it should write the stored value to
+ * 'wakeup_count'. If that fails, at least one wakeup event has occurred since
+ * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it
+ * is allowed to write to 'state', but the transition will be aborted if there
+ * are any wakeup events detected after 'wakeup_count' was written to.
+ */
+
+static ssize_t wakeup_count_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	unsigned long count;
+	bool ok = pm_get_wakeup_count(&count);	/* false => -EINTR */
+
+	return ok ? sprintf(buf, "%lu\n", count) : -EINTR;
+}
+
+static ssize_t wakeup_count_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t n)
+{
+	unsigned long count;
+
+	/* Reject input that is not a number or that cannot be saved. */
+	if (sscanf(buf, "%lu", &count) != 1 || !pm_save_wakeup_count(count))
+		return -EINVAL;
+
+	return n;
+}
+
+power_attr(wakeup_count);
+#endif /* CONFIG_PM_SLEEP */
+
#ifdef CONFIG_PM_TRACE
int pm_trace_enabled;
@@ -236,6 +290,7 @@ static struct attribute * g[] = {
#endif
#ifdef CONFIG_PM_SLEEP
&pm_async_attr.attr,
+ &wakeup_count_attr.attr,
#ifdef CONFIG_PM_DEBUG
&pm_test_attr.attr,
#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 25ce010..f6cd6fa 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -3,7 +3,7 @@
*
* This file provides system snapshot/restore functionality for swsusp.
*
- * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*
* This file is released under the GPLv2.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index f37cb7d..7335952 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state)
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
if (error)
- return error;
+ goto Platform_finish;
}
error = dpm_suspend_noirq(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down\n");
- goto Platfrom_finish;
+ goto Platform_finish;
}
if (suspend_ops->prepare_late) {
error = suspend_ops->prepare_late();
if (error)
- goto Power_up_devices;
+ goto Platform_wake;
}
if (suspend_test(TEST_PLATFORM))
@@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state)
error = sysdev_suspend(PMSG_SUSPEND);
if (!error) {
- if (!suspend_test(TEST_CORE))
+ if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) {
error = suspend_ops->enter(state);
+ events_check_enabled = false;
+ }
sysdev_resume();
}
@@ -178,10 +180,9 @@ static int suspend_enter(suspend_state_t state)
if (suspend_ops->wake)
suspend_ops->wake();
- Power_up_devices:
dpm_resume_noirq(PMSG_RESUME);
- Platfrom_finish:
+ Platform_finish:
if (suspend_ops->finish)
suspend_ops->finish();
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b0bb217..e6a5bdf 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -4,7 +4,7 @@
* This file provides functions for reading the suspend image from
* and writing it to a swap partition.
*
- * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
* Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
*
* This file is released under the GPLv2.
@@ -32,7 +32,7 @@
/*
* The swap map is a data structure used for keeping track of each page
* written to a swap partition. It consists of many swap_map_page
- * structures that contain each an array of MAP_PAGE_SIZE swap entries.
+ * structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
* These structures are stored on the swap and linked together with the
* help of the .next_swap member.
*
@@ -148,7 +148,7 @@ sector_t alloc_swapdev_block(int swap)
/**
* free_all_swap_pages - free swap pages allocated for saving image data.
- * It also frees the extents used to register which swap entres had been
+ * It also frees the extents used to register which swap entries had been
* allocated.
*/
diff --git a/kernel/printk.c b/kernel/printk.c
index 444b770..4ab0164 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -37,6 +37,8 @@
#include <linux/ratelimit.h>
#include <linux/kmsg_dump.h>
#include <linux/syslog.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
#include <asm/uaccess.h>
@@ -985,6 +987,32 @@ void resume_console(void)
}
/**
+ * console_cpu_notify - print deferred console messages after CPU hotplug
+ * @self: notifier struct
+ * @action: CPU hotplug event
+ * @hcpu: unused
+ *
+ * If printk() is called from a CPU that is not online yet, the messages
+ * will be spooled but will not show up on the console. This function is
+ * called when a new CPU comes online (or fails to come up), and ensures
+ * that any such output gets printed.
+ */
+static int __cpuinit console_cpu_notify(struct notifier_block *self,
+	unsigned long action, void *hcpu)
+{
+	bool flush = action == CPU_ONLINE || action == CPU_DEAD ||
+		     action == CPU_DYING || action == CPU_DOWN_FAILED ||
+		     action == CPU_UP_CANCELED;
+
+	if (flush) {
+		/* Cycling the console semaphore gets spooled output printed. */
+		acquire_console_sem();
+		release_console_sem();
+	}
+	return NOTIFY_OK;
+}
+
+/**
* acquire_console_sem - lock the console system for exclusive use.
*
* Acquires a semaphore which guarantees that the caller has
@@ -1371,7 +1399,7 @@ int unregister_console(struct console *console)
}
EXPORT_SYMBOL(unregister_console);
-static int __init disable_boot_consoles(void)
+static int __init printk_late_init(void)
{
struct console *con;
@@ -1382,9 +1410,10 @@ static int __init disable_boot_consoles(void)
unregister_console(con);
}
}
+ hotcpu_notifier(console_cpu_notify, 0);
return 0;
}
-late_initcall(disable_boot_consoles);
+late_initcall(printk_late_init);
#if defined CONFIG_PRINTK
diff --git a/kernel/signal.c b/kernel/signal.c
index 906ae5a..bded651 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -637,7 +637,7 @@ static inline bool si_fromuser(const struct siginfo *info)
/*
* Bad permissions for sending the signal
- * - the caller must hold at least the RCU read lock
+ * - the caller must hold the RCU read lock
*/
static int check_kill_permission(int sig, struct siginfo *info,
struct task_struct *t)
@@ -1127,11 +1127,14 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
/*
* send signal info to all the members of a group
- * - the caller must hold the RCU read lock at least
*/
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- int ret = check_kill_permission(sig, info, p);
+ int ret;
+
+ rcu_read_lock();
+ ret = check_kill_permission(sig, info, p);
+ rcu_read_unlock();
if (!ret && sig)
ret = do_send_sig_info(sig, info, p, true);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index b3bafd5..48b2761 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -188,7 +188,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
/*
* Setup the next period for devices, which do not have
* periodic mode. We read dev->next_event first and add to it
- * when the event alrady expired. clockevents_program_event()
+ * when the event already expired. clockevents_program_event()
* sets dev->next_event only when the event is really
* programmed to the device.
*/
diff --git a/kernel/timer.c b/kernel/timer.c
index ee305c8..efde11e 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -577,6 +577,19 @@ static void __init_timer(struct timer_list *timer,
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
+void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
+ const char *name,
+ struct lock_class_key *key,
+ void (*function)(unsigned long),
+ unsigned long data)
+{ /* fill in @timer, run init_timer_on_stack_key() on it, then mark it deferrable */
+ timer->function = function; /* caller-supplied handler */
+ timer->data = data; /* presumably the handler's argument - confirm */
+ init_timer_on_stack_key(timer, name, key); /* @name and @key are passed through */
+ timer_set_deferrable(timer); /* applied last, after the init call */
+}
+EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
+
/**
* init_timer_key - initialize a timer
* @timer: the timer to be initialized
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index b2d70d3..2591583 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
+#include <linux/highuid.h>
#include <linux/cred.h>
/*
@@ -82,3 +83,46 @@ void free_user_ns(struct kref *kref)
schedule_work(&ns->destroyer);
}
EXPORT_SYMBOL(free_user_ns);
+
+uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid)
+{
+	struct user_namespace *ns = to;
+
+	if (likely(cred->user->user_ns == to))
+		return uid;
+
+	/*
+	 * Follow each namespace's creator from 'to' up towards init_user_ns
+	 * (init_user_ns itself is not examined).  If cred->user created any
+	 * namespace on that path, uid 0 is returned.
+	 */
+	while (ns != &init_user_ns) {
+		if (ns->creator == cred->user)
+			return (uid_t)0;
+		ns = ns->creator->user_ns;
+	}
+
+	/* Unrelated namespaces: fall back to the overflow uid */
+	return overflowuid;
+}
+
+gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid)
+{
+	struct user_namespace *ns = to;
+
+	if (likely(cred->user->user_ns == to))
+		return gid;
+
+	/*
+	 * Follow each namespace's creator from 'to' up towards init_user_ns;
+	 * if cred->user created any namespace on that path, gid 0 is returned.
+	 */
+	while (ns != &init_user_ns) {
+		if (ns->creator == cred->user)
+			return (gid_t)0;
+		ns = ns->creator->user_ns;
+	}
+
+	/* Unrelated namespaces: fall back to the overflow gid */
+	return overflowgid;
+}