aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c8
-rw-r--r--kernel/auditfilter.c10
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/perf_event.c17
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/pm_qos_params.c24
-rw-r--r--kernel/power/Kconfig237
-rw-r--r--kernel/power/hibernate.c9
-rw-r--r--kernel/power/main.c3
-rw-r--r--kernel/power/snapshot.c8
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/printk.c138
-rw-r--r--kernel/sched.c12
-rw-r--r--kernel/smp.c71
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sysctl.c5
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/workqueue.c6
21 files changed, 341 insertions, 228 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index e495624..9395003 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -74,6 +74,8 @@ static int audit_initialized;
int audit_enabled;
int audit_ever_enabled;
+EXPORT_SYMBOL_GPL(audit_enabled);
+
/* Default state when kernel boots without any parameters. */
static int audit_default;
@@ -671,9 +673,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
pid = NETLINK_CREDS(skb)->pid;
uid = NETLINK_CREDS(skb)->uid;
- loginuid = NETLINK_CB(skb).loginuid;
- sessionid = NETLINK_CB(skb).sessionid;
- sid = NETLINK_CB(skb).sid;
+ loginuid = audit_get_loginuid(current);
+ sessionid = audit_get_sessionid(current);
+ security_task_getsecid(current, &sid);
seq = nlh->nlmsg_seq;
data = NLMSG_DATA(nlh);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index add2819..f8277c8 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1238,6 +1238,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &rule->fields[i];
int result = 0;
+ u32 sid;
switch (f->type) {
case AUDIT_PID:
@@ -1250,19 +1251,22 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
result = audit_comparator(cb->creds.gid, f->op, f->val);
break;
case AUDIT_LOGINUID:
- result = audit_comparator(cb->loginuid, f->op, f->val);
+ result = audit_comparator(audit_get_loginuid(current),
+ f->op, f->val);
break;
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
case AUDIT_SUBJ_SEN:
case AUDIT_SUBJ_CLR:
- if (f->lsm_rule)
- result = security_audit_rule_match(cb->sid,
+ if (f->lsm_rule) {
+ security_task_getsecid(current, &sid);
+ result = security_audit_rule_match(sid,
f->type,
f->op,
f->lsm_rule,
NULL);
+ }
break;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e4291..05b92c4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,7 @@ void __put_task_struct(struct task_struct *tsk)
if (!profile_handoff_task(tsk))
free_task(tsk);
}
+EXPORT_SYMBOL_GPL(__put_task_struct);
/*
* macro override instead of weak attribute alias, to workaround
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 70a298d..b8cadf7 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
- depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
+ depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
default n
---help---
This options activates profiling for the entire kernel.
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ed253aa..3472bb1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5122,7 +5122,7 @@ static int perf_exclude_event(struct perf_event *event,
struct pt_regs *regs)
{
if (event->hw.state & PERF_HES_STOPPED)
- return 0;
+ return 1;
if (regs) {
if (event->attr.exclude_user && user_mode(regs))
@@ -5478,6 +5478,8 @@ static int perf_tp_event_match(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ if (event->hw.state & PERF_HES_STOPPED)
+ return 0;
/*
* All tracepoints are from kernel-space.
*/
@@ -6720,17 +6722,20 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
- struct perf_event *parent_event;
+ if (child_event->parent) {
+ raw_spin_lock_irq(&child_ctx->lock);
+ perf_group_detach(child_event);
+ raw_spin_unlock_irq(&child_ctx->lock);
+ }
perf_remove_from_context(child_event);
- parent_event = child_event->parent;
/*
- * It can happen that parent exits first, and has events
+ * It can happen that the parent exits first, and has events
* that are still around due to the child reference. These
- * events need to be zapped - but otherwise linger.
+ * events need to be zapped.
*/
- if (parent_event) {
+ if (child_event->parent) {
sync_child_event(child_event, child);
free_event(child_event);
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 39b65b6..02f2212 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -435,6 +435,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
rcu_read_unlock();
return pid;
}
+EXPORT_SYMBOL_GPL(get_task_pid);
struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
@@ -446,6 +447,7 @@ struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
rcu_read_unlock();
return result;
}
+EXPORT_SYMBOL_GPL(get_pid_task);
struct pid *find_get_pid(pid_t nr)
{
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index aeaa7f8..0da058b 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -103,11 +103,14 @@ static struct pm_qos_object *pm_qos_array[] = {
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos);
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos);
static int pm_qos_power_open(struct inode *inode, struct file *filp);
static int pm_qos_power_release(struct inode *inode, struct file *filp);
static const struct file_operations pm_qos_power_fops = {
.write = pm_qos_power_write,
+ .read = pm_qos_power_read,
.open = pm_qos_power_open,
.release = pm_qos_power_release,
.llseek = noop_llseek,
@@ -376,6 +379,27 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
}
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ unsigned long flags;
+ struct pm_qos_object *o;
+ struct pm_qos_request_list *pm_qos_req = filp->private_data;;
+
+ if (!pm_qos_req)
+ return -EINVAL;
+ if (!pm_qos_request_active(pm_qos_req))
+ return -EINVAL;
+
+ o = pm_qos_array[pm_qos_req->pm_qos_class];
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ value = pm_qos_get_value(o);
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+ return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
+}
+
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 2657299..4603f08 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,125 +1,12 @@
-config PM
- bool "Power Management support"
- depends on !IA64_HP_SIM
- ---help---
- "Power Management" means that parts of your computer are shut
- off or put into a power conserving "sleep" mode if they are not
- being used. There are two competing standards for doing this: APM
- and ACPI. If you want to use either one, say Y here and then also
- to the requisite support below.
-
- Power Management is most important for battery powered laptop
- computers; if you have a laptop, check out the Linux Laptop home
- page on the WWW at <http://www.linux-on-laptops.com/> or
- Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
- and the Battery Powered Linux mini-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>.
-
- Note that, even if you say N here, Linux on the x86 architecture
- will issue the hlt instruction if nothing is to be done, thereby
- sending the processor to sleep and saving power.
-
-config PM_DEBUG
- bool "Power Management Debug Support"
- depends on PM
- ---help---
- This option enables various debugging support in the Power Management
- code. This is helpful when debugging and reporting PM bugs, like
- suspend support.
-
-config PM_ADVANCED_DEBUG
- bool "Extra PM attributes in sysfs for low-level debugging/testing"
- depends on PM_DEBUG
- default n
- ---help---
- Add extra sysfs attributes allowing one to access some Power Management
- fields of device objects from user space. If you are not a kernel
- developer interested in debugging/testing Power Management, say "no".
-
-config PM_VERBOSE
- bool "Verbose Power Management debugging"
- depends on PM_DEBUG
- default n
- ---help---
- This option enables verbose messages from the Power Management code.
-
-config CAN_PM_TRACE
- def_bool y
- depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
-
-config PM_TRACE
- bool
- help
- This enables code to save the last PM event point across
- reboot. The architecture needs to support this, x86 for
- example does by saving things in the RTC, see below.
-
- The architecture specific code must provide the extern
- functions from <linux/resume-trace.h> as well as the
- <asm/resume-trace.h> header with a TRACE_RESUME() macro.
-
- The way the information is presented is architecture-
- dependent, x86 will print the information during a
- late_initcall.
-
-config PM_TRACE_RTC
- bool "Suspend/resume event tracing"
- depends on CAN_PM_TRACE
- depends on X86
- select PM_TRACE
- default n
- ---help---
- This enables some cheesy code to save the last PM event point in the
- RTC across reboots, so that you can debug a machine that just hangs
- during suspend (or more commonly, during resume).
-
- To use this debugging feature you should attempt to suspend the
- machine, reboot it and then run
-
- dmesg -s 1000000 | grep 'hash matches'
-
- CAUTION: this option will cause your machine's real-time clock to be
- set to an invalid time after a resume.
-
-config PM_SLEEP_SMP
- bool
- depends on SMP
- depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
- depends on PM_SLEEP
- select HOTPLUG
- select HOTPLUG_CPU
- default y
-
-config PM_SLEEP
- bool
- depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
- default y
-
-config PM_SLEEP_ADVANCED_DEBUG
- bool
- depends on PM_ADVANCED_DEBUG
- default n
-
config SUSPEND
bool "Suspend to RAM and standby"
- depends on PM && ARCH_SUSPEND_POSSIBLE
+ depends on ARCH_SUSPEND_POSSIBLE
default y
---help---
Allow the system to enter sleep states in which main memory is
powered and thus its contents are preserved, such as the
suspend-to-RAM state (e.g. the ACPI S3 state).
-config PM_TEST_SUSPEND
- bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
- ---help---
- This option will let you suspend your machine during bootup, and
- make it wake up a few seconds later using an RTC wakeup alarm.
- Enable this with a kernel parameter like "test_suspend=mem".
-
- You probably want to have your system's RTC driver statically
- linked, ensuring that it's available when this test runs.
-
config SUSPEND_FREEZER
bool "Enable freezer for suspend to RAM/standby" \
if ARCH_WANTS_FREEZER_CONTROL || BROKEN
@@ -133,7 +20,7 @@ config SUSPEND_FREEZER
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
- depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+ depends on SWAP && ARCH_HIBERNATION_POSSIBLE
select LZO_COMPRESS
select LZO_DECOMPRESS
---help---
@@ -196,6 +83,106 @@ config PM_STD_PARTITION
suspended image to. It will simply pick the first available swap
device.
+config PM_SLEEP
+ def_bool y
+ depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
+
+config PM_SLEEP_SMP
+ def_bool y
+ depends on SMP
+ depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
+ depends on PM_SLEEP
+ select HOTPLUG
+ select HOTPLUG_CPU
+
+config PM_RUNTIME
+ bool "Run-time PM core functionality"
+ depends on !IA64_HP_SIM
+ ---help---
+ Enable functionality allowing I/O devices to be put into energy-saving
+ (low power) states at run time (or autosuspended) after a specified
+ period of inactivity and woken up in response to a hardware-generated
+ wake-up event or a driver's request.
+
+ Hardware support is generally required for this functionality to work
+ and the bus type drivers of the buses the devices are on are
+ responsible for the actual handling of the autosuspend requests and
+ wake-up events.
+
+config PM
+ def_bool y
+ depends on PM_SLEEP || PM_RUNTIME
+
+config PM_DEBUG
+ bool "Power Management Debug Support"
+ depends on PM
+ ---help---
+ This option enables various debugging support in the Power Management
+ code. This is helpful when debugging and reporting PM bugs, like
+ suspend support.
+
+config PM_VERBOSE
+ bool "Verbose Power Management debugging"
+ depends on PM_DEBUG
+ ---help---
+ This option enables verbose messages from the Power Management code.
+
+config PM_ADVANCED_DEBUG
+ bool "Extra PM attributes in sysfs for low-level debugging/testing"
+ depends on PM_DEBUG
+ ---help---
+ Add extra sysfs attributes allowing one to access some Power Management
+ fields of device objects from user space. If you are not a kernel
+ developer interested in debugging/testing Power Management, say "no".
+
+config PM_TEST_SUSPEND
+ bool "Test suspend/resume and wakealarm during bootup"
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+ ---help---
+ This option will let you suspend your machine during bootup, and
+ make it wake up a few seconds later using an RTC wakeup alarm.
+ Enable this with a kernel parameter like "test_suspend=mem".
+
+ You probably want to have your system's RTC driver statically
+ linked, ensuring that it's available when this test runs.
+
+config CAN_PM_TRACE
+ def_bool y
+ depends on PM_DEBUG && PM_SLEEP
+
+config PM_TRACE
+ bool
+ help
+ This enables code to save the last PM event point across
+ reboot. The architecture needs to support this, x86 for
+ example does by saving things in the RTC, see below.
+
+ The architecture specific code must provide the extern
+ functions from <linux/resume-trace.h> as well as the
+ <asm/resume-trace.h> header with a TRACE_RESUME() macro.
+
+ The way the information is presented is architecture-
+ dependent, x86 will print the information during a
+ late_initcall.
+
+config PM_TRACE_RTC
+ bool "Suspend/resume event tracing"
+ depends on CAN_PM_TRACE
+ depends on X86
+ select PM_TRACE
+ ---help---
+ This enables some cheesy code to save the last PM event point in the
+ RTC across reboots, so that you can debug a machine that just hangs
+ during suspend (or more commonly, during resume).
+
+ To use this debugging feature you should attempt to suspend the
+ machine, reboot it and then run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+ CAUTION: this option will cause your machine's real-time clock to be
+ set to an invalid time after a resume.
+
config APM_EMULATION
tristate "Advanced Power Management Emulation"
depends on PM && SYS_SUPPORTS_APM_EMULATION
@@ -222,31 +209,11 @@ config APM_EMULATION
anything, try disabling/enabling this option (or disabling/enabling
APM in your BIOS).
-config PM_RUNTIME
- bool "Run-time PM core functionality"
- depends on PM
- ---help---
- Enable functionality allowing I/O devices to be put into energy-saving
- (low power) states at run time (or autosuspended) after a specified
- period of inactivity and woken up in response to a hardware-generated
- wake-up event or a driver's request.
-
- Hardware support is generally required for this functionality to work
- and the bus type drivers of the buses the devices are on are
- responsible for the actual handling of the autosuspend requests and
- wake-up events.
-
-config PM_OPS
- bool
- depends on PM_SLEEP || PM_RUNTIME
- default y
-
config ARCH_HAS_OPP
bool
config PM_OPP
bool "Operating Performance Point (OPP) Layer library"
- depends on PM
depends on ARCH_HAS_OPP
---help---
SOCs have a standard set of tuples consisting of frequency and
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1832bd2..aeabd26 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -23,6 +23,7 @@
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/gfp.h>
+#include <linux/syscore_ops.h>
#include <scsi/scsi_scan.h>
#include <asm/suspend.h>
@@ -272,6 +273,8 @@ static int create_image(int platform_mode)
local_irq_disable();
error = sysdev_suspend(PMSG_FREEZE);
+ if (!error)
+ error = syscore_suspend();
if (error) {
printk(KERN_ERR "PM: Some system devices failed to power down, "
"aborting hibernation\n");
@@ -295,6 +298,7 @@ static int create_image(int platform_mode)
}
Power_up:
+ syscore_resume();
sysdev_resume();
/* NOTE: dpm_resume_noirq() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
@@ -403,6 +407,8 @@ static int resume_target_kernel(bool platform_mode)
local_irq_disable();
error = sysdev_suspend(PMSG_QUIESCE);
+ if (!error)
+ error = syscore_suspend();
if (error)
goto Enable_irqs;
@@ -429,6 +435,7 @@ static int resume_target_kernel(bool platform_mode)
restore_processor_state();
touch_softlockup_watchdog();
+ syscore_resume();
sysdev_resume();
Enable_irqs:
@@ -516,6 +523,7 @@ int hibernation_platform_enter(void)
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
+ syscore_suspend();
if (pm_wakeup_pending()) {
error = -EAGAIN;
goto Power_up;
@@ -526,6 +534,7 @@ int hibernation_platform_enter(void)
while (1);
Power_up:
+ syscore_resume();
sysdev_resume();
local_irq_enable();
enable_nonboot_cpus();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7018530..8eaba5f 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -17,9 +17,6 @@
DEFINE_MUTEX(pm_mutex);
-unsigned int pm_flags;
-EXPORT_SYMBOL(pm_flags);
-
#ifdef CONFIG_PM_SLEEP
/* Routines for PM-transition notifications */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 64db648..ca0aacc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -42,15 +42,15 @@ static void swsusp_unset_page_forbidden(struct page *);
/*
* Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N bytes, but if that is impossible, it will
- * try to create the smallest image possible.
+ * When it is set to N, the image creating code will do its best to
+ * ensure the image size will not exceed N bytes, but if that is
+ * impossible, it will try to create the smallest image possible.
*/
unsigned long image_size;
void __init hibernate_image_size_init(void)
{
- image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
+ image_size = (totalram_pages / 3) * PAGE_SIZE;
}
/* List of PBEs needed for restoring the pages that were allocated before
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index de6f86b..2814c32 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
#include <trace/events/power.h>
#include "power.h"
@@ -163,11 +164,14 @@ static int suspend_enter(suspend_state_t state)
BUG_ON(!irqs_disabled());
error = sysdev_suspend(PMSG_SUSPEND);
+ if (!error)
+ error = syscore_suspend();
if (!error) {
if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
error = suspend_ops->enter(state);
events_check_enabled = false;
}
+ syscore_resume();
sysdev_resume();
}
diff --git a/kernel/printk.c b/kernel/printk.c
index 3623152..33284ad 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -515,6 +515,71 @@ static void _call_console_drivers(unsigned start,
}
/*
+ * Parse the syslog header <[0-9]*>. The decimal value represents 32bit, the
+ * lower 3 bit are the log level, the rest are the log facility. In case
+ * userspace passes usual userspace syslog messages to /dev/kmsg or
+ * /dev/ttyprintk, the log prefix might contain the facility. Printk needs
+ * to extract the correct log level for in-kernel processing, and not mangle
+ * the original value.
+ *
+ * If a prefix is found, the length of the prefix is returned. If 'level' is
+ * passed, it will be filled in with the log level without a possible facility
+ * value. If 'special' is passed, the special printk prefix chars are accepted
+ * and returned. If no valid header is found, 0 is returned and the passed
+ * variables are not touched.
+ */
+static size_t log_prefix(const char *p, unsigned int *level, char *special)
+{
+ unsigned int lev = 0;
+ char sp = '\0';
+ size_t len;
+
+ if (p[0] != '<' || !p[1])
+ return 0;
+ if (p[2] == '>') {
+ /* usual single digit level number or special char */
+ switch (p[1]) {
+ case '0' ... '7':
+ lev = p[1] - '0';
+ break;
+ case 'c': /* KERN_CONT */
+ case 'd': /* KERN_DEFAULT */
+ sp = p[1];
+ break;
+ default:
+ return 0;
+ }
+ len = 3;
+ } else {
+ /* multi digit including the level and facility number */
+ char *endp = NULL;
+
+ if (p[1] < '0' && p[1] > '9')
+ return 0;
+
+ lev = (simple_strtoul(&p[1], &endp, 10) & 7);
+ if (endp == NULL || endp[0] != '>')
+ return 0;
+ len = (endp + 1) - p;
+ }
+
+ /* do not accept special char if not asked for */
+ if (sp && !special)
+ return 0;
+
+ if (special) {
+ *special = sp;
+ /* return special char, do not touch level */
+ if (sp)
+ return len;
+ }
+
+ if (level)
+ *level = lev;
+ return len;
+}
+
+/*
* Call the console drivers, asking them to write out
* log_buf[start] to log_buf[end - 1].
* The console_lock must be held.
@@ -529,13 +594,9 @@ static void call_console_drivers(unsigned start, unsigned end)
cur_index = start;
start_print = start;
while (cur_index != end) {
- if (msg_level < 0 && ((end - cur_index) > 2) &&
- LOG_BUF(cur_index + 0) == '<' &&
- LOG_BUF(cur_index + 1) >= '0' &&
- LOG_BUF(cur_index + 1) <= '7' &&
- LOG_BUF(cur_index + 2) == '>') {
- msg_level = LOG_BUF(cur_index + 1) - '0';
- cur_index += 3;
+ if (msg_level < 0 && ((end - cur_index) > 2)) {
+ /* strip log prefix */
+ cur_index += log_prefix(&LOG_BUF(cur_index), &msg_level, NULL);
start_print = cur_index;
}
while (cur_index != end) {
@@ -733,6 +794,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
unsigned long flags;
int this_cpu;
char *p;
+ size_t plen;
+ char special;
boot_delay_msec();
printk_delay();
@@ -773,45 +836,52 @@ asmlinkage int vprintk(const char *fmt, va_list args)
printed_len += vscnprintf(printk_buf + printed_len,
sizeof(printk_buf) - printed_len, fmt, args);
-
p = printk_buf;
- /* Do we have a loglevel in the string? */
- if (p[0] == '<') {
- unsigned char c = p[1];
- if (c && p[2] == '>') {
- switch (c) {
- case '0' ... '7': /* loglevel */
- current_log_level = c - '0';
- /* Fallthrough - make sure we're on a new line */
- case 'd': /* KERN_DEFAULT */
- if (!new_text_line) {
- emit_log_char('\n');
- new_text_line = 1;
- }
- /* Fallthrough - skip the loglevel */
- case 'c': /* KERN_CONT */
- p += 3;
- break;
+ /* Read log level and handle special printk prefix */
+ plen = log_prefix(p, &current_log_level, &special);
+ if (plen) {
+ p += plen;
+
+ switch (special) {
+ case 'c': /* Strip <c> KERN_CONT, continue line */
+ plen = 0;
+ break;
+ case 'd': /* Strip <d> KERN_DEFAULT, start new line */
+ plen = 0;
+ default:
+ if (!new_text_line) {
+ emit_log_char('\n');
+ new_text_line = 1;
}
}
}
/*
- * Copy the output into log_buf. If the caller didn't provide
- * appropriate log level tags, we insert them here
+ * Copy the output into log_buf. If the caller didn't provide
+ * the appropriate log prefix, we insert them here
*/
- for ( ; *p; p++) {
+ for (; *p; p++) {
if (new_text_line) {
- /* Always output the token */
- emit_log_char('<');
- emit_log_char(current_log_level + '0');
- emit_log_char('>');
- printed_len += 3;
new_text_line = 0;
+ if (plen) {
+ /* Copy original log prefix */
+ int i;
+
+ for (i = 0; i < plen; i++)
+ emit_log_char(printk_buf[i]);
+ printed_len += plen;
+ } else {
+ /* Add log prefix */
+ emit_log_char('<');
+ emit_log_char(current_log_level + '0');
+ emit_log_char('>');
+ printed_len += 3;
+ }
+
if (printk_time) {
- /* Follow the token with the time */
+ /* Add the current time stamp */
char tbuf[50], *tp;
unsigned tlen;
unsigned long long t;
diff --git a/kernel/sched.c b/kernel/sched.c
index c8e40b7..a172494 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -32,7 +32,6 @@
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
-#include <linux/smp_lock.h>
#include <asm/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
@@ -661,10 +660,9 @@ static void update_rq_clock(struct rq *rq)
#endif
/**
- * runqueue_is_locked
+ * runqueue_is_locked - Returns true if the current cpu runqueue is locked
* @cpu: the processor in question.
*
- * Returns true if the current cpu runqueue is locked.
* This interface allows printk to be called with the runqueue lock
* held and know whether or not it is OK to wake up the klogd.
*/
@@ -4087,9 +4085,6 @@ need_resched:
rcu_note_context_switch(cpu);
prev = rq->curr;
- release_kernel_lock(prev);
-need_resched_nonpreemptible:
-
schedule_debug(prev);
if (sched_feat(HRTICK))
@@ -4149,9 +4144,6 @@ need_resched_nonpreemptible:
post_schedule(rq);
- if (unlikely(reacquire_kernel_lock(prev)))
- goto need_resched_nonpreemptible;
-
preempt_enable_no_resched();
if (need_resched())
goto need_resched;
@@ -8278,7 +8270,7 @@ static inline int preempt_count_equals(int preempt_offset)
{
int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
- return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
+ return (nested == preempt_offset);
}
void __might_sleep(const char *file, int line, int preempt_offset)
diff --git a/kernel/smp.c b/kernel/smp.c
index 9910744..7cbd0f2 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,7 +194,7 @@ void generic_smp_call_function_interrupt(void)
*/
list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
int refs;
- void (*func) (void *info);
+ smp_call_func_t func;
/*
* Since we walk the list without any locks, we might
@@ -214,17 +214,17 @@ void generic_smp_call_function_interrupt(void)
if (atomic_read(&data->refs) == 0)
continue;
- func = data->csd.func; /* for later warn */
- data->csd.func(data->csd.info);
+ func = data->csd.func; /* save for later warn */
+ func(data->csd.info);
/*
- * If the cpu mask is not still set then it enabled interrupts,
- * we took another smp interrupt, and executed the function
- * twice on this cpu. In theory that copy decremented refs.
+ * If the cpu mask is not still set then func enabled
+ * interrupts (BUG), and this cpu took another smp call
+ * function interrupt and executed func(info) twice
+ * on this cpu. That nested execution decremented refs.
*/
if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
- WARN(1, "%pS enabled interrupts and double executed\n",
- func);
+ WARN(1, "%pf enabled interrupts and double executed\n", func);
continue;
}
@@ -450,7 +450,7 @@ void smp_call_function_many(const struct cpumask *mask,
{
struct call_function_data *data;
unsigned long flags;
- int cpu, next_cpu, this_cpu = smp_processor_id();
+ int refs, cpu, next_cpu, this_cpu = smp_processor_id();
/*
* Can deadlock when called with interrupts disabled.
@@ -461,7 +461,7 @@ void smp_call_function_many(const struct cpumask *mask,
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress && !early_boot_irqs_disabled);
- /* So, what's a CPU they want? Ignoring this one. */
+ /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == this_cpu)
cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
@@ -483,22 +483,49 @@ void smp_call_function_many(const struct cpumask *mask,
data = &__get_cpu_var(cfd_data);
csd_lock(&data->csd);
+
+ /* This BUG_ON verifies our reuse assertions and can be removed */
BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
+ /*
+ * The global call function queue list add and delete are protected
+ * by a lock, but the list is traversed without any lock, relying
+ * on the rcu list add and delete to allow safe concurrent traversal.
+ * We reuse the call function data without waiting for any grace
+ * period after some other cpu removes it from the global queue.
+ * This means a cpu might find our data block as it is being
+ * filled out.
+ *
+ * We hold off the interrupt handler on the other cpu by
+ * ordering our writes to the cpu mask vs our setting of the
+ * refs counter. We assert only the cpu owning the data block
+ * will set a bit in cpumask, and each bit will only be cleared
+ * by the subject cpu. Each cpu must first find its bit is
+ * set and then check that refs is set indicating the element is
+ * ready to be processed, otherwise it must skip the entry.
+ *
+ * On the previous iteration refs was set to 0 by another cpu.
+ * To avoid the use of transitivity, set the counter to 0 here
+ * so the wmb will pair with the rmb in the interrupt handler.
+ */
+ atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */
+
data->csd.func = func;
data->csd.info = info;
- cpumask_and(data->cpumask, mask, cpu_online_mask);
- cpumask_clear_cpu(this_cpu, data->cpumask);
- /*
- * To ensure the interrupt handler gets an complete view
- * we order the cpumask and refs writes and order the read
- * of them in the interrupt handler. In addition we may
- * only clear our own cpu bit from the mask.
- */
+ /* Ensure 0 refs is visible before mask. Also orders func and info */
smp_wmb();
- atomic_set(&data->refs, cpumask_weight(data->cpumask));
+ /* We rely on the "and" being processed before the store */
+ cpumask_and(data->cpumask, mask, cpu_online_mask);
+ cpumask_clear_cpu(this_cpu, data->cpumask);
+ refs = cpumask_weight(data->cpumask);
+
+ /* Some callers race with other cpus changing the passed mask */
+ if (unlikely(!refs)) {
+ csd_unlock(&data->csd);
+ return;
+ }
raw_spin_lock_irqsave(&call_function.lock, flags);
/*
@@ -507,6 +534,12 @@ void smp_call_function_many(const struct cpumask *mask,
* will not miss any other list entries:
*/
list_add_rcu(&data->csd.list, &call_function.queue);
+ /*
+ * We rely on the wmb() in list_add_rcu to complete our writes
+ * to the cpumask before this write to refs, which indicates
+ * data is on the list and is ready to be processed.
+ */
+ atomic_set(&data->refs, refs);
raw_spin_unlock_irqrestore(&call_function.lock, flags);
/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 18da702..1ad48b3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,7 @@
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/gfp.h>
+#include <linux/syscore_ops.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -298,6 +299,7 @@ void kernel_restart_prepare(char *cmd)
system_state = SYSTEM_RESTART;
device_shutdown();
sysdev_shutdown();
+ syscore_shutdown();
}
/**
@@ -336,6 +338,7 @@ void kernel_halt(void)
{
kernel_shutdown_prepare(SYSTEM_HALT);
sysdev_shutdown();
+ syscore_shutdown();
printk(KERN_EMERG "System halted.\n");
kmsg_dump(KMSG_DUMP_HALT);
machine_halt();
@@ -355,6 +358,7 @@ void kernel_power_off(void)
pm_power_off_prepare();
disable_nonboot_cpus();
sysdev_shutdown();
+ syscore_shutdown();
printk(KERN_EMERG "Power down.\n");
kmsg_dump(KMSG_DUMP_POWEROFF);
machine_power_off();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 51054fe..40245d6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1683,13 +1683,8 @@ static int test_perm(int mode, int op)
int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
{
- int error;
int mode;
- error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
- if (error)
- return error;
-
if (root->permissions)
mode = root->permissions(root, current->nsproxy, table);
else
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 14674dc..61d7d59f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -275,7 +275,7 @@ config PROFILE_ANNOTATED_BRANCHES
This tracer profiles all the the likely and unlikely macros
in the kernel. It will display the results in:
- /sys/kernel/debug/tracing/profile_annotated_branch
+ /sys/kernel/debug/tracing/trace_stat/branch_annotated
Note: this will add a significant overhead; only turn this
on if you need to profile the system's use of these macros.
@@ -288,7 +288,7 @@ config PROFILE_ALL_BRANCHES
taken in the kernel is recorded whether it hit or miss.
The results will be displayed in:
- /sys/kernel/debug/tracing/profile_branch
+ /sys/kernel/debug/tracing/trace_stat/branch_all
This option also enables the likely/unlikely profiler.
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index db7b439..d9c8bca 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -668,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list)
* the reader page). But if the next page is a header page,
* its flags will be non zero.
*/
-static int inline
+static inline int
rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *page, struct list_head *list)
{
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 3249b4f..8008ddc 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -391,8 +391,8 @@ static int process_ops(struct filter_pred *preds,
struct filter_pred *op, void *rec)
{
struct filter_pred *pred;
+ int match = 0;
int type;
- int match;
int i;
/*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b5fe4c0..5ca7ce9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -251,10 +251,12 @@ struct workqueue_struct *system_wq __read_mostly;
struct workqueue_struct *system_long_wq __read_mostly;
struct workqueue_struct *system_nrt_wq __read_mostly;
struct workqueue_struct *system_unbound_wq __read_mostly;
+struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_wq);
EXPORT_SYMBOL_GPL(system_long_wq);
EXPORT_SYMBOL_GPL(system_nrt_wq);
EXPORT_SYMBOL_GPL(system_unbound_wq);
+EXPORT_SYMBOL_GPL(system_freezable_wq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
@@ -3781,8 +3783,10 @@ static int __init init_workqueues(void)
system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
+ system_freezable_wq = alloc_workqueue("events_freezable",
+ WQ_FREEZABLE, 0);
BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
- !system_unbound_wq);
+ !system_unbound_wq || !system_freezable_wq);
return 0;
}
early_initcall(init_workqueues);