aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c62
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c9
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/dumpstack.h56
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/hw_breakpoint.c51
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/stacktrace.c31
10 files changed, 95 insertions, 127 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d..f2da20f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
struct perf_event *event);
struct event_constraint *event_constraints;
void (*quirks)(void);
+ int perfctr_second_write;
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
* count to the generic event atomically:
*/
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(hwc->event_base + idx, new_raw_count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
@@ -313,8 +314,8 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if (!hwc->sample_period) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
x86_perf_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
*/
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+ /*
+ * Due to erratum on certan cpu we need
+ * a second write to be sure the register
+ * is updated properly
+ */
+ if (x86_pmu.perfctr_second_write) {
+ wrmsrl(hwc->event_base + idx,
(u64)(-left) & x86_pmu.cntval_mask);
+ }
perf_event_update_userpage(event);
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
goto out;
ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
return;
x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
/*
* Truncate the collected events.
*/
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- /*
- * Clear out the txn count so that ->cancel_txn() which gets
- * run after ->commit_txn() doesn't undo things.
- */
- cpuc->n_txn = 0;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
-#include "../dumpstack.h"
-
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
- regs->ip = ip;
- /*
- * perf_arch_fetch_caller_regs adds another call, we need to increment
- * the skip level
- */
- regs->bp = rewind_frame_pointer(skip + 1);
- regs->cs = __KERNEL_CS;
- /*
- * We abuse bit 3 to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
- */
- regs->flags = 0;
-}
-
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69..9286e73 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -829,6 +829,15 @@ static __initconst const struct x86_pmu p4_pmu = {
.max_period = (1ULL << 39) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
+ /*
+ * This handles erratum N15 in intel doc 249199-029,
+ * the counter may not be updated correctly on write
+ * so we need a second write operation to do the trick
+ * (the official workaround didn't work)
+ *
+ * the former idea is taken from OProfile code
+ */
+ .perfctr_second_write = 1,
};
static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386..6e8752c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be..0000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
- u32 next_frame;
- u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
- struct stack_frame *frame;
-
- get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
- while (n--) {
- if (probe_kernel_address(&frame->next_frame, frame))
- break;
- }
-#endif
-
- return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a1..0f6376f 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1..57a21f1 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b80..a474ec3 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
{
/* Len */
switch (x86_len) {
+ case X86_BREAKPOINT_LEN_X:
+ *gen_len = sizeof(long);
+ break;
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
info->address = bp->attr.bp_addr;
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ /*
+ * x86 inst breakpoints need to have a specific undefined len.
+ * But we still need to check userspace is not trying to setup
+ * an unsupported length, to get a range breakpoint for example.
+ */
+ if (bp->attr.bp_len == sizeof(long)) {
+ info->len = X86_BREAKPOINT_LEN_X;
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+
/* Len */
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
}
- /* Type */
- switch (bp->attr.bp_type) {
- case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
- break;
- case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
- break;
- case HW_BREAKPOINT_X:
- info->type = X86_BREAKPOINT_EXECUTE;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
+ case X86_BREAKPOINT_LEN_X:
+ align = sizeof(long) -1;
+ break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs);
+ /*
+ * Set up resume flag to avoid breakpoint recursion when
+ * returning back to origin.
+ */
+ if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+ args->regs->flags |= X86_EFLAGS_RF;
+
rcu_read_unlock();
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d12878..96586c3 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a..3d9ea53 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
+
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefb..b53c525 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
return 0;
}
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
if (!reliable)
return;
+#endif
+ if (nosched && in_sched_functions(addr))
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
trace->entries[trace->nr_entries++] = addr;
}
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+ return __save_stack_address(data, addr, reliable, false);
+}
+
static void
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
- struct stack_trace *trace = (struct stack_trace *)data;
- if (!reliable)
- return;
- if (in_sched_functions(addr))
- return;
- if (trace->skip > 0) {
- trace->skip--;
- return;
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = addr;
+ return __save_stack_address(data, addr, reliable, true);
}
static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
-struct stack_frame {
+struct stack_frame_user {
const void __user *next_fp;
unsigned long ret_addr;
};
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
int ret;
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
trace->entries[trace->nr_entries++] = regs->ip;
while (trace->nr_entries < trace->max_entries) {
- struct stack_frame frame;
+ struct stack_frame_user frame;
frame.next_fp = NULL;
frame.ret_addr = 0;