diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/hw_breakpoint.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/local64.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 18 | ||||
-rw-r--r-- | arch/x86/include/asm/stacktrace.h | 49 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 62 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.h | 56 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 51 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 31 |
15 files changed, 179 insertions, 146 deletions
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 9422553..528a11e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,10 +20,10 @@ struct arch_hw_breakpoint { #include <linux/list.h> /* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x00 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c -#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 #define X86_BREAKPOINT_LEN_8 0x48 diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 0000000..36c93b5 --- /dev/null +++ b/arch/x86/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d..6e742cc 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -68,8 +68,9 @@ union cpuid10_eax { union cpuid10_edx { struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; + unsigned int num_counters_fixed:5; + unsigned int bit_width_fixed:8; + unsigned int reserved:19; } split; unsigned int full; }; @@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#include <asm/stacktrace.h> + +/* + * We abuse bit 3 from flags to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ip = (__ip); \ + (regs)->bp = caller_frame_pointer(); \ + (regs)->cs = __KERNEL_CS; \ + regs->flags = 0; \ +} + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 4dab78e..2b16a2a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -1,6 +1,13 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H +#include <linux/uaccess.h> + extern int kstack_depth_to_print; struct thread_info; @@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stack_frame_ia32 { + u32 next_frame; + u32 return_address; +}; + +static inline unsigned long caller_frame_pointer(void) +{ + struct stack_frame *frame; + + get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER + frame = frame->next_frame; +#endif + + return (unsigned long)frame; +} + #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d..f2da20f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -220,6 +220,7 @@ struct x86_pmu { struct perf_event *event); struct event_constraint *event_constraints; void (*quirks)(void); + int perfctr_second_write; int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); @@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) * count to the generic event atomically: */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); rdmsrl(hwc->event_base + idx, new_raw_count); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -313,8 +314,8 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } else { /* * If we have a PMU initialized but no APIC @@ -885,7 +886,7 @@ static int x86_perf_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; @@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) */ if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event) * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + + /* + * Due to erratum on certan cpu we need + * a second write to be sure the register + * is updated properly + */ + if (x86_pmu.perfctr_second_write) { + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + } perf_event_update_userpage(event); @@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto out; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event) * The events never got scheduled and ->cancel_txn will truncate * the event_list. */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) return; x86_pmu_stop(event); @@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag |= PERF_EVENT_TXN_STARTED; + cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuc->group_flag &= ~PERF_EVENT_TXN; /* * Truncate the collected events. */ @@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - /* - * Clear out the txn count so that ->cancel_txn() which gets - * run after ->commit_txn() doesn't undo things. - */ - cpuc->n_txn = 0; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } @@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -#include "../dumpstack.h" - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) -{ - regs->ip = ip; - /* - * perf_arch_fetch_caller_regs adds another call, we need to increment - * the skip level - */ - regs->bp = rewind_frame_pointer(skip + 1); - regs->cs = __KERNEL_CS; - /* - * We abuse bit 3 to pass exact information, see perf_misc_flags - * and the comment with PERF_EFLAGS_EXACT. - */ - regs->flags = 0; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69..9286e73 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -829,6 +829,15 @@ static __initconst const struct x86_pmu p4_pmu = { .max_period = (1ULL << 39) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + .perfctr_second_write = 1, }; static __init int p4_pmu_init(void) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386..6e8752c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,7 +18,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" int panic_on_unrecovered_nmi; int panic_on_io_nmi; diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be..0000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ - -#ifndef DUMPSTACK_H -#define DUMPSTACK_H - -#ifdef CONFIG_X86_32 -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) -#else -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) -#endif - -#include <linux/uaccess.h> - -extern void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); - -extern void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); - -extern unsigned int code_bytes; - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -struct stack_frame_ia32 { - u32 next_frame; - u32 return_address; -}; - -static inline unsigned long rewind_frame_pointer(int n) -{ - struct stack_frame *frame; - - get_bp(frame); - -#ifdef CONFIG_FRAME_POINTER - while (n--) { - if (probe_kernel_address(&frame->next_frame, frame)) - break; - } -#endif - - return (unsigned long)frame; -} - -#endif /* DUMPSTACK_H */ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a1..0f6376f 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,8 +16,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" - void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1..57a21f1 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,7 +16,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" #define N_EXCEPTION_STACKS_END \ (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b80..a474ec3 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, { /* Len */ switch (x86_len) { + case X86_BREAKPOINT_LEN_X: + *gen_len = sizeof(long); + break; case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) info->address = bp->attr.bp_addr; + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + /* + * x86 inst breakpoints need to have a specific undefined len. + * But we still need to check userspace is not trying to setup + * an unsupported length, to get a range breakpoint for example. + */ + if (bp->attr.bp_len == sizeof(long)) { + info->len = X86_BREAKPOINT_LEN_X; + return 0; + } + default: + return -EINVAL; + } + /* Len */ switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: @@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } - /* Type */ - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; - break; - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; - break; - case HW_BREAKPOINT_X: - info->type = X86_BREAKPOINT_EXECUTE; - break; - default: - return -EINVAL; - } - return 0; } /* @@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { + case X86_BREAKPOINT_LEN_X: + align = sizeof(long) -1; + break; case X86_BREAKPOINT_LEN_1: align = 0; break; @@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) perf_bp_event(bp, args->regs); + /* + * Set up resume flag to avoid breakpoint recursion when + * returning back to origin. + */ + if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) + args->regs->flags |= X86_EFLAGS_RF; + rcu_read_unlock(); } /* diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 345a4b1..175f85c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to) } /* - * Check for the REX prefix which can only exist on X86_64 - * X86_32 always returns 0 + * Skip the prefixes of the instruction. */ -static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) +static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) { + insn_attr_t attr; + + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + while (inat_is_legacy_prefix(attr)) { + insn++; + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + } #ifdef CONFIG_X86_64 - if ((*insn & 0xf0) == 0x40) - return 1; + if (inat_is_rex_prefix(attr)) + insn++; #endif - return 0; + return insn; } /* @@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr) */ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) { + /* Skip prefixes */ + insn = skip_prefixes(insn); + switch (*insn) { case 0xfa: /* cli */ case 0xfb: /* sti */ @@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) return 1; } - /* - * on X86_64, 0x40-0x4f are REX prefixes so we need to look - * at the next byte instead.. but of course not recurse infinitely - */ - if (is_REX_prefix(insn)) - return is_IF_modifier(++insn); - return 0; } @@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p, unsigned long orig_ip = (unsigned long)p->addr; kprobe_opcode_t *insn = p->ainsn.insn; - /*skip the REX prefix*/ - if (is_REX_prefix(insn)) - insn++; + /* Skip prefixes */ + insn = skip_prefixes(insn); regs->flags &= ~X86_EFLAGS_TF; switch (*insn) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d12878..96586c3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,8 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <trace/events/power.h> + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); /* @@ -111,6 +113,8 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); + + trace_power_end(smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a..3d9ea53 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,8 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <trace/events/power.h> + asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); @@ -138,6 +140,9 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); + + trace_power_end(smp_processor_id()); + /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefb..b53c525 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name) return 0; } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static void +__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) { struct stack_trace *trace = data; +#ifdef CONFIG_FRAME_POINTER if (!reliable) return; +#endif + if (nosched && in_sched_functions(addr)) + return; if (trace->skip > 0) { trace->skip--; return; @@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) trace->entries[trace->nr_entries++] = addr; } +static void save_stack_address(void *data, unsigned long addr, int reliable) +{ + return __save_stack_address(data, addr, reliable, false); +} + static void save_stack_address_nosched(void *data, unsigned long addr, int reliable) { - struct stack_trace *trace = (struct stack_trace *)data; - if (!reliable) - return; - if (in_sched_functions(addr)) - return; - if (trace->skip > 0) { - trace->skip--; - return; - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = addr; + return __save_stack_address(data, addr, reliable, true); } static const struct stacktrace_ops save_stack_ops = { @@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ -struct stack_frame { +struct stack_frame_user { const void __user *next_fp; unsigned long ret_addr; }; -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +static int +copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) { int ret; @@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) trace->entries[trace->nr_entries++] = regs->ip; while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; + struct stack_frame_user frame; frame.next_fp = NULL; frame.ret_addr = 0; |