diff options
author | David S. Miller <davem@davemloft.net> | 2010-01-19 00:26:13 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-01-20 16:23:02 -0800 |
commit | 4f6dbe4ac01d2664231d3f3eceadd33a44cde993 (patch) | |
tree | 029201f8597df383c5fac18b2693227570b1693e /arch/sparc | |
parent | 0299b1371d8f1b074c8284a19beb9094ada9405f (diff) | |
download | kernel_samsung_tuna-4f6dbe4ac01d2664231d3f3eceadd33a44cde993.zip kernel_samsung_tuna-4f6dbe4ac01d2664231d3f3eceadd33a44cde993.tar.gz kernel_samsung_tuna-4f6dbe4ac01d2664231d3f3eceadd33a44cde993.tar.bz2 |
sparc64: Add perf callchain support.
Pretty straightforward, and it should be easy to add accurate
walk through of signal stack frames in userspace.
Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'arch/sparc')
-rw-r--r-- | arch/sparc/kernel/perf_event.c | 120 |
1 files changed, 119 insertions, 1 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 198fb4e..2386ac6 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1,6 +1,6 @@ /* Performance event support for sparc64. * - * Copyright (C) 2009 David S. Miller <davem@davemloft.net> + * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net> * * This code is based almost entirely upon the x86 perf event * code, which is: @@ -18,11 +18,15 @@ #include <linux/kdebug.h> #include <linux/mutex.h> +#include <asm/stacktrace.h> #include <asm/cpudata.h> +#include <asm/uaccess.h> #include <asm/atomic.h> #include <asm/nmi.h> #include <asm/pcr.h> +#include "kstack.h" + /* Sparc64 chips have two performance counters, 32-bits each, with * overflow interrupts generated on transition from 0xffffffff to 0. * The counters are accessed in one go using a 64-bit register. @@ -1062,3 +1066,117 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); } + +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static void perf_callchain_kernel(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long ksp, fp; + + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->tpc); + + ksp = regs->u_regs[UREG_I6]; + fp = ksp + STACK_BIAS; + do { + struct sparc_stackf *sf; + struct pt_regs *regs; + unsigned long pc; + + if (!kstack_valid(current_thread_info(), fp)) + break; + + sf = (struct sparc_stackf *) fp; + regs = (struct pt_regs *) (sf + 1); + + if (kstack_is_trap_frame(current_thread_info(), regs)) { + if (user_mode(regs)) + break; + pc = regs->tpc; + fp = regs->u_regs[UREG_I6] + STACK_BIAS; + } else { + pc = sf->callers_pc; + fp = (unsigned long)sf->fp + STACK_BIAS; + } + callchain_store(entry, pc); + } while (entry->nr < PERF_MAX_STACK_DEPTH); +} + +static void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long ufp; + + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, regs->tpc); + + ufp = regs->u_regs[UREG_I6] + STACK_BIAS; + do { + struct sparc_stackf *usf, sf; + unsigned long pc; + + usf = (struct sparc_stackf *) ufp; + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) + break; + + pc = sf.callers_pc; + ufp = (unsigned long)sf.fp + STACK_BIAS; + callchain_store(entry, pc); + } while (entry->nr < PERF_MAX_STACK_DEPTH); +} + +static void perf_callchain_user_32(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long ufp; + + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, regs->tpc); + + ufp = regs->u_regs[UREG_I6]; + do { + struct sparc_stackf32 *usf, sf; + unsigned long pc; + + usf = (struct sparc_stackf32 *) ufp; + if (__copy_from_user_inatomic(&sf, usf, sizeof(sf))) + break; + + pc = sf.callers_pc; + ufp = (unsigned long)sf.fp; + callchain_store(entry, pc); + } while (entry->nr < PERF_MAX_STACK_DEPTH); +} + +/* Like powerpc we can't get PMU interrupts within the PMU handler, + * so no need for seperate NMI and IRQ chains as on x86. + */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + if (!user_mode(regs)) { + stack_trace_flush(); + perf_callchain_kernel(regs, entry); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + if (regs) { + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(regs, entry); + else + perf_callchain_user_64(regs, entry); + } + return entry; +} |