diff options
Diffstat (limited to 'Documentation/trace')
-rw-r--r-- | Documentation/trace/events.txt | 11 | ||||
-rw-r--r-- | Documentation/trace/ftrace-design.txt | 31 | ||||
-rw-r--r-- | Documentation/trace/ftrace.txt | 54 | ||||
-rw-r--r-- | Documentation/trace/kprobetrace.txt | 61 |
4 files changed, 116 insertions, 41 deletions
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 02ac6ed..09bd8e9 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt @@ -90,7 +90,8 @@ In order to facilitate early boot debugging, use boot option: trace_event=[event-list] -The format of this boot option is the same as described in section 2.1. +event-list is a comma separated list of events. See section 2.1 for event +format. 3. Defining an event-enabled tracepoint ======================================= @@ -238,7 +239,7 @@ subsystem's filter file. For convenience, filters for every event in a subsystem can be set or cleared as a group by writing a filter expression into the filter file -at the root of the subsytem. Note however, that if a filter for any +at the root of the subsystem. Note however, that if a filter for any event within the subsystem lacks a field specified in the subsystem filter, or if the filter can't be applied for any other reason, the filter for that event will retain its previous setting. This can @@ -250,7 +251,7 @@ fields can be guaranteed to propagate successfully to all events. Here are a few subsystem filter examples that also illustrate the above points: -Clear the filters on all events in the sched subsytem: +Clear the filters on all events in the sched subsystem: # cd /sys/kernel/debug/tracing/events/sched # echo 0 > filter @@ -260,7 +261,7 @@ none none Set a filter using only common fields for all events in the sched -subsytem (all events end up with the same filter): +subsystem (all events end up with the same filter): # cd /sys/kernel/debug/tracing/events/sched # echo common_pid == 0 > filter @@ -270,7 +271,7 @@ common_pid == 0 common_pid == 0 Attempt to set a filter using a non-common field for all events in the -sched subsytem (all events but those that have a prev_pid field retain +sched subsystem (all events but those that have a prev_pid field retain their old filters): # cd /sys/kernel/debug/tracing/events/sched diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 239f14b..f1f81af 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -1,5 +1,6 @@ function tracer guts ==================== + By Mike Frysinger Introduction ------------ @@ -173,14 +174,16 @@ void ftrace_graph_caller(void) unsigned long *frompc = &...; unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE; - prepare_ftrace_return(frompc, selfpc); + /* passing frame pointer up is optional -- see below */ + prepare_ftrace_return(frompc, selfpc, frame_pointer); /* restore all state needed by the ABI */ } #endif -For information on how to implement prepare_ftrace_return(), simply look at -the x86 version. The only architecture-specific piece in it is the setup of +For information on how to implement prepare_ftrace_return(), simply look at the +x86 version (the frame pointer passing is optional; see the next section for +more information). The only architecture-specific piece in it is the setup of the fault recovery table (the asm(...) code). The rest should be the same across architectures. @@ -205,6 +208,23 @@ void return_to_handler(void) #endif +HAVE_FUNCTION_GRAPH_FP_TEST +--------------------------- + +An arch may pass in a unique value (frame pointer) to both the entering and +exiting of a function. On exit, the value is compared and if it does not +match, then it will panic the kernel. This is largely a sanity check for bad +code generation with gcc. If gcc for your port sanely updates the frame +pointer under different opitmization levels, then ignore this option. + +However, adding support for it isn't terribly difficult. In your assembly code +that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument. +Then in the C version of that function, do what the x86 port does and pass it +along to ftrace_push_return_trace() instead of a stub value of 0. + +Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. + + HAVE_FTRACE_NMI_ENTER --------------------- @@ -218,11 +238,10 @@ HAVE_SYSCALL_TRACEPOINTS You need very few things to get the syscalls tracing in an arch. +- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig). - Have a NR_syscalls variable in <asm/unistd.h> that provides the number of syscalls supported by the arch. -- Implement arch_syscall_addr() that resolves a syscall address from a - syscall number. -- Support the TIF_SYSCALL_TRACEPOINT thread flags +- Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 8179692..557c1ed 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -155,6 +155,9 @@ of ftrace. Here is a list of some of the key files: to be traced. Echoing names of functions into this file will limit the trace to only those functions. + This interface also allows for commands to be used. See the + "Filter commands" section for more details. + set_ftrace_notrace: This has an effect opposite to that of @@ -1337,12 +1340,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one can either use the sysctl function or set it via the proc system interface. - sysctl kernel.ftrace_dump_on_oops=1 + sysctl kernel.ftrace_dump_on_oops=n or - echo 1 > /proc/sys/kernel/ftrace_dump_on_oops + echo n > /proc/sys/kernel/ftrace_dump_on_oops +If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will +only dump the buffer of the CPU that triggered the oops. Here's an example of such a dump after a null pointer dereference in a kernel module: @@ -1588,7 +1593,7 @@ module author does not need to worry about it. When tracing is enabled, kstop_machine is called to prevent races with the CPUS executing code being modified (which can -cause the CPU to do undesireable things), and the nops are +cause the CPU to do undesirable things), and the nops are patched back to calls. But this time, they do not call mcount (which is just a function stub). They now call into the ftrace infrastructure. @@ -1625,7 +1630,7 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ > set_ftrace_filter - # echo ftrace > current_tracer + # echo function > current_tracer # echo 1 > tracing_enabled # usleep 1 # echo 0 > tracing_enabled @@ -1822,6 +1827,47 @@ this special filter via: echo > set_graph_function +Filter commands +--------------- + +A few commands are supported by the set_ftrace_filter interface. +Trace commands have the following format: + +<function>:<command>:<parameter> + +The following commands are supported: + +- mod + This command enables function filtering per module. The + parameter defines the module. For example, if only the write* + functions in the ext3 module are desired, run: + + echo 'write*:mod:ext3' > set_ftrace_filter + + This command interacts with the filter in the same way as + filtering based on function names. Thus, adding more functions + in a different module is accomplished by appending (>>) to the + filter file. Remove specific module functions by prepending + '!': + + echo '!writeback*:mod:ext3' >> set_ftrace_filter + +- traceon/traceoff + These commands turn tracing on and off when the specified + functions are hit. The parameter determines how many times the + tracing system is turned on and off. If unspecified, there is + no limit. For example, to disable tracing when a schedule bug + is hit the first 5 times, run: + + echo '__schedule_bug:traceoff:5' > set_ftrace_filter + + These commands are cumulative whether or not they are appended + to set_ftrace_filter. To remove a command, prepend it by '!' + and drop the parameter: + + echo '!__schedule_bug:traceoff' > set_ftrace_filter + + trace_pipe ---------- diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 47aabee..ec94748 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -24,6 +24,7 @@ Synopsis of kprobe_events ------------------------- p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + -:[GRP/]EVENT : Clear a probe GRP : Group name. If omitted, use "kprobes" for it. EVENT : Event name. If omitted, the event name is generated @@ -37,15 +38,14 @@ Synopsis of kprobe_events @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) $stackN : Fetch Nth entry of stack (N >= 0) $stack : Fetch stack address. - $argN : Fetch function argument. (N >= 0)(*) - $retval : Fetch return value.(**) - +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) - NAME=FETCHARG: Set NAME as the argument name of FETCHARG. + $retval : Fetch return value.(*) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) + NAME=FETCHARG : Set NAME as the argument name of FETCHARG. + FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types + (u8/u16/u32/u64/s8/s16/s32/s64) are supported. - (*) aN may not correct on asmlinkaged functions and at the middle of - function body. - (**) only for return probe. - (***) this is useful for fetching a field of data structures. + (*) only for return probe. + (**) this is useful for fetching a field of data structures. Per-Probe Event Filtering @@ -82,13 +82,16 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording -1st to 4th arguments as "myprobe" event. As this example shows, users can -choose more familiar names for each arguments. +1st to 4th arguments as "myprobe" event. Note, which register/stack entry is +assigned to each function argument depends on arch-specific ABI. If you unsure +the ABI, please try to use probe subcommand of perf-tools (you can find it +under tools/perf/). +As this example shows, users can choose more familiar names for each arguments. - echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events + echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events This sets a kretprobe on the return point of do_sys_open() function with recording return value as "myretprobe" event. @@ -97,23 +100,24 @@ recording return value as "myretprobe" event. cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format name: myprobe -ID: 75 +ID: 780 format: - field:unsigned short common_type; offset:0; size:2; - field:unsigned char common_flags; offset:2; size:1; - field:unsigned char common_preempt_count; offset:3; size:1; - field:int common_pid; offset:4; size:4; - field:int common_tgid; offset:8; size:4; + field:unsigned short common_type; offset:0; size:2; signed:0; + field:unsigned char common_flags; offset:2; size:1; signed:0; + field:unsigned char common_preempt_count; offset:3; size:1;signed:0; + field:int common_pid; offset:4; size:4; signed:1; + field:int common_lock_depth; offset:8; size:4; signed:1; - field: unsigned long ip; offset:16;tsize:8; - field: int nargs; offset:24;tsize:4; - field: unsigned long dfd; offset:32;tsize:8; - field: unsigned long filename; offset:40;tsize:8; - field: unsigned long flags; offset:48;tsize:8; - field: unsigned long mode; offset:56;tsize:8; + field:unsigned long __probe_ip; offset:12; size:4; signed:0; + field:int __probe_nargs; offset:16; size:4; signed:1; + field:unsigned long dfd; offset:20; size:4; signed:0; + field:unsigned long filename; offset:24; size:4; signed:0; + field:unsigned long flags; offset:28; size:4; signed:0; + field:unsigned long mode; offset:32; size:4; signed:0; -print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip, +REC->dfd, REC->filename, REC->flags, REC->mode You can see that the event has 4 arguments as in the expressions you specified. @@ -121,6 +125,12 @@ print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, R This clears all probe points. + Or, + + echo -:myprobe >> kprobe_events + + This clears probe points selectively. + Right after definition, each event is disabled by default. For tracing these events, you need to enable it. @@ -146,4 +156,3 @@ events, you need to enable it. returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b). - |