aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/trace
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/trace')
-rw-r--r--Documentation/trace/events.txt11
-rw-r--r--Documentation/trace/ftrace-design.txt31
-rw-r--r--Documentation/trace/ftrace.txt54
-rw-r--r--Documentation/trace/kprobetrace.txt61
4 files changed, 116 insertions, 41 deletions
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index 02ac6ed..09bd8e9 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -90,7 +90,8 @@ In order to facilitate early boot debugging, use boot option:
trace_event=[event-list]
-The format of this boot option is the same as described in section 2.1.
+event-list is a comma separated list of events. See section 2.1 for event
+format.
3. Defining an event-enabled tracepoint
=======================================
@@ -238,7 +239,7 @@ subsystem's filter file.
For convenience, filters for every event in a subsystem can be set or
cleared as a group by writing a filter expression into the filter file
-at the root of the subsytem. Note however, that if a filter for any
+at the root of the subsystem. Note however, that if a filter for any
event within the subsystem lacks a field specified in the subsystem
filter, or if the filter can't be applied for any other reason, the
filter for that event will retain its previous setting. This can
@@ -250,7 +251,7 @@ fields can be guaranteed to propagate successfully to all events.
Here are a few subsystem filter examples that also illustrate the
above points:
-Clear the filters on all events in the sched subsytem:
+Clear the filters on all events in the sched subsystem:
# cd /sys/kernel/debug/tracing/events/sched
# echo 0 > filter
@@ -260,7 +261,7 @@ none
none
Set a filter using only common fields for all events in the sched
-subsytem (all events end up with the same filter):
+subsystem (all events end up with the same filter):
# cd /sys/kernel/debug/tracing/events/sched
# echo common_pid == 0 > filter
@@ -270,7 +271,7 @@ common_pid == 0
common_pid == 0
Attempt to set a filter using a non-common field for all events in the
-sched subsytem (all events but those that have a prev_pid field retain
+sched subsystem (all events but those that have a prev_pid field retain
their old filters):
# cd /sys/kernel/debug/tracing/events/sched
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index 239f14b..f1f81af 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -1,5 +1,6 @@
function tracer guts
====================
+ By Mike Frysinger
Introduction
------------
@@ -173,14 +174,16 @@ void ftrace_graph_caller(void)
unsigned long *frompc = &...;
unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
- prepare_ftrace_return(frompc, selfpc);
+ /* passing frame pointer up is optional -- see below */
+ prepare_ftrace_return(frompc, selfpc, frame_pointer);
/* restore all state needed by the ABI */
}
#endif
-For information on how to implement prepare_ftrace_return(), simply look at
-the x86 version. The only architecture-specific piece in it is the setup of
+For information on how to implement prepare_ftrace_return(), simply look at the
+x86 version (the frame pointer passing is optional; see the next section for
+more information). The only architecture-specific piece in it is the setup of
the fault recovery table (the asm(...) code). The rest should be the same
across architectures.
@@ -205,6 +208,23 @@ void return_to_handler(void)
#endif
+HAVE_FUNCTION_GRAPH_FP_TEST
+---------------------------
+
+An arch may pass in a unique value (frame pointer) to both the entering and
+exiting of a function. On exit, the value is compared and if it does not
+match, then it will panic the kernel. This is largely a sanity check for bad
+code generation with gcc. If gcc for your port sanely updates the frame
+pointer under different opitmization levels, then ignore this option.
+
+However, adding support for it isn't terribly difficult. In your assembly code
+that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
+Then in the C version of that function, do what the x86 port does and pass it
+along to ftrace_push_return_trace() instead of a stub value of 0.
+
+Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
+
+
HAVE_FTRACE_NMI_ENTER
---------------------
@@ -218,11 +238,10 @@ HAVE_SYSCALL_TRACEPOINTS
You need very few things to get the syscalls tracing in an arch.
+- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig).
- Have a NR_syscalls variable in <asm/unistd.h> that provides the number
of syscalls supported by the arch.
-- Implement arch_syscall_addr() that resolves a syscall address from a
- syscall number.
-- Support the TIF_SYSCALL_TRACEPOINT thread flags
+- Support the TIF_SYSCALL_TRACEPOINT thread flags.
- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
in the ptrace syscalls tracing path.
- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 8179692..557c1ed 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -155,6 +155,9 @@ of ftrace. Here is a list of some of the key files:
to be traced. Echoing names of functions into this file
will limit the trace to only those functions.
+ This interface also allows for commands to be used. See the
+ "Filter commands" section for more details.
+
set_ftrace_notrace:
This has an effect opposite to that of
@@ -1337,12 +1340,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
can either use the sysctl function or set it via the proc system
interface.
- sysctl kernel.ftrace_dump_on_oops=1
+ sysctl kernel.ftrace_dump_on_oops=n
or
- echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
+ echo n > /proc/sys/kernel/ftrace_dump_on_oops
+If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will
+only dump the buffer of the CPU that triggered the oops.
Here's an example of such a dump after a null pointer
dereference in a kernel module:
@@ -1588,7 +1593,7 @@ module author does not need to worry about it.
When tracing is enabled, kstop_machine is called to prevent
races with the CPUS executing code being modified (which can
-cause the CPU to do undesireable things), and the nops are
+cause the CPU to do undesirable things), and the nops are
patched back to calls. But this time, they do not call mcount
(which is just a function stub). They now call into the ftrace
infrastructure.
@@ -1625,7 +1630,7 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
# echo sys_nanosleep hrtimer_interrupt \
> set_ftrace_filter
- # echo ftrace > current_tracer
+ # echo function > current_tracer
# echo 1 > tracing_enabled
# usleep 1
# echo 0 > tracing_enabled
@@ -1822,6 +1827,47 @@ this special filter via:
echo > set_graph_function
+Filter commands
+---------------
+
+A few commands are supported by the set_ftrace_filter interface.
+Trace commands have the following format:
+
+<function>:<command>:<parameter>
+
+The following commands are supported:
+
+- mod
+ This command enables function filtering per module. The
+ parameter defines the module. For example, if only the write*
+ functions in the ext3 module are desired, run:
+
+ echo 'write*:mod:ext3' > set_ftrace_filter
+
+ This command interacts with the filter in the same way as
+ filtering based on function names. Thus, adding more functions
+ in a different module is accomplished by appending (>>) to the
+ filter file. Remove specific module functions by prepending
+ '!':
+
+ echo '!writeback*:mod:ext3' >> set_ftrace_filter
+
+- traceon/traceoff
+ These commands turn tracing on and off when the specified
+ functions are hit. The parameter determines how many times the
+ tracing system is turned on and off. If unspecified, there is
+ no limit. For example, to disable tracing when a schedule bug
+ is hit the first 5 times, run:
+
+ echo '__schedule_bug:traceoff:5' > set_ftrace_filter
+
+ These commands are cumulative whether or not they are appended
+ to set_ftrace_filter. To remove a command, prepend it by '!'
+ and drop the parameter:
+
+ echo '!__schedule_bug:traceoff' > set_ftrace_filter
+
+
trace_pipe
----------
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 47aabee..ec94748 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -24,6 +24,7 @@ Synopsis of kprobe_events
-------------------------
p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ -:[GRP/]EVENT : Clear a probe
GRP : Group name. If omitted, use "kprobes" for it.
EVENT : Event name. If omitted, the event name is generated
@@ -37,15 +38,14 @@ Synopsis of kprobe_events
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
- $argN : Fetch function argument. (N >= 0)(*)
- $retval : Fetch return value.(**)
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
- NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+ NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+ FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+ (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
- (*) aN may not correct on asmlinkaged functions and at the middle of
- function body.
- (**) only for return probe.
- (***) this is useful for fetching a field of data structures.
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
Per-Probe Event Filtering
@@ -82,13 +82,16 @@ Usage examples
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+ echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event. As this example shows, users can
-choose more familiar names for each arguments.
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
- echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
+ echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
This sets a kretprobe on the return point of do_sys_open() function with
recording return value as "myretprobe" event.
@@ -97,23 +100,24 @@ recording return value as "myretprobe" event.
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
name: myprobe
-ID: 75
+ID: 780
format:
- field:unsigned short common_type; offset:0; size:2;
- field:unsigned char common_flags; offset:2; size:1;
- field:unsigned char common_preempt_count; offset:3; size:1;
- field:int common_pid; offset:4; size:4;
- field:int common_tgid; offset:8; size:4;
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1;signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:int common_lock_depth; offset:8; size:4; signed:1;
- field: unsigned long ip; offset:16;tsize:8;
- field: int nargs; offset:24;tsize:4;
- field: unsigned long dfd; offset:32;tsize:8;
- field: unsigned long filename; offset:40;tsize:8;
- field: unsigned long flags; offset:48;tsize:8;
- field: unsigned long mode; offset:56;tsize:8;
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:int __probe_nargs; offset:16; size:4; signed:1;
+ field:unsigned long dfd; offset:20; size:4; signed:0;
+ field:unsigned long filename; offset:24; size:4; signed:0;
+ field:unsigned long flags; offset:28; size:4; signed:0;
+ field:unsigned long mode; offset:32; size:4; signed:0;
-print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
+REC->dfd, REC->filename, REC->flags, REC->mode
You can see that the event has 4 arguments as in the expressions you specified.
@@ -121,6 +125,12 @@ print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, R
This clears all probe points.
+ Or,
+
+ echo -:myprobe >> kprobe_events
+
+ This clears probe points selectively.
+
Right after definition, each event is disabled by default. For tracing these
events, you need to enable it.
@@ -146,4 +156,3 @@ events, you need to enable it.
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
-