diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-27 05:20:22 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-26 20:04:57 +0200 |
commit | 1fc570ad89e55dc32dfa4dda1311948b38f26524 (patch) | |
tree | 5e775a1f2627301110bd11246dd68cf727961c94 /tools/perf | |
parent | 481f988a016f7a0327a5537bde4794349fc4625c (diff) | |
download | kernel_goldelico_gta04-1fc570ad89e55dc32dfa4dda1311948b38f26524.zip kernel_goldelico_gta04-1fc570ad89e55dc32dfa4dda1311948b38f26524.tar.gz kernel_goldelico_gta04-1fc570ad89e55dc32dfa4dda1311948b38f26524.tar.bz2 |
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-stat.c | 5 | ||||
-rw-r--r-- | tools/perf/util/parse-events.c | 11 |
2 files changed, 8 insertions, 8 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e881c20..924d18c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,11 +65,10 @@ static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, }; @@ -468,7 +467,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total) ratio = avg * 100 / total; - fprintf(stderr, " # %8.3f %% of all branches", ratio); + fprintf(stderr, " # %5.2f %% of all branches ", ratio); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && runtime_cacherefs_stats[cpu].n != 0) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b5bfef1..bbbb735 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN]; static struct event_symbol event_symbols[] = { { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" }, { CHW(INSTRUCTIONS), "instructions", "" }, { CHW(CACHE_REFERENCES), "cache-references", "" }, { CHW(CACHE_MISSES), "cache-misses", "" }, { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, { CHW(BRANCH_MISSES), "branch-misses", "" }, { CHW(BUS_CYCLES), "bus-cycles", "" }, - { CHW(STALLED_CYCLES), "stalled-cycles", "" }, { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, @@ -54,9 +54,9 @@ static struct event_symbol event_symbols[] = { #define __PERF_EVENT_FIELD(config, name) \ ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT) -#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) +#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW) #define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG) -#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) +#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) static const char *hw_event_names[] = { @@ -67,6 +67,7 @@ static const char *hw_event_names[] = { "branches", "branch-misses", "bus-cycles", + "stalled-cycles", }; static const char *sw_event_names[] = { @@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config) switch (type) { case PERF_TYPE_HARDWARE: - if (config < PERF_COUNT_HW_MAX) + if (config < PERF_COUNT_HW_MAX && hw_event_names[config]) return hw_event_names[config]; return "unknown-hardware"; @@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config) } case PERF_TYPE_SOFTWARE: - if (config < PERF_COUNT_SW_MAX) + if (config < PERF_COUNT_SW_MAX && sw_event_names[config]) return sw_event_names[config]; return "unknown-software"; |